Add vLLM message normalization for OpenAI content format compatibility
- Normalize the 'developer' role to 'system' (vLLM does not support the developer role)
- Flatten array content to a string for text-only messages
- Preserve mixed content (text + images) as an array
- Add comprehensive unit tests for the normalization logic

Fixes HTTP 422 errors when clients send the OpenAI multi-part content format.
This commit is contained in:
@@ -27,6 +27,47 @@ logger = logging.getLogger(__name__)
|
|||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_messages_for_vllm(messages: list) -> list:
    """Normalize OpenAI message format for vLLM compatibility.

    vLLM is stricter than the OpenAI API and requires:

    1. Message content as a string (not an array of content parts)
    2. Role must be system/user/assistant/function/tool (not "developer")

    Args:
        messages: List of message dicts in OpenAI chat format.

    Returns:
        Normalized list of messages. The input list and its dicts are not
        mutated; each message is shallow-copied before modification.
    """
    normalized = []

    for msg in messages:
        normalized_msg = msg.copy()

        # Normalize "developer" role to "system": vLLM rejects "developer",
        # and "system" carries the same instruction-level intent.
        if normalized_msg.get("role") == "developer":
            normalized_msg["role"] = "system"
            logger.debug("Normalized 'developer' role to 'system'")

        # Normalize array content to string for text-only messages.
        content = normalized_msg.get("content")
        if isinstance(content, list):
            # Check if all parts are text-only. Note: all() is True for an
            # empty parts list, so [] is deliberately flattened to "".
            if all(isinstance(p, dict) and p.get("type") == "text" for p in content):
                # Flatten to a newline-joined string; missing "text" keys
                # contribute an empty segment rather than raising.
                normalized_msg["content"] = "\n".join(p.get("text", "") for p in content)
                # Lazy %-style args avoid string formatting when DEBUG is off.
                logger.debug("Normalized array content to string: %d parts", len(content))
            else:
                # Has image_url or other non-text types - keep as is.
                # vLLM may reject this, but we preserve the original format.
                logger.warning("Message contains non-text content parts, keeping array format")

        normalized.append(normalized_msg)

    return normalized
|
||||||
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/v1/chat/completions",
|
"/v1/chat/completions",
|
||||||
response_model=Union[ChatCompletionResponse, ErrorResponse],
|
response_model=Union[ChatCompletionResponse, ErrorResponse],
|
||||||
@@ -51,8 +92,14 @@ async def create_chat_completion(
|
|||||||
watsonx_model = settings.map_model(request.model)
|
watsonx_model = settings.map_model(request.model)
|
||||||
logger.info(f"Chat completion request: {request.model} -> {watsonx_model}")
|
logger.info(f"Chat completion request: {request.model} -> {watsonx_model}")
|
||||||
|
|
||||||
# Transform messages
|
# Normalize messages for vLLM compatibility (handles array content and developer role)
|
||||||
watsonx_messages = transform_messages_to_watsonx(request.messages)
|
normalized_messages = normalize_messages_for_vllm([msg.model_dump() for msg in request.messages])
|
||||||
|
|
||||||
|
# Transform normalized messages to watsonx format
|
||||||
|
# Convert back to ChatMessage objects for the transformer
|
||||||
|
from app.models.openai_models import ChatMessage
|
||||||
|
normalized_chat_messages = [ChatMessage(**msg) for msg in normalized_messages]
|
||||||
|
watsonx_messages = transform_messages_to_watsonx(normalized_chat_messages)
|
||||||
|
|
||||||
# Transform tools if present
|
# Transform tools if present
|
||||||
watsonx_tools = transform_tools_to_watsonx(request.tools)
|
watsonx_tools = transform_tools_to_watsonx(request.tools)
|
||||||
|
|||||||
95
tests/test_message_normalization.py
Normal file
95
tests/test_message_normalization.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
"""Tests for message normalization for vLLM compatibility."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from app.routers.chat import normalize_messages_for_vllm
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_developer_role():
    """Test that 'developer' role is normalized to 'system'."""
    result = normalize_messages_for_vllm([
        {"role": "developer", "content": "You are helpful."},
        {"role": "user", "content": "Hello"},
    ])

    # Only the developer message changes role; the user message is untouched.
    assert [m["role"] for m in result] == ["system", "user"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_array_content_text_only():
    """Test that array content with only text parts is flattened to string."""
    parts = [
        {"type": "text", "text": "Hello"},
        {"type": "text", "text": "World"},
    ]
    result = normalize_messages_for_vllm([{"role": "user", "content": parts}])

    content = result[0]["content"]
    assert isinstance(content, str)
    assert content == "Hello\nWorld"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_string_content_unchanged():
    """Test that string content remains unchanged."""
    message = {"role": "user", "content": "Hello world"}

    result = normalize_messages_for_vllm([message])

    assert result[0]["content"] == "Hello world"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_mixed_content_preserved():
    """Test that mixed content (text + image) is preserved as array."""
    mixed_parts = [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
    ]

    result = normalize_messages_for_vllm([{"role": "user", "content": mixed_parts}])

    # Mixed content must stay in array form, with both parts intact.
    content = result[0]["content"]
    assert isinstance(content, list)
    assert len(content) == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_multiple_messages():
    """Test normalization of multiple messages with different formats."""
    result = normalize_messages_for_vllm([
        {"role": "developer", "content": "You are helpful."},
        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ])

    assert result[0]["role"] == "system"
    # Single-part text arrays flatten; plain strings pass through unchanged.
    assert [m["content"] for m in result[1:]] == ["Hello", "Hi there!", "How are you?"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_empty_content():
    """Test handling of empty or None content."""
    edge_cases = [
        {"role": "user", "content": None},
        {"role": "user", "content": ""},
    ]

    result = normalize_messages_for_vllm(edge_cases)

    assert result[0]["content"] is None
    assert result[1]["content"] == ""
|
||||||
Reference in New Issue
Block a user