From d924b7c45f7482cccad0bdc9a8b304d5a443c4e3 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 23 Feb 2026 11:59:23 -0500
Subject: [PATCH] Add vLLM message normalization for OpenAI content format
 compatibility

- Normalize 'developer' role to 'system' (vLLM doesn't support developer role)
- Flatten array content to string for text-only messages
- Preserve mixed content (text + images) as array
- Add comprehensive unit tests for normalization logic

Fixes HTTP 422 errors when clients send OpenAI multi-content format
---
 app/routers/chat.py                 | 51 +++++++++++++++-
 tests/test_message_normalization.py | 95 +++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_message_normalization.py

diff --git a/app/routers/chat.py b/app/routers/chat.py
index f03fc1f..1d46dae 100644
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@@ -27,6 +27,47 @@ logger = logging.getLogger(__name__)
 
 router = APIRouter()
 
+def normalize_messages_for_vllm(messages: list) -> list:
+    """Normalize OpenAI message format for vLLM compatibility.
+
+    vLLM is stricter than OpenAI API and requires:
+    1. Message content as string (not array of content parts)
+    2. Role must be system/user/assistant/function/tool (not "developer")
+
+    Args:
+        messages: List of message dicts
+
+    Returns:
+        Normalized list of messages
+    """
+    normalized = []
+
+    for msg in messages:
+        normalized_msg = msg.copy()
+
+        # Normalize "developer" role to "system"
+        if normalized_msg.get("role") == "developer":
+            normalized_msg["role"] = "system"
+            logger.debug("Normalized 'developer' role to 'system'")
+
+        # Normalize array content to string for text-only messages
+        content = normalized_msg.get("content")
+        if isinstance(content, list):
+            # Check if all parts are text-only
+            if all(isinstance(p, dict) and p.get("type") == "text" for p in content):
+                # Flatten to concatenated string
+                normalized_msg["content"] = "\n".join(p.get("text", "") for p in content)
+                logger.debug(f"Normalized array content to string: {len(content)} parts")
+            else:
+                # Has image_url or other non-text types - keep as is
+                # vLLM may reject this, but we preserve the original format
+                logger.warning("Message contains non-text content parts, keeping array format")
+
+        normalized.append(normalized_msg)
+
+    return normalized
+
+
 @router.post(
     "/v1/chat/completions",
     response_model=Union[ChatCompletionResponse, ErrorResponse],
@@ -51,8 +92,14 @@ async def create_chat_completion(
     watsonx_model = settings.map_model(request.model)
     logger.info(f"Chat completion request: {request.model} -> {watsonx_model}")
 
-    # Transform messages
-    watsonx_messages = transform_messages_to_watsonx(request.messages)
+    # Normalize messages for vLLM compatibility (handles array content and developer role)
+    normalized_messages = normalize_messages_for_vllm([msg.model_dump() for msg in request.messages])
+
+    # Transform normalized messages to watsonx format
+    # Convert back to ChatMessage objects for the transformer
+    from app.models.openai_models import ChatMessage
+    normalized_chat_messages = [ChatMessage(**msg) for msg in normalized_messages]
+    watsonx_messages = transform_messages_to_watsonx(normalized_chat_messages)
 
     # Transform tools if present
     watsonx_tools = transform_tools_to_watsonx(request.tools)
diff --git a/tests/test_message_normalization.py b/tests/test_message_normalization.py
new file mode 100644
index 0000000..6f891ee
--- /dev/null
+++ b/tests/test_message_normalization.py
@@ -0,0 +1,95 @@
+"""Tests for message normalization for vLLM compatibility."""
+
+import pytest
+from app.routers.chat import normalize_messages_for_vllm
+
+
+def test_normalize_developer_role():
+    """Test that 'developer' role is normalized to 'system'."""
+    messages = [
+        {"role": "developer", "content": "You are helpful."},
+        {"role": "user", "content": "Hello"}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["role"] == "system"
+    assert normalized[1]["role"] == "user"
+
+
+def test_normalize_array_content_text_only():
+    """Test that array content with only text parts is flattened to string."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Hello"},
+                {"type": "text", "text": "World"}
+            ]
+        }
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert isinstance(normalized[0]["content"], str)
+    assert normalized[0]["content"] == "Hello\nWorld"
+
+
+def test_normalize_string_content_unchanged():
+    """Test that string content remains unchanged."""
+    messages = [
+        {"role": "user", "content": "Hello world"}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["content"] == "Hello world"
+
+
+def test_normalize_mixed_content_preserved():
+    """Test that mixed content (text + image) is preserved as array."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+            ]
+        }
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    # Should keep array format for mixed content
+    assert isinstance(normalized[0]["content"], list)
+    assert len(normalized[0]["content"]) == 2
+
+
+def test_normalize_multiple_messages():
+    """Test normalization of multiple messages with different formats."""
+    messages = [
+        {"role": "developer", "content": "You are helpful."},
+        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
+        {"role": "assistant", "content": "Hi there!"},
+        {"role": "user", "content": "How are you?"}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["role"] == "system"
+    assert normalized[1]["content"] == "Hello"
+    assert normalized[2]["content"] == "Hi there!"
+    assert normalized[3]["content"] == "How are you?"
+
+
+def test_normalize_empty_content():
+    """Test handling of empty or None content."""
+    messages = [
+        {"role": "user", "content": None},
+        {"role": "user", "content": ""}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["content"] is None
+    assert normalized[1]["content"] == ""