From d924b7c45f7482cccad0bdc9a8b304d5a443c4e3 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 23 Feb 2026 11:59:23 -0500
Subject: [PATCH] Add vLLM message normalization for OpenAI content format
 compatibility

- Normalize 'developer' role to 'system' (vLLM doesn't support developer role)
- Flatten array content to string for text-only messages
- Preserve mixed content (text + images) as array
- Add comprehensive unit tests for normalization logic

Fixes HTTP 422 errors when clients send OpenAI multi-content format
---
 app/routers/chat.py                 | 51 +++++++++++++++-
 tests/test_message_normalization.py | 95 +++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_message_normalization.py

diff --git a/app/routers/chat.py b/app/routers/chat.py
index f03fc1f..1d46dae 100644
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@@ -27,6 +27,47 @@ logger = logging.getLogger(__name__)
 
 router = APIRouter()
 
+def normalize_messages_for_vllm(messages: list) -> list:
+    """Normalize OpenAI message format for vLLM compatibility.
+
+    vLLM is stricter than OpenAI API and requires:
+    1. Message content as string (not array of content parts)
+    2. Role must be system/user/assistant/function/tool (not "developer")
+
+    Args:
+        messages: List of message dicts
+
+    Returns:
+        Normalized list of messages
+    """
+    normalized = []
+
+    for msg in messages:
+        normalized_msg = msg.copy()
+
+        # Normalize "developer" role to "system"
+        if normalized_msg.get("role") == "developer":
+            normalized_msg["role"] = "system"
+            logger.debug("Normalized 'developer' role to 'system'")
+
+        # Normalize array content to string for text-only messages
+        content = normalized_msg.get("content")
+        if isinstance(content, list):
+            # Check if all parts are text-only
+            if all(isinstance(p, dict) and p.get("type") == "text" for p in content):
+                # Flatten to concatenated string
+                normalized_msg["content"] = "\n".join(p.get("text", "") for p in content)
+                logger.debug(f"Normalized array content to string: {len(content)} parts")
+            else:
+                # Has image_url or other non-text types - keep as is
+                # vLLM may reject this, but we preserve the original format
+                logger.warning("Message contains non-text content parts, keeping array format")
+
+        normalized.append(normalized_msg)
+
+    return normalized
+
+
 @router.post(
     "/v1/chat/completions",
     response_model=Union[ChatCompletionResponse, ErrorResponse],
@@ -51,8 +92,14 @@ async def create_chat_completion(
     watsonx_model = settings.map_model(request.model)
     logger.info(f"Chat completion request: {request.model} -> {watsonx_model}")
 
-    # Transform messages
-    watsonx_messages = transform_messages_to_watsonx(request.messages)
+    # Normalize messages for vLLM compatibility (handles array content and developer role)
+    normalized_messages = normalize_messages_for_vllm([msg.model_dump() for msg in request.messages])
+
+    # Transform normalized messages to watsonx format
+    # Convert back to ChatMessage objects for the transformer
+    from app.models.openai_models import ChatMessage
+    normalized_chat_messages = [ChatMessage(**msg) for msg in normalized_messages]
+    watsonx_messages = transform_messages_to_watsonx(normalized_chat_messages)
 
     # Transform tools if present
     watsonx_tools = transform_tools_to_watsonx(request.tools)
diff --git a/tests/test_message_normalization.py b/tests/test_message_normalization.py
new file mode 100644
index 0000000..6f891ee
--- /dev/null
+++ b/tests/test_message_normalization.py
@@ -0,0 +1,95 @@
+"""Tests for message normalization for vLLM compatibility."""
+
+import pytest
+from app.routers.chat import normalize_messages_for_vllm
+
+
+def test_normalize_developer_role():
+    """Test that 'developer' role is normalized to 'system'."""
+    messages = [
+        {"role": "developer", "content": "You are helpful."},
+        {"role": "user", "content": "Hello"}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["role"] == "system"
+    assert normalized[1]["role"] == "user"
+
+
+def test_normalize_array_content_text_only():
+    """Test that array content with only text parts is flattened to string."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Hello"},
+                {"type": "text", "text": "World"}
+            ]
+        }
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert isinstance(normalized[0]["content"], str)
+    assert normalized[0]["content"] == "Hello\nWorld"
+
+
+def test_normalize_string_content_unchanged():
+    """Test that string content remains unchanged."""
+    messages = [
+        {"role": "user", "content": "Hello world"}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["content"] == "Hello world"
+
+
+def test_normalize_mixed_content_preserved():
+    """Test that mixed content (text + image) is preserved as array."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+            ]
+        }
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    # Should keep array format for mixed content
+    assert isinstance(normalized[0]["content"], list)
+    assert len(normalized[0]["content"]) == 2
+
+
+def test_normalize_multiple_messages():
+    """Test normalization of multiple messages with different formats."""
+    messages = [
+        {"role": "developer", "content": "You are helpful."},
+        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
+        {"role": "assistant", "content": "Hi there!"},
+        {"role": "user", "content": "How are you?"}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["role"] == "system"
+    assert normalized[1]["content"] == "Hello"
+    assert normalized[2]["content"] == "Hi there!"
+    assert normalized[3]["content"] == "How are you?"
+
+
+def test_normalize_empty_content():
+    """Test handling of empty or None content."""
+    messages = [
+        {"role": "user", "content": None},
+        {"role": "user", "content": ""}
+    ]
+
+    normalized = normalize_messages_for_vllm(messages)
+
+    assert normalized[0]["content"] is None
+    assert normalized[1]["content"] == ""