Add vLLM message normalization for OpenAI content format compatibility

- Normalize the 'developer' role to 'system' (vLLM doesn't support the developer role)
- Flatten array content to a string for text-only messages
- Preserve mixed content (text + images) as an array
- Add comprehensive unit tests for the normalization logic

Fixes HTTP 422 errors when clients send the OpenAI multi-content format.
2026-02-23 11:59:23 -05:00
parent debfb466ad
commit d924b7c45f
2 changed files with 144 additions and 2 deletions
--- a/tests/test_message_normalization.py
+++ b/tests/test_message_normalization.py
@@ -0,0 +1,95 @@
+"""Tests for message normalization for vLLM compatibility."""
+
+import pytest
+from app.routers.chat import normalize_messages_for_vllm
+
+
+def test_normalize_developer_role():
+    """Check that a 'developer' role becomes 'system' and 'user' is kept."""
+    messages = [
+        {"role": "developer", "content": "You are helpful."},
+        {"role": "user", "content": "Hello"}
+    ]
+    
+    normalized = normalize_messages_for_vllm(messages)
+    
+    assert normalized[0]["role"] == "system"  # 'developer' must be rewritten
+    assert normalized[1]["role"] == "user"  # 'user' must be left as-is
+
+
+def test_normalize_array_content_text_only():
+    """Check that a list of text-only parts is flattened into one string."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Hello"},
+                {"type": "text", "text": "World"}
+            ]
+        }
+    ]
+    
+    normalized = normalize_messages_for_vllm(messages)
+    
+    assert isinstance(normalized[0]["content"], str)  # no longer a list
+    assert normalized[0]["content"] == "Hello\nWorld"  # parts joined by a newline
+
+
+def test_normalize_string_content_unchanged():
+    """Check that content that is already a plain string comes back unchanged."""
+    messages = [
+        {"role": "user", "content": "Hello world"}
+    ]
+    
+    normalized = normalize_messages_for_vllm(messages)
+    
+    assert normalized[0]["content"] == "Hello world"  # same text, still a string
+
+
+def test_normalize_mixed_content_preserved():
+    """Check that content mixing a text part and an image_url part stays a list."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+            ]
+        }
+    ]
+    
+    normalized = normalize_messages_for_vllm(messages)
+    
+    # A non-text part (image_url) is present, so the list form must be kept.
+    assert isinstance(normalized[0]["content"], list)
+    assert len(normalized[0]["content"]) == 2  # NOTE(review): only the count is checked, not the parts
+
+
+def test_normalize_multiple_messages():
+    """Check one conversation mixing a developer role, list content and strings."""
+    messages = [
+        {"role": "developer", "content": "You are helpful."},
+        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
+        {"role": "assistant", "content": "Hi there!"},
+        {"role": "user", "content": "How are you?"}
+    ]
+    
+    normalized = normalize_messages_for_vllm(messages)
+    
+    assert normalized[0]["role"] == "system"  # 'developer' rewritten
+    assert normalized[1]["content"] == "Hello"  # single text part flattened to its text
+    assert normalized[2]["content"] == "Hi there!"  # string content untouched
+    assert normalized[3]["content"] == "How are you?"  # string content untouched
+
+
+def test_normalize_empty_content():
+    """Check that None content and empty-string content are both left as-is."""
+    messages = [
+        {"role": "user", "content": None},
+        {"role": "user", "content": ""}
+    ]
+    
+    normalized = normalize_messages_for_vllm(messages)
+    
+    assert normalized[0]["content"] is None  # None is not coerced to a string
+    assert normalized[1]["content"] == ""  # empty string stays empty