- Normalize the 'developer' role to 'system' (vLLM doesn't support the developer role)
- Flatten array content to a string for text-only messages
- Preserve mixed content (text + images) as an array
- Add comprehensive unit tests for the normalization logic

Fixes HTTP 422 errors when clients send the OpenAI multi-content format.
96 lines
2.8 KiB
Python
96 lines
2.8 KiB
Python
"""Tests for message normalization for vLLM compatibility."""
|
|
|
|
import pytest
|
|
from app.routers.chat import normalize_messages_for_vllm
|
|
|
|
|
|
def test_normalize_developer_role():
    """Check that a 'developer' role is rewritten to 'system'.

    The accompanying 'user' message is expected to keep its role.
    """
    messages = [
        {"role": "developer", "content": "You are helpful."},
        {"role": "user", "content": "Hello"},
    ]

    normalized = normalize_messages_for_vllm(messages)

    assert normalized[0]["role"] == "system"
    assert normalized[1]["role"] == "user"
|
|
|
|
|
|
def test_normalize_array_content_text_only():
    """Check that array content made only of text parts becomes one string.

    The two text parts are expected to be joined with a newline.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Hello"},
                {"type": "text", "text": "World"},
            ],
        },
    ]

    normalized = normalize_messages_for_vllm(messages)

    content = normalized[0]["content"]
    assert isinstance(content, str)
    assert content == "Hello\nWorld"
|
|
|
|
|
|
def test_normalize_string_content_unchanged():
    """Check that content that is already a plain string is left as-is."""
    messages = [
        {"role": "user", "content": "Hello world"},
    ]

    normalized = normalize_messages_for_vllm(messages)

    assert normalized[0]["content"] == "Hello world"
|
|
|
|
|
|
def test_normalize_mixed_content_preserved():
    """Check that mixed content (text + image) stays in array form.

    Only the container type and the number of parts are verified here.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.jpg"},
                },
            ],
        },
    ]

    normalized = normalize_messages_for_vllm(messages)

    # Mixed content must not be flattened: it should still be a two-part list.
    content = normalized[0]["content"]
    assert isinstance(content, list)
    assert len(content) == 2
|
|
|
|
|
|
def test_normalize_multiple_messages():
    """Check normalization across a conversation mixing several formats.

    Covers a 'developer' role, a single-part text array, and two plain
    string messages in the same request.
    """
    messages = [
        {"role": "developer", "content": "You are helpful."},
        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ]

    normalized = normalize_messages_for_vllm(messages)

    assert normalized[0]["role"] == "system"
    assert normalized[1]["content"] == "Hello"
    assert normalized[2]["content"] == "Hi there!"
    assert normalized[3]["content"] == "How are you?"
|
|
|
|
|
|
def test_normalize_empty_content():
    """Check that None and empty-string content pass through unchanged."""
    messages = [
        {"role": "user", "content": None},
        {"role": "user", "content": ""},
    ]

    normalized = normalize_messages_for_vllm(messages)

    assert normalized[0]["content"] is None
    assert normalized[1]["content"] == ""
|