# watsonx-openai-proxy/tests/test_message_normalization.py

"""Tests for message normalization for vLLM compatibility."""

import pytest
from app.routers.chat import normalize_messages_for_vllm


def test_normalize_developer_role():
    """Check the roles returned for a 'developer' + 'user' conversation.

    The first normalized message must carry the role 'system' and the
    second must still carry the role 'user'.
    """
    conversation = [
        {"role": "developer", "content": "You are helpful."},
        {"role": "user", "content": "Hello"},
    ]

    result = normalize_messages_for_vllm(conversation)

    # Roles are checked position by position, in order.
    for position, expected_role in enumerate(("system", "user")):
        assert result[position]["role"] == expected_role


def test_normalize_array_content_text_only():
    """Check that a list of text-only parts comes back as one string.

    Two text parts, "Hello" and "World", are expected to be returned as
    the single string "Hello\\nWorld".
    """
    text_parts = [{"type": "text", "text": word} for word in ("Hello", "World")]
    conversation = [{"role": "user", "content": text_parts}]

    result = normalize_messages_for_vllm(conversation)
    flattened = result[0]["content"]

    assert isinstance(flattened, str)
    assert flattened == "Hello\nWorld"


def test_normalize_string_content_unchanged():
    """Check that plain string content is returned with the same value."""
    conversation = [{"role": "user", "content": "Hello world"}]

    result = normalize_messages_for_vllm(conversation)
    returned_content = result[0]["content"]

    assert returned_content == "Hello world"


def test_normalize_mixed_content_preserved():
    """Check that text + image content is still a two-element list."""
    text_part = {"type": "text", "text": "What's in this image?"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": "https://example.com/image.jpg"},
    }
    conversation = [{"role": "user", "content": [text_part, image_part]}]

    result = normalize_messages_for_vllm(conversation)
    returned_content = result[0]["content"]

    # Mixed content must stay in list form, with both parts present.
    assert isinstance(returned_content, list)
    assert len(returned_content) == 2


def test_normalize_multiple_messages():
    """Check a four-message conversation that mixes roles and content shapes.

    Verifies the first message's role and the content of the remaining
    three messages after normalization.
    """
    conversation = [
        {"role": "developer", "content": "You are helpful."},
        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ]

    result = normalize_messages_for_vllm(conversation)

    assert result[0]["role"] == "system"

    # Messages 1..3 are compared by content, in order.
    expected_contents = ("Hello", "Hi there!", "How are you?")
    for position, expected in enumerate(expected_contents, start=1):
        assert result[position]["content"] == expected


def test_normalize_empty_content():
    """Check that None content stays None and empty-string content stays ""."""
    conversation = [
        {"role": "user", "content": empty_value} for empty_value in (None, "")
    ]

    result = normalize_messages_for_vllm(conversation)

    none_result = result[0]["content"]
    assert none_result is None

    blank_result = result[1]["content"]
    assert blank_result == ""