Add vLLM message normalization for OpenAI content format compatibility

- Normalize 'developer' role to 'system' (vLLM doesn't support developer role)
- Flatten array content to string for text-only messages
- Preserve mixed content (text + images) as array
- Add comprehensive unit tests for normalization logic

Fixes HTTP 422 errors when clients send the OpenAI multi-part content format
This commit is contained in:
2026-02-23 11:59:23 -05:00
parent debfb466ad
commit d924b7c45f
2 changed files with 144 additions and 2 deletions

View File

@@ -27,6 +27,47 @@ logger = logging.getLogger(__name__)
router = APIRouter()
def normalize_messages_for_vllm(messages: list) -> list:
    """Normalize OpenAI message format for vLLM compatibility.

    vLLM is stricter than the OpenAI API and requires:
    1. Message content as a string (not an array of content parts)
    2. Role must be system/user/assistant/function/tool (not "developer")

    Args:
        messages: List of message dicts in OpenAI chat-completion format.

    Returns:
        A new list of normalized message dicts. The input messages are not
        mutated; each output dict is a shallow copy with only top-level
        keys ("role", "content") rewritten.
    """
    # Same logger object as the module-level one (logging.getLogger caches
    # by name), resolved here so the function is self-contained.
    logger = logging.getLogger(__name__)
    normalized = []
    for msg in messages:
        # Shallow copy is sufficient: we only reassign top-level keys.
        normalized_msg = msg.copy()
        # vLLM rejects the OpenAI "developer" role; map it to "system".
        if normalized_msg.get("role") == "developer":
            normalized_msg["role"] = "system"
            logger.debug("Normalized 'developer' role to 'system'")
        # Normalize array content to string for text-only messages.
        content = normalized_msg.get("content")
        if isinstance(content, list):
            if all(isinstance(p, dict) and p.get("type") == "text" for p in content):
                # Text-only parts: flatten to a single newline-joined string.
                # Lazy %-args so formatting is skipped when DEBUG is disabled.
                normalized_msg["content"] = "\n".join(p.get("text", "") for p in content)
                logger.debug("Normalized array content to string: %d parts", len(content))
            else:
                # Has image_url or other non-text types - keep as is.
                # vLLM may reject this, but we preserve the original format.
                logger.warning("Message contains non-text content parts, keeping array format")
        normalized.append(normalized_msg)
    return normalized
@router.post(
"/v1/chat/completions",
response_model=Union[ChatCompletionResponse, ErrorResponse],
@@ -51,8 +92,14 @@ async def create_chat_completion(
watsonx_model = settings.map_model(request.model)
logger.info(f"Chat completion request: {request.model} -> {watsonx_model}")
# Transform messages
watsonx_messages = transform_messages_to_watsonx(request.messages)
# Normalize messages for vLLM compatibility (handles array content and developer role)
normalized_messages = normalize_messages_for_vllm([msg.model_dump() for msg in request.messages])
# Transform normalized messages to watsonx format
# Convert back to ChatMessage objects for the transformer
from app.models.openai_models import ChatMessage
normalized_chat_messages = [ChatMessage(**msg) for msg in normalized_messages]
watsonx_messages = transform_messages_to_watsonx(normalized_chat_messages)
# Transform tools if present
watsonx_tools = transform_tools_to_watsonx(request.tools)