# NOTE(review): the following lines are a file-browser paste artifact, not
# Python — preserved as a comment so the module parses:
#   Files: watsonx-openai-proxy/app/routers/completions.py
#   110 lines, 3.5 KiB, Python

"""Text completions endpoint router (legacy)."""
import uuid
from typing import Union
from fastapi import APIRouter, HTTPException, Request
from app.models.openai_models import (
CompletionRequest,
CompletionResponse,
ErrorResponse,
ErrorDetail,
)
from app.services.watsonx_service import watsonx_service
from app.utils.transformers import transform_watsonx_to_openai_completion
from app.config import settings
import logging
# Module-level logger namespaced to this module (app.routers.completions).
logger = logging.getLogger(__name__)
# FastAPI router; mounted by the application to expose /v1/completions.
router = APIRouter()
@router.post(
    "/v1/completions",
    response_model=Union[CompletionResponse, ErrorResponse],
    responses={
        200: {"model": CompletionResponse},
        400: {"model": ErrorResponse},
        401: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
    },
)
async def create_completion(
    request: CompletionRequest,
    http_request: Request,
):
    """Create a text completion using OpenAI-compatible API (legacy).

    This endpoint accepts OpenAI-formatted completion requests and translates
    them to watsonx.ai text generation API calls, transforming the result
    back into the OpenAI completion response shape.

    Args:
        request: Parsed OpenAI-style completion request body.
        http_request: Raw incoming request (currently unused; kept for
            signature parity with sibling routers).

    Returns:
        CompletionResponse: OpenAI-shaped completion payload.

    Raises:
        HTTPException: 400 for empty prompts or unsupported streaming,
            500 for any unexpected internal error.
    """
    try:
        # Map the OpenAI model name to the configured watsonx model id.
        watsonx_model = settings.map_model(request.model)
        # Lazy %-style args avoid formatting when INFO is disabled.
        logger.info("Completion request: %s -> %s", request.model, watsonx_model)

        # Handle prompt (can be string or list).
        if isinstance(request.prompt, list):
            if not request.prompt:
                raise HTTPException(
                    status_code=400,
                    detail={
                        "error": {
                            "message": "Prompt cannot be empty",
                            "type": "invalid_request_error",
                            "code": "invalid_prompt",
                        }
                    },
                )
            # For now, just use the first prompt.
            # TODO: Handle multiple prompts with n parameter
            prompt = request.prompt[0] if isinstance(request.prompt[0], str) else ""
        else:
            prompt = request.prompt

        # Note: Streaming not implemented for completions yet.
        if request.stream:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": {
                        "message": "Streaming not supported for completions endpoint",
                        "type": "invalid_request_error",
                        "code": "streaming_not_supported",
                    }
                },
            )

        # Normalize stop sequences to a list, or None when absent.
        if isinstance(request.stop, list):
            stop = request.stop
        elif request.stop:
            stop = [request.stop]
        else:
            stop = None

        # Call watsonx text generation.
        # BUGFIX: use explicit `is None` checks rather than `x or default` so
        # that legitimate falsy values — temperature=0.0 (greedy decoding) and
        # top_p=0.0 — are passed through instead of being silently replaced
        # by the defaults.
        watsonx_response = await watsonx_service.text_generation(
            model_id=watsonx_model,
            prompt=prompt,
            temperature=request.temperature if request.temperature is not None else 1.0,
            max_tokens=request.max_tokens,
            top_p=request.top_p if request.top_p is not None else 1.0,
            stop=stop,
        )

        # Transform the watsonx response into the OpenAI completion shape.
        return transform_watsonx_to_openai_completion(
            watsonx_response,
            request.model,
        )
    except HTTPException:
        # Re-raise deliberate HTTP errors untouched.
        raise
    except Exception as e:
        logger.error(f"Error in completion: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail={
                "error": {
                    "message": str(e),
                    "type": "internal_error",
                    "code": "internal_error",
                }
            },
        )