"""Text completions endpoint router (legacy).""" import uuid from typing import Union from fastapi import APIRouter, HTTPException, Request from app.models.openai_models import ( CompletionRequest, CompletionResponse, ErrorResponse, ErrorDetail, ) from app.services.watsonx_service import watsonx_service from app.utils.transformers import transform_watsonx_to_openai_completion from app.config import settings import logging logger = logging.getLogger(__name__) router = APIRouter() @router.post( "/v1/completions", response_model=Union[CompletionResponse, ErrorResponse], responses={ 200: {"model": CompletionResponse}, 400: {"model": ErrorResponse}, 401: {"model": ErrorResponse}, 500: {"model": ErrorResponse}, }, ) async def create_completion( request: CompletionRequest, http_request: Request, ): """Create a text completion using OpenAI-compatible API (legacy). This endpoint accepts OpenAI-formatted completion requests and translates them to watsonx.ai text generation API calls. """ try: # Map model name if needed watsonx_model = settings.map_model(request.model) logger.info(f"Completion request: {request.model} -> {watsonx_model}") # Handle prompt (can be string or list) if isinstance(request.prompt, list): if len(request.prompt) == 0: raise HTTPException( status_code=400, detail={ "error": { "message": "Prompt cannot be empty", "type": "invalid_request_error", "code": "invalid_prompt", } }, ) # For now, just use the first prompt # TODO: Handle multiple prompts with n parameter prompt = request.prompt[0] if isinstance(request.prompt[0], str) else "" else: prompt = request.prompt # Note: Streaming not implemented for completions yet if request.stream: raise HTTPException( status_code=400, detail={ "error": { "message": "Streaming not supported for completions endpoint", "type": "invalid_request_error", "code": "streaming_not_supported", } }, ) # Call watsonx text generation watsonx_response = await watsonx_service.text_generation( model_id=watsonx_model, prompt=prompt, temperature=request.temperature or 1.0, max_tokens=request.max_tokens, top_p=request.top_p or 1.0, stop=request.stop if isinstance(request.stop, list) else [request.stop] if request.stop else None, ) # Transform response openai_response = transform_watsonx_to_openai_completion( watsonx_response, request.model, ) return openai_response except HTTPException: raise except Exception as e: logger.error(f"Error in completion: {str(e)}", exc_info=True) raise HTTPException( status_code=500, detail={ "error": { "message": str(e), "type": "internal_error", "code": "internal_error", } }, )