110 lines
3.5 KiB
Python
110 lines
3.5 KiB
Python
"""Text completions endpoint router (legacy)."""
|
|
|
|
import uuid
|
|
from typing import Union
|
|
from fastapi import APIRouter, HTTPException, Request
|
|
from app.models.openai_models import (
|
|
CompletionRequest,
|
|
CompletionResponse,
|
|
ErrorResponse,
|
|
ErrorDetail,
|
|
)
|
|
from app.services.watsonx_service import watsonx_service
|
|
from app.utils.transformers import transform_watsonx_to_openai_completion
|
|
from app.config import settings
|
|
import logging
|
|
|
|
# Module-level logger, named after this module's import path so log
# records can be filtered per-router.
logger = logging.getLogger(__name__)

# Router instance mounted by the application; routes below attach to it.
router = APIRouter()
|
|
|
|
|
|
@router.post(
    "/v1/completions",
    response_model=Union[CompletionResponse, ErrorResponse],
    responses={
        200: {"model": CompletionResponse},
        400: {"model": ErrorResponse},
        401: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
    },
)
async def create_completion(
    request: CompletionRequest,
    http_request: Request,
):
    """Create a text completion using OpenAI-compatible API (legacy).

    This endpoint accepts OpenAI-formatted completion requests and translates
    them to watsonx.ai text generation API calls.

    Args:
        request: Parsed OpenAI-style completion request body.
        http_request: Raw incoming request (unused here; kept so the
            signature matches the other routes in this service).

    Returns:
        An OpenAI-shaped completion response produced by
        ``transform_watsonx_to_openai_completion``.

    Raises:
        HTTPException: 400 for an empty prompt list or a streaming request,
            500 for any unexpected internal failure.
    """
    try:
        # Map the OpenAI-style model name to a watsonx model id if needed.
        watsonx_model = settings.map_model(request.model)
        # Lazy %-style args avoid formatting when the level is disabled.
        logger.info("Completion request: %s -> %s", request.model, watsonx_model)

        # The legacy completions API allows `prompt` to be a string or a list.
        if isinstance(request.prompt, list):
            if not request.prompt:
                raise HTTPException(
                    status_code=400,
                    detail={
                        "error": {
                            "message": "Prompt cannot be empty",
                            "type": "invalid_request_error",
                            "code": "invalid_prompt",
                        }
                    },
                )
            # For now, just use the first prompt.
            # TODO: Handle multiple prompts with n parameter
            prompt = request.prompt[0] if isinstance(request.prompt[0], str) else ""
        else:
            prompt = request.prompt

        # Note: Streaming not implemented for completions yet
        if request.stream:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": {
                        "message": "Streaming not supported for completions endpoint",
                        "type": "invalid_request_error",
                        "code": "streaming_not_supported",
                    }
                },
            )

        # BUG FIX: the previous `request.temperature or 1.0` (and the same
        # pattern for top_p) treated an explicit 0.0 as "unset" because 0.0
        # is falsy, silently overriding deterministic sampling with 1.0.
        # Only substitute the default when the field is genuinely absent.
        temperature = request.temperature if request.temperature is not None else 1.0
        top_p = request.top_p if request.top_p is not None else 1.0

        # Normalize `stop` to a list (or None). A bare falsy value ("" / None)
        # maps to None, matching the original one-liner's truthiness check.
        if isinstance(request.stop, list):
            stop_sequences = request.stop
        elif request.stop:
            stop_sequences = [request.stop]
        else:
            stop_sequences = None

        # Call watsonx text generation
        watsonx_response = await watsonx_service.text_generation(
            model_id=watsonx_model,
            prompt=prompt,
            temperature=temperature,
            max_tokens=request.max_tokens,
            top_p=top_p,
            stop=stop_sequences,
        )

        # Reshape the watsonx payload into the OpenAI completions schema.
        openai_response = transform_watsonx_to_openai_completion(
            watsonx_response,
            request.model,
        )

        return openai_response

    except HTTPException:
        # Re-raise our own 4xx errors untouched so FastAPI serializes them.
        raise
    except Exception as e:
        logger.error("Error in completion: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail={
                "error": {
                    "message": str(e),
                    "type": "internal_error",
                    "code": "internal_error",
                }
            },
        )
|