"""Models endpoint router."""

import logging
import time

from fastapi import APIRouter, HTTPException

from app.config import settings
from app.models.openai_models import ModelInfo, ModelsResponse

logger = logging.getLogger(__name__)

router = APIRouter()

# Value reported in the ``owned_by`` field of every model entry.
_OWNED_BY = "ibm-watsonx"

# Predefined list of available models.
# This can be extended or made dynamic based on the watsonx.ai API.
AVAILABLE_MODELS = [
    # Granite Models
    "ibm/granite-3-1-8b-base",
    "ibm/granite-3-2-8b-instruct",
    "ibm/granite-3-3-8b-instruct",
    "ibm/granite-3-8b-instruct",
    "ibm/granite-4-h-small",
    "ibm/granite-8b-code-instruct",
    # Llama Models
    "meta-llama/llama-3-1-70b-gptq",
    "meta-llama/llama-3-1-8b",
    "meta-llama/llama-3-2-11b-vision-instruct",
    "meta-llama/llama-3-2-90b-vision-instruct",
    "meta-llama/llama-3-3-70b-instruct",
    "meta-llama/llama-3-405b-instruct",
    "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",
    # Mistral Models
    "mistral-large-2512",
    "mistralai/mistral-medium-2505",
    "mistralai/mistral-small-3-1-24b-instruct-2503",
    # Other Models
    "openai/gpt-oss-120b",
    # Embedding Models
    "ibm/slate-125m-english-rtrvr",
    "ibm/slate-30m-english-rtrvr",
]


@router.get(
    "/v1/models",
    response_model=ModelsResponse,
)
async def list_models() -> ModelsResponse:
    """List available models in OpenAI-compatible format.

    The response contains every ID in ``AVAILABLE_MODELS`` followed by each
    alias from ``settings.get_model_mapping()`` whose target is one of those
    IDs. All entries share a single ``created`` timestamp taken when the
    request is handled.

    Returns:
        A ``ModelsResponse`` whose ``data`` holds one ``ModelInfo`` per
        model ID or alias.
    """
    created_time = int(time.time())

    # Start with the real watsonx model IDs, in their declared order.
    model_ids = list(AVAILABLE_MODELS)

    # Add mapped model names (e.g., gpt-4 -> ibm/granite-4-h-small), but only
    # when the target is actually served. An alias spelled exactly like an ID
    # that is already listed is skipped so the response never contains the
    # same ID twice.
    model_ids.extend(
        openai_name
        for openai_name, watsonx_id in settings.get_model_mapping().items()
        if watsonx_id in AVAILABLE_MODELS
        and openai_name not in AVAILABLE_MODELS
    )

    return ModelsResponse(
        data=[
            ModelInfo(id=model_id, created=created_time, owned_by=_OWNED_BY)
            for model_id in model_ids
        ]
    )


@router.get(
    "/v1/models/{model_id}",
    response_model=ModelInfo,
)
async def retrieve_model(model_id: str) -> ModelInfo:
    """Retrieve information about a specific model.

    Args:
        model_id: The model ID to retrieve. It is passed through
            ``settings.map_model`` before being checked against
            ``AVAILABLE_MODELS``.

    Returns:
        Model information reported under the ID the caller asked for (not
        the mapped watsonx ID), with ``created`` set to the current time.

    Raises:
        HTTPException: 404 with a ``model_not_found`` error payload when the
            mapped ID is not in ``AVAILABLE_MODELS``.
    """
    # Map the model if needed.
    watsonx_model = settings.map_model(model_id)

    # Check if the model exists.
    if watsonx_model not in AVAILABLE_MODELS:
        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{model_id}' not found",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )

    return ModelInfo(
        id=model_id,
        created=int(time.time()),
        owned_by=_OWNED_BY,
    )