# watsonx-openai-proxy/app/routers/models.py

"""Models endpoint router."""

import time
from fastapi import APIRouter
from app.models.openai_models import ModelsResponse, ModelInfo
from app.config import settings
import logging

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Router on which the endpoint decorators in this module register their routes.
router = APIRouter()


# Predefined list of watsonx.ai model IDs advertised by this proxy.
# The endpoints in this module use it both to build the model listing and to
# decide whether a requested (possibly mapped) model ID exists.
# The list is static: extend it by hand, or make it dynamic based on the
# watsonx.ai API.
AVAILABLE_MODELS = [
    # Granite Models
    "ibm/granite-3-1-8b-base",
    "ibm/granite-3-2-8b-instruct",
    "ibm/granite-3-3-8b-instruct",
    "ibm/granite-3-8b-instruct",
    "ibm/granite-4-h-small",
    "ibm/granite-8b-code-instruct",

    # Llama Models
    "meta-llama/llama-3-1-70b-gptq",
    "meta-llama/llama-3-1-8b",
    "meta-llama/llama-3-2-11b-vision-instruct",
    "meta-llama/llama-3-2-90b-vision-instruct",
    "meta-llama/llama-3-3-70b-instruct",
    "meta-llama/llama-3-405b-instruct",
    "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",

    # Mistral Models
    # NOTE(review): unlike the two entries after it, this ID carries no
    # "mistralai/" prefix -- confirm it matches the ID watsonx.ai expects.
    "mistral-large-2512",
    "mistralai/mistral-medium-2505",
    "mistralai/mistral-small-3-1-24b-instruct-2503",

    # Other Models
    "openai/gpt-oss-120b",

    # Embedding Models
    "ibm/slate-125m-english-rtrvr",
    "ibm/slate-30m-english-rtrvr",
]


@router.get(
    "/v1/models",
    response_model=ModelsResponse,
)
async def list_models():
    """List available models in OpenAI-compatible format.

    The listing contains every ID in ``AVAILABLE_MODELS`` followed by the
    mapped names from ``settings.get_model_mapping()`` (e.g. ``gpt-4`` ->
    ``ibm/granite-4-h-small``). A mapped name is included only when:

    * its target is itself in ``AVAILABLE_MODELS``, and
    * the name is not already a native ID, so the response never carries
      the same ``id`` twice.

    Every entry shares one ``created`` timestamp (the time of this request)
    and is reported as owned by ``"ibm-watsonx"``.

    Returns:
        A ``ModelsResponse`` whose ``data`` holds one ``ModelInfo`` per model.
    """
    created_time = int(time.time())

    # Native watsonx model IDs, in declaration order.
    models = [
        ModelInfo(
            id=model_id,
            created=created_time,
            owned_by="ibm-watsonx",
        )
        for model_id in AVAILABLE_MODELS
    ]

    # Mapped names: skip those pointing at unknown models and those that
    # would duplicate an ID already listed above.
    models.extend(
        ModelInfo(
            id=openai_name,
            created=created_time,
            owned_by="ibm-watsonx",
        )
        for openai_name, watsonx_id in settings.get_model_mapping().items()
        if watsonx_id in AVAILABLE_MODELS
        and openai_name not in AVAILABLE_MODELS
    )

    return ModelsResponse(data=models)


@router.get(
    # The ``:path`` converter lets the parameter span "/" characters. Most
    # IDs in AVAILABLE_MODELS contain one (e.g. "ibm/granite-3-8b-instruct"),
    # and a plain ``{model_id}`` segment does not match "/", so those models
    # could never reach this handler.
    "/v1/models/{model_id:path}",
    response_model=ModelInfo,
)
async def retrieve_model(model_id: str):
    """Retrieve information about a specific model.

    The requested ID is first passed through ``settings.map_model`` and the
    result is checked against ``AVAILABLE_MODELS``. On success the response
    echoes the ID exactly as the client requested it, not the mapped one.

    Args:
        model_id: The model ID to retrieve. May contain "/" characters.

    Returns:
        A ``ModelInfo`` for the requested ID, with ``created`` set to the
        time of this request.

    Raises:
        HTTPException: 404 with a ``model_not_found`` error payload when the
            mapped ID is not in ``AVAILABLE_MODELS``.
    """
    # Map the model if needed
    watsonx_model = settings.map_model(model_id)

    if watsonx_model not in AVAILABLE_MODELS:
        # Imported locally so this block does not depend on the file-level
        # import list.
        from fastapi import HTTPException

        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{model_id}' not found",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )

    return ModelInfo(
        id=model_id,
        created=int(time.time()),
        owned_by="ibm-watsonx",
    )