Add AGENTS.md documentation for AI agent guidance
This commit is contained in:
120
app/routers/models.py
Normal file
120
app/routers/models.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""Models endpoint router."""
|
||||
|
||||
import logging
import time

from fastapi import APIRouter, HTTPException

from app.config import settings
from app.models.openai_models import ModelsResponse, ModelInfo
|
||||
|
||||
# Module-level logger, namespaced to this router module.
logger = logging.getLogger(__name__)

# Router instance mounted by the FastAPI app; hosts the /v1/models endpoints.
router = APIRouter()
|
||||
|
||||
|
||||
# Predefined list of available models
# This can be extended or made dynamic based on watsonx.ai API
# NOTE(review): entries are watsonx model IDs; membership checks against this
# list gate both /v1/models listing and /v1/models/{model_id} lookups.
AVAILABLE_MODELS = [
    # Granite Models
    "ibm/granite-3-1-8b-base",
    "ibm/granite-3-2-8b-instruct",
    "ibm/granite-3-3-8b-instruct",
    "ibm/granite-3-8b-instruct",
    "ibm/granite-4-h-small",
    "ibm/granite-8b-code-instruct",

    # Llama Models
    "meta-llama/llama-3-1-70b-gptq",
    "meta-llama/llama-3-1-8b",
    "meta-llama/llama-3-2-11b-vision-instruct",
    "meta-llama/llama-3-2-90b-vision-instruct",
    "meta-llama/llama-3-3-70b-instruct",
    "meta-llama/llama-3-405b-instruct",
    "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",

    # Mistral Models
    "mistral-large-2512",
    "mistralai/mistral-medium-2505",
    "mistralai/mistral-small-3-1-24b-instruct-2503",

    # Other Models
    "openai/gpt-oss-120b",

    # Embedding Models
    "ibm/slate-125m-english-rtrvr",
    "ibm/slate-30m-english-rtrvr",
]
|
||||
|
||||
|
||||
@router.get(
    "/v1/models",
    response_model=ModelsResponse,
)
async def list_models():
    """List available models in OpenAI-compatible format.

    Returns every known watsonx model ID, plus any configured alias names
    (e.g. ``gpt-4``) whose mapped target is itself an available model.
    """
    now = int(time.time())

    # All predefined watsonx models are always listed.
    catalog = [
        ModelInfo(id=wx_id, created=now, owned_by="ibm-watsonx")
        for wx_id in AVAILABLE_MODELS
    ]

    # Configured alias names (e.g. gpt-4 -> ibm/granite-4-h-small) are
    # appended too, but only when their target model is actually served.
    catalog.extend(
        ModelInfo(id=alias, created=now, owned_by="ibm-watsonx")
        for alias, target in settings.get_model_mapping().items()
        if target in AVAILABLE_MODELS
    )

    return ModelsResponse(data=catalog)
|
||||
|
||||
|
||||
@router.get(
    "/v1/models/{model_id}",
    response_model=ModelInfo,
)
async def retrieve_model(model_id: str):
    """Retrieve information about a specific model.

    Args:
        model_id: The model ID (or configured alias, e.g. ``gpt-4``)
            to retrieve.

    Returns:
        Model information, echoing back the ID the caller asked for.

    Raises:
        HTTPException: 404 with an OpenAI-style error body when neither the
            ID nor its mapped watsonx model is in ``AVAILABLE_MODELS``.
    """
    # Resolve aliases to the underlying watsonx model ID before checking.
    watsonx_model = settings.map_model(model_id)

    # HTTPException is imported at module level (was previously a
    # function-local import buried in this branch).
    if watsonx_model not in AVAILABLE_MODELS:
        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{model_id}' not found",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )

    # Respond with the caller-supplied ID (alias or real), mirroring the
    # OpenAI API's behavior for /v1/models/{model}.
    return ModelInfo(
        id=model_id,
        created=int(time.time()),
        owned_by="ibm-watsonx",
    )
|
||||
Reference in New Issue
Block a user