Add AGENTS.md documentation for AI agent guidance
This commit is contained in:
120
app/routers/models.py
Normal file
120
app/routers/models.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""Models endpoint router."""
|
||||
|
||||
import logging
import time

from fastapi import APIRouter, HTTPException

from app.config import settings
from app.models.openai_models import ModelsResponse, ModelInfo
|
||||
|
||||
# Module-level logger, namespaced to this router module.
logger = logging.getLogger(__name__)

# Router instance mounted by the FastAPI app; hosts the /v1/models endpoints.
router = APIRouter()
|
||||
|
||||
|
||||
# Predefined list of available models
# This can be extended or made dynamic based on watsonx.ai API
# NOTE(review): entries are watsonx model IDs; membership checks against this
# list gate both /v1/models listing and /v1/models/{model_id} lookups.
AVAILABLE_MODELS = [
    # Granite Models
    "ibm/granite-3-1-8b-base",
    "ibm/granite-3-2-8b-instruct",
    "ibm/granite-3-3-8b-instruct",
    "ibm/granite-3-8b-instruct",
    "ibm/granite-4-h-small",
    "ibm/granite-8b-code-instruct",

    # Llama Models
    "meta-llama/llama-3-1-70b-gptq",
    "meta-llama/llama-3-1-8b",
    "meta-llama/llama-3-2-11b-vision-instruct",
    "meta-llama/llama-3-2-90b-vision-instruct",
    "meta-llama/llama-3-3-70b-instruct",
    "meta-llama/llama-3-405b-instruct",
    "meta-llama/llama-4-maverick-17b-128e-instruct-fp8",

    # Mistral Models
    "mistral-large-2512",
    "mistralai/mistral-medium-2505",
    "mistralai/mistral-small-3-1-24b-instruct-2503",

    # Other Models
    "openai/gpt-oss-120b",

    # Embedding Models
    "ibm/slate-125m-english-rtrvr",
    "ibm/slate-30m-english-rtrvr",
]
|
||||
|
||||
|
||||
@router.get(
    "/v1/models",
    response_model=ModelsResponse,
)
async def list_models():
    """List available models in OpenAI-compatible format.

    Returns every known watsonx model ID, plus any configured alias names
    (e.g. ``gpt-4``) whose mapped target is itself an available model.
    """
    now = int(time.time())

    # All predefined watsonx models are always listed.
    catalog = [
        ModelInfo(id=wx_id, created=now, owned_by="ibm-watsonx")
        for wx_id in AVAILABLE_MODELS
    ]

    # Configured alias names (e.g. gpt-4 -> ibm/granite-4-h-small) are
    # appended too, but only when their target model is actually served.
    catalog.extend(
        ModelInfo(id=alias, created=now, owned_by="ibm-watsonx")
        for alias, target in settings.get_model_mapping().items()
        if target in AVAILABLE_MODELS
    )

    return ModelsResponse(data=catalog)
|
||||
|
||||
|
||||
@router.get(
    "/v1/models/{model_id}",
    response_model=ModelInfo,
)
async def retrieve_model(model_id: str):
    """Retrieve information about a specific model.

    Args:
        model_id: The model ID (or configured alias, e.g. ``gpt-4``)
            to retrieve.

    Returns:
        Model information, echoing back the ID the caller asked for.

    Raises:
        HTTPException: 404 with an OpenAI-style error body when neither the
            ID nor its mapped watsonx model is in ``AVAILABLE_MODELS``.
    """
    # Resolve aliases to the underlying watsonx model ID before checking.
    watsonx_model = settings.map_model(model_id)

    # HTTPException is imported at module level (was previously a
    # function-local import buried in this branch).
    if watsonx_model not in AVAILABLE_MODELS:
        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{model_id}' not found",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )

    # Respond with the caller-supplied ID (alias or real), mirroring the
    # OpenAI API's behavior for /v1/models/{model}.
    return ModelInfo(
        id=model_id,
        created=int(time.time()),
        owned_by="ibm-watsonx",
    )
|
||||
Reference in New Issue
Block a user