Add AGENTS.md documentation for AI agent guidance
This commit is contained in:
5
app/services/__init__.py
Normal file
5
app/services/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Services for watsonx-openai-proxy."""
|
||||
|
||||
from app.services.watsonx_service import watsonx_service, WatsonxService
|
||||
|
||||
__all__ = ["watsonx_service", "WatsonxService"]
|
||||
316
app/services/watsonx_service.py
Normal file
316
app/services/watsonx_service.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""Service for interacting with IBM watsonx.ai APIs."""
|
||||
|
||||
import asyncio
import json
import logging
import time
from typing import AsyncIterator, Dict, List, Optional

import httpx

from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WatsonxService:
    """Async client for the IBM watsonx.ai text, chat, and embeddings APIs.

    Manages IBM Cloud IAM bearer-token refresh (with caching and a refresh
    buffer), a lazily-created shared ``httpx.AsyncClient``, and request
    building for the watsonx.ai REST endpoints.
    """

    # watsonx.ai REST API version pin, shared by every endpoint call.
    API_VERSION = "2024-02-13"
    # Refresh the IAM token this many seconds before its reported expiry.
    TOKEN_REFRESH_BUFFER = 300

    def __init__(self):
        self.base_url = settings.watsonx_base_url
        self.project_id = settings.watsonx_project_id
        self.api_key = settings.ibm_cloud_api_key
        self._bearer_token: Optional[str] = None
        # Absolute expiry time (epoch seconds) of the cached bearer token.
        self._token_expiry: Optional[float] = None
        # Serializes token refreshes so concurrent requests don't all hit IAM.
        self._token_lock = asyncio.Lock()
        self._client: Optional["httpx.AsyncClient"] = None

    async def _get_client(self) -> "httpx.AsyncClient":
        """Lazily create and return the shared HTTP client."""
        if self._client is None:
            # Generous timeout: generation requests can run for minutes.
            self._client = httpx.AsyncClient(timeout=300.0)
        return self._client

    async def close(self):
        """Close the HTTP client, if one was ever created."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def _refresh_token(self) -> str:
        """Return a valid IAM bearer token, refreshing it when near expiry.

        Returns:
            The cached or freshly obtained bearer token.

        Raises:
            Exception: If the IAM token endpoint returns a non-200 status.
        """
        async with self._token_lock:
            # Reuse the cached token while it is comfortably inside its lifetime.
            if self._bearer_token and self._token_expiry:
                if time.time() < self._token_expiry - self.TOKEN_REFRESH_BUFFER:
                    return self._bearer_token

            logger.info("Refreshing IBM Cloud bearer token...")

            client = await self._get_client()
            # Send the form fields as a dict so httpx URL-encodes the API key;
            # interpolating it into a raw string breaks on '&', '=', '+', etc.
            response = await client.post(
                "https://iam.cloud.ibm.com/identity/token",
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                data={
                    "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
                    "apikey": self.api_key,
                },
            )

            if response.status_code != 200:
                raise Exception(f"Failed to get bearer token: {response.text}")

            data = response.json()
            self._bearer_token = data["access_token"]
            expires_in = data.get("expires_in", 3600)
            self._token_expiry = time.time() + expires_in

            logger.info("Bearer token refreshed. Expires in %s seconds", expires_in)
            return self._bearer_token

    async def _get_headers(self) -> Dict[str, str]:
        """Get JSON request headers carrying a valid bearer token."""
        token = await self._refresh_token()
        return {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    def _build_chat_params(
        self,
        model_id: str,
        messages: List[Dict],
        temperature: float,
        top_p: float,
        max_tokens: Optional[int],
        stop: Optional[List[str]],
        tools: Optional[List[Dict]],
    ) -> Dict:
        """Build the chat request payload shared by the blocking and streaming paths.

        Optional fields are only included when provided; a bare string ``stop``
        is normalized to a one-element list.
        """
        payload = {
            "model_id": model_id,
            "project_id": self.project_id,
            "messages": messages,
            "parameters": {
                "temperature": temperature,
                "top_p": top_p,
            },
        }

        if max_tokens is not None:
            payload["parameters"]["max_tokens"] = max_tokens

        if stop:
            payload["parameters"]["stop_sequences"] = stop if isinstance(stop, list) else [stop]

        if tools:
            payload["tools"] = tools

        return payload

    async def chat_completion(
        self,
        model_id: str,
        messages: List[Dict],
        temperature: float = 1.0,
        max_tokens: Optional[int] = None,
        top_p: float = 1.0,
        stop: Optional[List[str]] = None,
        stream: bool = False,
        tools: Optional[List[Dict]] = None,
        **kwargs,
    ) -> Dict:
        """Create a chat completion using watsonx.ai.

        Args:
            model_id: The watsonx model ID
            messages: List of chat messages
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            top_p: Nucleus sampling parameter
            stop: Stop sequences
            stream: Whether to request a streamed response from the API
            tools: Tool/function definitions
            **kwargs: Additional parameters (currently ignored)

        Returns:
            Chat completion response

        Raises:
            Exception: If the watsonx API returns a non-200 status.
        """
        headers = await self._get_headers()
        client = await self._get_client()

        payload = self._build_chat_params(
            model_id, messages, temperature, top_p, max_tokens, stop, tools
        )

        params = {"version": self.API_VERSION}
        if stream:
            params["stream"] = "true"

        response = await client.post(
            f"{self.base_url}/text/chat",
            headers=headers,
            json=payload,
            params=params,
        )

        if response.status_code != 200:
            raise Exception(f"watsonx API error: {response.status_code} - {response.text}")

        return response.json()

    async def chat_completion_stream(
        self,
        model_id: str,
        messages: List[Dict],
        temperature: float = 1.0,
        max_tokens: Optional[int] = None,
        top_p: float = 1.0,
        stop: Optional[List[str]] = None,
        tools: Optional[List[Dict]] = None,
        **kwargs,
    ) -> AsyncIterator[Dict]:
        """Stream a chat completion using watsonx.ai.

        Args:
            model_id: The watsonx model ID
            messages: List of chat messages
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            top_p: Nucleus sampling parameter
            stop: Stop sequences
            tools: Tool/function definitions
            **kwargs: Additional parameters (currently ignored)

        Yields:
            Chat completion chunks decoded from the server-sent event stream

        Raises:
            Exception: If the watsonx API returns a non-200 status.
        """
        headers = await self._get_headers()
        client = await self._get_client()

        payload = self._build_chat_params(
            model_id, messages, temperature, top_p, max_tokens, stop, tools
        )

        params = {"version": self.API_VERSION}

        async with client.stream(
            "POST",
            f"{self.base_url}/text/chat_stream",
            headers=headers,
            json=payload,
            params=params,
        ) as response:
            if response.status_code != 200:
                body = await response.aread()
                raise Exception(f"watsonx API error: {response.status_code} - {body.decode()}")

            # The endpoint replies with server-sent events: "data: {...}" lines.
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data = line[6:]  # Remove "data: " prefix
                if data.strip() == "[DONE]":
                    break
                try:
                    yield json.loads(data)
                except json.JSONDecodeError:
                    # Skip malformed chunks rather than aborting the stream.
                    continue

    async def text_generation(
        self,
        model_id: str,
        prompt: str,
        temperature: float = 1.0,
        max_tokens: Optional[int] = None,
        top_p: float = 1.0,
        stop: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """Generate text completion using watsonx.ai.

        Args:
            model_id: The watsonx model ID
            prompt: The input prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            top_p: Nucleus sampling parameter
            stop: Stop sequences
            **kwargs: Additional parameters (currently ignored)

        Returns:
            Text generation response

        Raises:
            Exception: If the watsonx API returns a non-200 status.
        """
        headers = await self._get_headers()
        client = await self._get_client()

        payload = {
            "model_id": model_id,
            "project_id": self.project_id,
            "input": prompt,
            "parameters": {
                "temperature": temperature,
                "top_p": top_p,
            },
        }

        # Note: the generation endpoint names this "max_new_tokens",
        # unlike the chat endpoint's "max_tokens".
        if max_tokens is not None:
            payload["parameters"]["max_new_tokens"] = max_tokens

        if stop:
            payload["parameters"]["stop_sequences"] = stop if isinstance(stop, list) else [stop]

        response = await client.post(
            f"{self.base_url}/text/generation",
            headers=headers,
            json=payload,
            params={"version": self.API_VERSION},
        )

        if response.status_code != 200:
            raise Exception(f"watsonx API error: {response.status_code} - {response.text}")

        return response.json()

    async def embeddings(
        self,
        model_id: str,
        inputs: List[str],
        **kwargs,
    ) -> Dict:
        """Generate embeddings using watsonx.ai.

        Args:
            model_id: The watsonx embedding model ID
            inputs: List of texts to embed
            **kwargs: Additional parameters (currently ignored)

        Returns:
            Embeddings response

        Raises:
            Exception: If the watsonx API returns a non-200 status.
        """
        headers = await self._get_headers()
        client = await self._get_client()

        payload = {
            "model_id": model_id,
            "project_id": self.project_id,
            "inputs": inputs,
        }

        response = await client.post(
            f"{self.base_url}/text/embeddings",
            headers=headers,
            json=payload,
            params={"version": self.API_VERSION},
        )

        if response.status_code != 200:
            raise Exception(f"watsonx API error: {response.status_code} - {response.text}")

        return response.json()
|
||||
|
||||
|
||||
# Global service instance: module-level singleton created at import time.
# Construction only reads settings and creates a lock -- no network I/O
# happens until the first API call.
watsonx_service = WatsonxService()
|
||||
Reference in New Issue
Block a user