184 lines
5.6 KiB
Python
184 lines
5.6 KiB
Python
"""Example usage of watsonx-openai-proxy with OpenAI Python SDK."""
|
|
|
|
import os
|
|
from openai import OpenAI
|
|
|
|
# Configure the client to use the proxy
|
|
# Point the OpenAI SDK at the local proxy instead of api.openai.com.
# The proxy may not require authentication, hence the placeholder default.
_proxy_api_key = os.getenv("API_KEY", "not-needed-if-proxy-has-no-auth")

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key=_proxy_api_key,
)
|
|
|
|
|
|
def example_chat_completion():
    """Example: Basic chat completion."""
    print("\n=== Chat Completion Example ===")

    # Build the conversation up front so the create() call stays short.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    completion = client.chat.completions.create(
        model="ibm/granite-3-8b-instruct",
        messages=conversation,
        temperature=0.7,
        max_tokens=100,
    )

    # The proxy returns an OpenAI-shaped response: choices + usage.
    print(f"Response: {completion.choices[0].message.content}")
    print(f"Tokens used: {completion.usage.total_tokens}")
|
|
|
|
|
|
def example_streaming_chat():
    """Example: Streaming chat completion.

    Prints the assistant's reply token-by-token as chunks arrive from the
    proxy, mirroring how a chat UI would render incremental output.
    """
    print("\n=== Streaming Chat Example ===")

    stream = client.chat.completions.create(
        model="ibm/granite-3-8b-instruct",
        messages=[
            {"role": "user", "content": "Tell me a short story about a robot."},
        ],
        stream=True,
        max_tokens=200,
    )

    print("Response: ", end="", flush=True)
    for chunk in stream:
        # Some OpenAI-compatible servers emit chunks with an empty
        # ``choices`` list (e.g. a trailing usage-only chunk); guard before
        # indexing so those chunks don't raise IndexError.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()
|
|
|
|
|
|
def example_with_model_mapping():
    """Example: Using mapped model names."""
    print("\n=== Model Mapping Example ===")

    # If you configured MODEL_MAP_GPT4=ibm/granite-4-h-small in .env
    # you can use "gpt-4" and it will be mapped automatically
    question = {"role": "user", "content": "Explain quantum computing in one sentence."}
    mapped_response = client.chat.completions.create(
        model="gpt-4",  # This gets mapped to ibm/granite-4-h-small
        messages=[question],
        max_tokens=50,
    )

    print(f"Response: {mapped_response.choices[0].message.content}")
|
|
|
|
|
|
def example_embeddings():
    """Example: Generate embeddings."""
    print("\n=== Embeddings Example ===")

    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is a subset of artificial intelligence.",
    ]
    result = client.embeddings.create(
        model="ibm/slate-125m-english-rtrvr",
        input=sentences,
    )

    first_vector = result.data[0].embedding
    print(f"Generated {len(result.data)} embeddings")
    print(f"Embedding dimension: {len(first_vector)}")
    print(f"First embedding (first 5 values): {first_vector[:5]}")
|
|
|
|
|
|
def example_list_models():
    """Example: List available models."""
    print("\n=== List Models Example ===")

    catalog = client.models.list()

    print(f"Available models: {len(catalog.data)}")
    print("\nFirst 5 models:")
    # Show only a preview — the full catalog can be long.
    for entry in catalog.data[:5]:
        print(f"  - {entry.id}")
|
|
|
|
|
|
def example_completion_legacy():
    """Example: Legacy completion endpoint."""
    print("\n=== Legacy Completion Example ===")

    # The legacy /v1/completions endpoint takes a raw prompt string
    # instead of a messages list.
    result = client.completions.create(
        model="ibm/granite-3-8b-instruct",
        prompt="Once upon a time, in a land far away,",
        max_tokens=50,
        temperature=0.8,
    )

    print(f"Completion: {result.choices[0].text}")
|
|
|
|
|
|
def example_with_functions():
    """Example: Function calling (if supported by model)."""
    print("\n=== Function Calling Example ===")

    # JSON-schema parameters for the single tool we expose to the model.
    weather_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
            },
        },
        "required": ["location"],
    }
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather in a location",
                "parameters": weather_schema,
            },
        }
    ]

    try:
        reply = client.chat.completions.create(
            model="ibm/granite-3-8b-instruct",
            messages=[
                {"role": "user", "content": "What's the weather like in Boston?"},
            ],
            tools=tools,
            tool_choice="auto",
        )

        message = reply.choices[0].message
        if message.tool_calls:
            invoked = message.tool_calls[0].function
            print(f"Function called: {invoked.name}")
            print(f"Arguments: {invoked.arguments}")
        else:
            # Model answered in plain text instead of calling the tool.
            print(f"Response: {message.content}")
    except Exception as e:
        # Best-effort example: not every backend model supports tools.
        print(f"Function calling may not be supported by this model: {e}")
|
|
|
|
|
|
if __name__ == "__main__":
    print("watsonx-openai-proxy Usage Examples")
    print("=" * 50)

    try:
        # Run the core examples in order; any failure aborts the run.
        for run_example in (
            example_chat_completion,
            example_streaming_chat,
            example_embeddings,
            example_list_models,
            example_completion_legacy,
        ):
            run_example()

        # Optional examples (may require specific configuration)
        # example_with_model_mapping()
        # example_with_functions()

        print("\n" + "=" * 50)
        print("All examples completed successfully!")

    except Exception as e:
        # Top-level boundary for a demo script: report and hint at fixes.
        print(f"\nError: {e}")
        print("\nMake sure:")
        print("1. The proxy server is running (python -m app.main)")
        print("2. Your .env file is configured correctly")
        print("3. You have the OpenAI Python SDK installed (pip install openai)")
|