184 lines
5.6 KiB
Python
184 lines
5.6 KiB
Python
"""Example usage of watsonx-openai-proxy with OpenAI Python SDK."""
|
|
|
|
import os
|
|
from openai import OpenAI
|
|
|
|
# Configure the client to use the proxy
|
|
# Point the OpenAI SDK at the local proxy instead of api.openai.com.
# The proxy may not require authentication, hence the placeholder default.
_proxy_api_key = os.getenv("API_KEY", "not-needed-if-proxy-has-no-auth")

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key=_proxy_api_key,
)
|
|
|
|
|
|
def example_chat_completion():
    """Example: Basic chat completion."""
    print("\n=== Chat Completion Example ===")

    # Build the conversation up front so the create() call stays short.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    completion = client.chat.completions.create(
        model="ibm/granite-3-8b-instruct",
        messages=conversation,
        temperature=0.7,
        max_tokens=100,
    )

    # The proxy returns an OpenAI-shaped response: choices + usage.
    print(f"Response: {completion.choices[0].message.content}")
    print(f"Tokens used: {completion.usage.total_tokens}")
|
|
|
|
|
|
def example_streaming_chat():
    """Example: Streaming chat completion.

    Prints the assistant's reply token-by-token as chunks arrive from the
    proxy, mirroring how a chat UI would render incremental output.
    """
    print("\n=== Streaming Chat Example ===")

    stream = client.chat.completions.create(
        model="ibm/granite-3-8b-instruct",
        messages=[
            {"role": "user", "content": "Tell me a short story about a robot."},
        ],
        stream=True,
        max_tokens=200,
    )

    print("Response: ", end="", flush=True)
    for chunk in stream:
        # Some OpenAI-compatible servers emit chunks with an empty
        # ``choices`` list (e.g. a trailing usage-only chunk); guard before
        # indexing so those chunks don't raise IndexError.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()
|
|
|
|
|
|
def example_with_model_mapping():
    """Example: Using mapped model names."""
    print("\n=== Model Mapping Example ===")

    # If you configured MODEL_MAP_GPT4=ibm/granite-4-h-small in .env
    # you can use "gpt-4" and it will be mapped automatically
    question = {"role": "user", "content": "Explain quantum computing in one sentence."}
    mapped_response = client.chat.completions.create(
        model="gpt-4",  # This gets mapped to ibm/granite-4-h-small
        messages=[question],
        max_tokens=50,
    )

    print(f"Response: {mapped_response.choices[0].message.content}")
|
|
|
|
|
|
def example_embeddings():
    """Example: Generate embeddings."""
    print("\n=== Embeddings Example ===")

    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is a subset of artificial intelligence.",
    ]
    result = client.embeddings.create(
        model="ibm/slate-125m-english-rtrvr",
        input=sentences,
    )

    first_vector = result.data[0].embedding
    print(f"Generated {len(result.data)} embeddings")
    print(f"Embedding dimension: {len(first_vector)}")
    print(f"First embedding (first 5 values): {first_vector[:5]}")
|
|
|
|
|
|
def example_list_models():
    """Example: List available models."""
    print("\n=== List Models Example ===")

    catalog = client.models.list()

    print(f"Available models: {len(catalog.data)}")
    print("\nFirst 5 models:")
    # Show only a preview — the full catalog can be long.
    for entry in catalog.data[:5]:
        print(f"  - {entry.id}")
|
|
|
|
|
|
def example_completion_legacy():
    """Example: Legacy completion endpoint."""
    print("\n=== Legacy Completion Example ===")

    # The legacy /v1/completions endpoint takes a raw prompt string
    # instead of a messages list.
    result = client.completions.create(
        model="ibm/granite-3-8b-instruct",
        prompt="Once upon a time, in a land far away,",
        max_tokens=50,
        temperature=0.8,
    )

    print(f"Completion: {result.choices[0].text}")
|
|
|
|
|
|
def example_with_functions():
    """Example: Function calling (if supported by model)."""
    print("\n=== Function Calling Example ===")

    # JSON-schema parameters for the single tool we expose to the model.
    weather_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
            },
        },
        "required": ["location"],
    }
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather in a location",
                "parameters": weather_schema,
            },
        }
    ]

    try:
        reply = client.chat.completions.create(
            model="ibm/granite-3-8b-instruct",
            messages=[
                {"role": "user", "content": "What's the weather like in Boston?"},
            ],
            tools=tools,
            tool_choice="auto",
        )

        message = reply.choices[0].message
        if message.tool_calls:
            invoked = message.tool_calls[0].function
            print(f"Function called: {invoked.name}")
            print(f"Arguments: {invoked.arguments}")
        else:
            # Model answered in plain text instead of calling the tool.
            print(f"Response: {message.content}")
    except Exception as e:
        # Best-effort example: not every backend model supports tools.
        print(f"Function calling may not be supported by this model: {e}")
|
|
|
|
|
|
if __name__ == "__main__":
    print("watsonx-openai-proxy Usage Examples")
    print("=" * 50)

    try:
        # Run the core examples in order; any failure aborts the run.
        for run_example in (
            example_chat_completion,
            example_streaming_chat,
            example_embeddings,
            example_list_models,
            example_completion_legacy,
        ):
            run_example()

        # Optional examples (may require specific configuration)
        # example_with_model_mapping()
        # example_with_functions()

        print("\n" + "=" * 50)
        print("All examples completed successfully!")

    except Exception as e:
        # Top-level boundary for a demo script: report and hint at fixes.
        print(f"\nError: {e}")
        print("\nMake sure:")
        print("1. The proxy server is running (python -m app.main)")
        print("2. Your .env file is configured correctly")
        print("3. You have the OpenAI Python SDK installed (pip install openai)")
|