- Model dropdown below mode switch applies to active agents - In work mode: changes model for Rex/Maddy/Coder/Research + subagents - In lab mode: changes model for Eric - /api/apply-model endpoint: swap model + VRAM in current mode - /api/switch accepts optional model param - Removed separate lab model card (consolidated into one selector)
372 lines
12 KiB
Python
372 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Ollama GPU Switcher — Toggle OpenClaw agents between work mode (qwen3) and lab mode (GPU exclusive).
|
|
No LLM involved. Reads/writes openclaw.json directly, then signals the gateway to restart.
|
|
Also manages ollama model loading/pinning via the ollama API.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import signal
|
|
import subprocess
|
|
import threading
|
|
from flask import Flask, jsonify, request, send_from_directory
|
|
import requests as http_requests
|
|
|
|
app = Flask(__name__, static_folder="static")

# OpenClaw config file to read/rewrite; overridable via OPENCLAW_CONFIG.
CONFIG_PATH = os.environ.get("OPENCLAW_CONFIG", os.path.expanduser("~/.openclaw/openclaw.json"))
# Base URL of the ollama server whose VRAM this app manages.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://ollama.glenwood.schapira.nyc:11434")

# Agents that use ollama and compete for GPU
OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"]

# Default primary models: work mode pins a local ollama model; lab mode moves
# the work agents to a hosted groq model, freeing the GPU for the lab agent.
WORK_PRIMARY = "ollama/qwen3-128k:14b"
LAB_PRIMARY = "groq/llama-3.3-70b-versatile"

# Model loading state (tracked in-process)
_loading_state = {"model": None, "status": "idle"}  # idle | loading | done | error
# Guards _loading_state across Flask request threads and loader threads.
_loading_lock = threading.Lock()
|
|
|
|
def read_config():
    """Parse and return the OpenClaw JSON config from CONFIG_PATH."""
    with open(CONFIG_PATH, "r") as f:
        raw = f.read()
    return json.loads(raw)
|
|
|
|
|
|
def write_config(config):
    """Write *config* to CONFIG_PATH as pretty-printed JSON plus trailing newline."""
    serialized = json.dumps(config, indent=2) + "\n"
    with open(CONFIG_PATH, "w") as f:
        f.write(serialized)
|
|
|
|
|
|
def restart_gateway():
    """Restart the openclaw gateway via CLI.

    Falls back to signalling a running gateway process with SIGUSR1 when the
    CLI invocation fails. Returns True on (apparent) success, False otherwise.
    """
    try:
        subprocess.run(["openclaw", "gateway", "restart"], timeout=10, capture_output=True)
    except Exception:
        # CLI unavailable or timed out — try to signal the process directly.
        try:
            lookup = subprocess.run(
                ["pgrep", "-f", "openclaw.*gateway"], capture_output=True, text=True
            )
            pids = lookup.stdout.strip()
            if pids:
                first_pid = int(pids.split("\n")[0])
                os.kill(first_pid, signal.SIGUSR1)
                return True
        except Exception:
            pass
        return False
    return True
|
|
|
|
|
|
def find_agent(config, agent_id):
    """Return the agent dict with the given id from config, or None if absent."""
    agents = config.get("agents", {}).get("list", [])
    return next((a for a in agents if a.get("id") == agent_id), None)
|
|
|
|
|
|
def detect_mode(config):
    """Classify the current setup as "work", "lab", or "mixed".

    "work" when every work agent's primary model is an ollama model,
    "lab" when all of them are on groq, otherwise "mixed".
    """
    tallies = {"ollama": 0, "groq": 0}
    for agent_id in OLLAMA_AGENTS:
        agent = find_agent(config, agent_id)
        if not agent:
            continue
        primary = agent.get("model", {}).get("primary", "")
        if "ollama/" in primary:
            tallies["ollama"] += 1
        elif "groq/" in primary:
            tallies["groq"] += 1

    if tallies["ollama"] == len(OLLAMA_AGENTS):
        return "work"
    if tallies["groq"] >= len(OLLAMA_AGENTS):
        return "lab"
    return "mixed"
|
|
|
|
|
|
def ollama_ps():
    """Get currently loaded models from ollama's /api/ps endpoint.

    Returns {"ok": bool, "models": [...]} with a condensed per-model summary,
    plus an "error" key when the request fails.
    """
    try:
        resp = http_requests.get(f"{OLLAMA_URL}/api/ps", timeout=5)
        resp.raise_for_status()
        payload = resp.json()
        summaries = []
        for entry in payload.get("models", []):
            details = entry.get("details", {})
            summaries.append({
                "name": entry.get("name", "unknown"),
                # size_vram is reported in bytes; surface it as GiB.
                "size_vram_gb": round(entry.get("size_vram", 0) / (1024**3), 1),
                "parameter_size": details.get("parameter_size", ""),
                "quantization": details.get("quantization_level", ""),
                "family": details.get("family", ""),
                "context_length": entry.get("context_length", 0),
                "expires_at": entry.get("expires_at", ""),
            })
        return {"ok": True, "models": summaries}
    except Exception as exc:
        return {"ok": False, "models": [], "error": str(exc)}
|
|
|
|
|
|
def ollama_load_model(model_name, keep_alive="-1m"):
    """Load a model into VRAM and pin it. keep_alive=-1m means forever.

    Publishes progress through the module-level _loading_state so the
    /api/ollama endpoint can report it. Returns True on success.
    """
    global _loading_state
    with _loading_lock:
        _loading_state = {"model": model_name, "status": "loading"}

    # An empty-prompt /api/generate call makes ollama load & pin the model.
    payload = {"model": model_name, "prompt": "", "keep_alive": keep_alive}
    try:
        resp = http_requests.post(
            f"{OLLAMA_URL}/api/generate",
            json=payload,
            timeout=300,  # models can take a while to load
        )
        resp.raise_for_status()
    except Exception as exc:
        with _loading_lock:
            _loading_state = {"model": model_name, "status": "error", "error": str(exc)}
        return False

    with _loading_lock:
        _loading_state = {"model": model_name, "status": "done"}
    return True
|
|
|
|
|
|
def ollama_unload_model(model_name):
    """Unload a model from VRAM (keep_alive=0). Returns True on success."""
    payload = {"model": model_name, "prompt": "", "keep_alive": "0"}
    try:
        resp = http_requests.post(
            f"{OLLAMA_URL}/api/generate", json=payload, timeout=30
        )
        resp.raise_for_status()
    except Exception:
        return False
    return True
|
|
|
|
|
|
def load_model_async(model_name):
    """Kick off ollama_load_model on a daemon thread and return immediately."""
    worker = threading.Thread(
        target=ollama_load_model, args=(model_name,), daemon=True
    )
    worker.start()
|
|
|
|
|
|
# --- Routes ---
|
|
|
|
@app.route("/")
def index():
    """Serve the single-page UI from the static folder."""
    return send_from_directory("static", "index.html")
|
|
|
|
|
|
@app.route("/api/status")
def status():
    """Report current mode, per-agent models, lab agent info, and the
    subagents default, plus which ollama model is considered active."""
    try:
        config = read_config()
        mode = detect_mode(config)

        # Per-agent model summary for the work agents.
        agent_details = []
        for agent_id in OLLAMA_AGENTS:
            agent = find_agent(config, agent_id)
            if not agent:
                continue
            agent_details.append({
                "id": agent["id"],
                "name": agent.get("name", agent["id"]),
                "model": agent.get("model", {}).get("primary", "unknown"),
            })

        lab = find_agent(config, "lab")
        if lab:
            lab_info = {
                "name": lab.get("name", "Eric"),
                "model": lab.get("model", {}).get("primary", "unknown"),
            }
        else:
            lab_info = {"name": "Eric", "model": "unknown"}

        defaults = config.get("agents", {}).get("defaults", {})
        subagents_primary = (
            defaults.get("subagents", {}).get("model", {}).get("primary", "unknown")
        )

        # Determine active ollama model based on mode
        if mode == "work":
            active_ollama = agent_details[0]["model"] if agent_details else WORK_PRIMARY
        elif mode == "lab":
            active_ollama = lab_info["model"]
        else:
            active_ollama = "unknown"

        return jsonify({
            "ok": True,
            "mode": mode,
            "lab": lab_info,
            "agents": agent_details,
            "subagentsPrimary": subagents_primary,
            "activeOllamaModel": active_ollama,
        })
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
|
|
|
|
|
|
@app.route("/api/ollama")
def ollama_status():
    """Get ollama loaded models + loading state."""
    report = ollama_ps()
    # Snapshot the shared loading state under the lock before attaching it.
    with _loading_lock:
        report["loading"] = dict(_loading_state)
    return jsonify(report)
|
|
|
|
|
|
def apply_model_to_agents(config, ollama_model, mode):
    """Apply the selected ollama model to the appropriate agents based on mode.

    In work mode: update work agents (rex, maddy, coder, research) + the
    subagents default. In lab mode: update the lab agent. Any other mode is
    a no-op. Mutates *config* in place.
    """
    if mode == "work":
        for agent_id in OLLAMA_AGENTS:
            agent = find_agent(config, agent_id)
            if agent is not None:
                agent.setdefault("model", {})["primary"] = ollama_model
        # Ensure the nested defaults path exists, then point it at the model.
        subagent_model = (
            config.setdefault("agents", {})
            .setdefault("defaults", {})
            .setdefault("subagents", {})
            .setdefault("model", {})
        )
        subagent_model["primary"] = ollama_model
    elif mode == "lab":
        lab = find_agent(config, "lab")
        if lab is not None:
            lab.setdefault("model", {})["primary"] = ollama_model
|
|
|
|
|
|
@app.route("/api/switch", methods=["POST"])
def switch():
    """Switch between work mode and lab mode.

    Body: {"mode": "work"|"lab", "model": optional "ollama/..." model}.
    Rewrites openclaw.json, restarts the gateway, then swaps VRAM so only
    the target ollama model remains loaded (and pinned).
    """
    try:
        # get_json(silent=True) tolerates a missing or non-JSON body; the
        # previous request.json raised before the `or {}` fallback could apply.
        data = request.get_json(silent=True) or {}
        target_mode = data.get("mode", "work")
        selected_model = data.get("model", None)

        if target_mode not in ("work", "lab"):
            return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400

        config = read_config()

        if target_mode == "lab":
            # Move work agents (and the subagents default) off the GPU to groq.
            apply_model_to_agents(config, LAB_PRIMARY, "work")

            # If an ollama model was selected, set it as the lab model.
            if selected_model and "ollama/" in selected_model:
                apply_model_to_agents(config, selected_model, "lab")

            # Determine which ollama model to load for lab (None if the lab
            # agent isn't on an ollama model).
            lab = find_agent(config, "lab")
            lab_model = lab.get("model", {}).get("primary", "") if lab else ""
            target_ollama_model = (
                lab_model.replace("ollama/", "") if "ollama/" in lab_model else None
            )
        else:  # work mode
            # Use the selected ollama model, or fall back to the default.
            work_model = (
                selected_model
                if selected_model and "ollama/" in selected_model
                else WORK_PRIMARY
            )
            apply_model_to_agents(config, work_model, "work")
            target_ollama_model = work_model.replace("ollama/", "")

        write_config(config)
        restarted = restart_gateway()

        # Swap VRAM: unload old, load+pin new
        if target_ollama_model:
            for m in ollama_ps().get("models", []):
                if m["name"] != target_ollama_model:
                    ollama_unload_model(m["name"])
            load_model_async(target_ollama_model)

        return jsonify({
            "ok": True,
            "mode": target_mode,
            "restarted": restarted,
            "loading_model": target_ollama_model,
        })
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
|
|
|
|
|
|
@app.route("/api/apply-model", methods=["POST"])
def apply_model():
    """Change the ollama model for agents in the current mode and swap VRAM.

    Body: {"model": "ollama/<name>"}. Updates the work agents + subagents
    default in work mode, or the lab agent in lab mode, then restarts the
    gateway and repins VRAM to the new model.
    """
    try:
        # get_json(silent=True) tolerates a missing or non-JSON body; the
        # previous request.json raised before the `or {}` fallback could apply.
        data = request.get_json(silent=True) or {}
        model = data.get("model", "")
        if not model or "ollama/" not in model:
            return jsonify({"ok": False, "error": "Must be an ollama model"}), 400

        config = read_config()
        mode = detect_mode(config)

        apply_model_to_agents(config, model, mode)
        write_config(config)
        restarted = restart_gateway()

        # Swap VRAM: unload everything except the target, then load & pin it.
        ollama_model = model.replace("ollama/", "")
        for m in ollama_ps().get("models", []):
            if m["name"] != ollama_model:
                ollama_unload_model(m["name"])
        load_model_async(ollama_model)

        return jsonify({
            "ok": True,
            "model": model,
            "mode": mode,
            "restarted": restarted,
            "loading_model": ollama_model,
        })
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
|
|
|
|
|
|
@app.route("/api/ollama/load", methods=["POST"])
def load_model():
    """Manually load/pin a model. Body: {"model": "<ollama model name>"}."""
    # get_json(silent=True) tolerates a missing or non-JSON body; the
    # previous request.json raised before the `or {}` fallback could apply.
    data = request.get_json(silent=True) or {}
    model = data.get("model", "")
    if not model:
        return jsonify({"ok": False, "error": "No model specified"}), 400
    load_model_async(model)
    return jsonify({"ok": True, "loading": model})
|
|
|
|
|
|
@app.route("/api/ollama/unload", methods=["POST"])
def unload_model():
    """Manually unload a model. Body: {"model": "<ollama model name>"}."""
    # get_json(silent=True) tolerates a missing or non-JSON body; the
    # previous request.json raised before the `or {}` fallback could apply.
    data = request.get_json(silent=True) or {}
    model = data.get("model", "")
    if not model:
        return jsonify({"ok": False, "error": "No model specified"}), 400
    result = ollama_unload_model(model)
    return jsonify({"ok": result, "unloaded": model})
|
|
|
|
|
|
if __name__ == "__main__":
    # Default port 8585; override with the PORT env var.
    port = int(os.environ.get("PORT", 8585))
    print(f"🔀 Ollama GPU Switcher running on http://0.0.0.0:{port}")
    print(f"📄 Config: {CONFIG_PATH}")
    print(f"🦙 Ollama: {OLLAMA_URL}")
    # Binds to all interfaces; debug disabled for non-dev use.
    app.run(host="0.0.0.0", port=port, debug=False)
|