Files
Norbert 786a72c06d feat: unified model selector below mode toggle
- Model dropdown below mode switch applies to active agents
- In work mode: changes model for Rex/Maddy/Coder/Research + subagents
- In lab mode: changes model for Eric
- /api/apply-model endpoint: swap model + VRAM in current mode
- /api/switch accepts optional model param
- Removed separate lab model card (consolidated into one selector)
2026-02-18 19:51:02 +00:00

372 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Ollama GPU Switcher — Toggle OpenClaw agents between work mode (qwen3) and lab mode (GPU exclusive).
No LLM involved. Reads/writes openclaw.json directly, then signals the gateway to restart.
Also manages ollama model loading/pinning via the ollama API.
"""
import json
import os
import signal
import subprocess
import threading
from flask import Flask, jsonify, request, send_from_directory
import requests as http_requests
app = Flask(__name__, static_folder="static")
# Path of the openclaw JSON config this service reads and rewrites;
# overridable via the OPENCLAW_CONFIG env var.
CONFIG_PATH = os.environ.get("OPENCLAW_CONFIG", os.path.expanduser("~/.openclaw/openclaw.json"))
# Base URL of the ollama HTTP API; overridable via the OLLAMA_URL env var.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://ollama.glenwood.schapira.nyc:11434")
# Agents that use ollama and compete for GPU
OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"]
# Default primary models: local ollama model in work mode, hosted groq model
# assigned to the work agents when switching into lab mode.
WORK_PRIMARY = "ollama/qwen3-128k:14b"
LAB_PRIMARY = "groq/llama-3.3-70b-versatile"
# Model loading state (tracked in-process, guarded by _loading_lock).
# status is one of: idle | loading | done | error
_loading_state = {"model": None, "status": "idle"}
_loading_lock = threading.Lock()
def read_config():
    """Parse the openclaw config file at CONFIG_PATH and return it as a dict."""
    with open(CONFIG_PATH, "r") as handle:
        return json.loads(handle.read())
def write_config(config):
    """Serialize *config* to CONFIG_PATH as pretty-printed JSON with a trailing newline."""
    serialized = json.dumps(config, indent=2) + "\n"
    with open(CONFIG_PATH, "w") as handle:
        handle.write(serialized)
def restart_gateway():
    """Restart the openclaw gateway so config changes take effect.

    First tries the `openclaw gateway restart` CLI. If the CLI is missing,
    times out, or exits non-zero, falls back to finding a running gateway
    process via pgrep and sending it SIGUSR1.

    Returns:
        bool: True if either mechanism appeared to succeed, False otherwise.
    """
    try:
        result = subprocess.run(
            ["openclaw", "gateway", "restart"],
            timeout=10,
            capture_output=True,
        )
        # Bug fix: the original returned True unconditionally here, so a
        # non-zero exit (restart failed) was still reported as success and
        # the signal fallback was never tried.
        if result.returncode == 0:
            return True
    except Exception:
        pass  # CLI unavailable/timed out — try signalling directly below.
    try:
        found = subprocess.run(
            ["pgrep", "-f", "openclaw.*gateway"],
            capture_output=True,
            text=True,
        )
        if found.stdout.strip():
            # Signal only the first matching PID.
            pid = int(found.stdout.strip().split("\n")[0])
            os.kill(pid, signal.SIGUSR1)
            return True
    except Exception:
        pass  # best-effort: no gateway found or signal failed
    return False
def find_agent(config, agent_id):
    """Return the agent dict with id *agent_id* from config, or None if absent."""
    candidates = config.get("agents", {}).get("list", [])
    return next((entry for entry in candidates if entry.get("id") == agent_id), None)
def detect_mode(config):
    """Classify the config as "work", "lab", or "mixed".

    "work" when every work agent's primary model is an ollama model,
    "lab" when all of them are on groq, "mixed" otherwise.
    """
    on_ollama = 0
    on_groq = 0
    for agent_id in OLLAMA_AGENTS:
        entry = find_agent(config, agent_id)
        if entry is None:
            continue
        primary = entry.get("model", {}).get("primary", "")
        if "ollama/" in primary:
            on_ollama += 1
        elif "groq/" in primary:
            on_groq += 1
    total = len(OLLAMA_AGENTS)
    if on_ollama == total:
        return "work"
    if on_groq >= total:
        return "lab"
    return "mixed"
def ollama_ps():
    """Query ollama's /api/ps for the models currently resident in VRAM.

    Returns {"ok": True, "models": [...]} on success, or
    {"ok": False, "models": [], "error": ...} if the request fails.
    """
    def summarize(entry):
        # Condense one /api/ps record into the fields the UI cares about.
        details = entry.get("details", {})
        return {
            "name": entry.get("name", "unknown"),
            "size_vram_gb": round(entry.get("size_vram", 0) / (1024**3), 1),
            "parameter_size": details.get("parameter_size", ""),
            "quantization": details.get("quantization_level", ""),
            "family": details.get("family", ""),
            "context_length": entry.get("context_length", 0),
            "expires_at": entry.get("expires_at", ""),
        }
    try:
        resp = http_requests.get(f"{OLLAMA_URL}/api/ps", timeout=5)
        resp.raise_for_status()
        payload = resp.json()
        return {"ok": True, "models": [summarize(m) for m in payload.get("models", [])]}
    except Exception as exc:
        return {"ok": False, "models": [], "error": str(exc)}
def ollama_load_model(model_name, keep_alive="-1m"):
    """Load a model into VRAM and pin it. keep_alive=-1m means forever.

    Updates the shared _loading_state (loading -> done/error) under the
    module lock so /api/ollama can report progress. Returns True on success.
    """
    global _loading_state
    with _loading_lock:
        _loading_state = {"model": model_name, "status": "loading"}
    # An empty prompt to /api/generate makes ollama load & pin the model
    # without producing any output.
    body = {"model": model_name, "prompt": "", "keep_alive": keep_alive}
    try:
        resp = http_requests.post(
            f"{OLLAMA_URL}/api/generate",
            json=body,
            timeout=300,  # large models can take minutes to load
        )
        resp.raise_for_status()
    except Exception as exc:
        with _loading_lock:
            _loading_state = {"model": model_name, "status": "error", "error": str(exc)}
        return False
    with _loading_lock:
        _loading_state = {"model": model_name, "status": "done"}
    return True
def ollama_unload_model(model_name):
    """Evict *model_name* from VRAM; returns True on success, False on any failure."""
    # keep_alive=0 tells ollama to unload the model immediately.
    body = {"model": model_name, "prompt": "", "keep_alive": "0"}
    try:
        resp = http_requests.post(f"{OLLAMA_URL}/api/generate", json=body, timeout=30)
        resp.raise_for_status()
    except Exception:
        return False
    return True
def load_model_async(model_name):
    """Start loading *model_name* on a daemon thread and return immediately."""
    threading.Thread(target=ollama_load_model, args=(model_name,), daemon=True).start()
# --- Routes ---
@app.route("/")
def index():
    """Serve the single-page UI from the static folder."""
    return send_from_directory("static", "index.html")
@app.route("/api/status")
def status():
    """Report the current mode, each work agent's model, the lab agent,
    the subagents default, and which ollama model is considered active.
    """
    try:
        config = read_config()
        mode = detect_mode(config)
        # Per-agent model summary for the work agents present in the config.
        agents = []
        for agent_id in OLLAMA_AGENTS:
            entry = find_agent(config, agent_id)
            if entry:
                agents.append({
                    "id": entry["id"],
                    "name": entry.get("name", entry["id"]),
                    "model": entry.get("model", {}).get("primary", "unknown"),
                })
        lab = find_agent(config, "lab")
        if lab:
            lab_info = {
                "name": lab.get("name", "Eric"),
                "model": lab.get("model", {}).get("primary", "unknown"),
            }
        else:
            lab_info = {"name": "Eric", "model": "unknown"}
        defaults = config.get("agents", {}).get("defaults", {})
        subagents_primary = (
            defaults.get("subagents", {}).get("model", {}).get("primary", "unknown")
        )
        # The "active" ollama model depends on which mode we're in.
        if mode == "work":
            active_ollama = agents[0]["model"] if agents else WORK_PRIMARY
        elif mode == "lab":
            active_ollama = lab_info["model"]
        else:
            active_ollama = "unknown"
        return jsonify({
            "ok": True,
            "mode": mode,
            "lab": lab_info,
            "agents": agents,
            "subagentsPrimary": subagents_primary,
            "activeOllamaModel": active_ollama,
        })
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/ollama")
def ollama_status():
    """Return the ollama loaded-model snapshot plus the in-process loading state."""
    snapshot = ollama_ps()
    # Copy the shared state under the lock so readers never see a partial update.
    with _loading_lock:
        snapshot["loading"] = dict(_loading_state)
    return jsonify(snapshot)
def apply_model_to_agents(config, ollama_model, mode):
    """Point the mode-appropriate agents at *ollama_model*, mutating config in place.

    work mode: every work agent (rex, maddy, coder, research) plus the
    subagents default. lab mode: the lab agent only. Any other mode is a no-op.
    """
    if mode == "work":
        for agent_id in OLLAMA_AGENTS:
            entry = find_agent(config, agent_id)
            if entry is not None:
                entry.setdefault("model", {})["primary"] = ollama_model
        defaults = config.setdefault("agents", {}).setdefault("defaults", {})
        defaults.setdefault("subagents", {}).setdefault("model", {})["primary"] = ollama_model
    elif mode == "lab":
        lab = find_agent(config, "lab")
        if lab is not None:
            lab.setdefault("model", {})["primary"] = ollama_model
@app.route("/api/switch", methods=["POST"])
def switch():
    """Switch between work and lab mode.

    Body: {"mode": "work"|"lab", "model": optional "ollama/..." override}.
    Rewrites the config, restarts the gateway, then swaps VRAM (unload
    everything else, background-load the target model). Order matters:
    config must be written before the gateway restart picks it up.
    """
    try:
        data = request.json or {}
        target_mode = data.get("mode", "work")
        selected_model = data.get("model", None)
        if target_mode not in ("work", "lab"):
            return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400
        config = read_config()
        if target_mode == "lab":
            # Move work agents to groq
            for agent_id in OLLAMA_AGENTS:
                agent = find_agent(config, agent_id)
                if agent:
                    agent.setdefault("model", {})["primary"] = LAB_PRIMARY
            # Subagents default follows the work agents onto groq.
            config.setdefault("agents", {}).setdefault("defaults", {}).setdefault("subagents", {}).setdefault("model", {})
            config["agents"]["defaults"]["subagents"]["model"]["primary"] = LAB_PRIMARY
            # If a model was selected, set it as the lab model
            if selected_model and "ollama/" in selected_model:
                lab = find_agent(config, "lab")
                if lab:
                    lab.setdefault("model", {})["primary"] = selected_model
            # Determine which ollama model to load for lab
            lab = find_agent(config, "lab")
            lab_model = lab.get("model", {}).get("primary", "") if lab else ""
            # None when the lab agent isn't on ollama — then no VRAM swap happens.
            target_ollama_model = lab_model.replace("ollama/", "") if "ollama/" in lab_model else None
        elif target_mode == "work":
            # Use selected model or default
            work_model = selected_model if selected_model and "ollama/" in selected_model else WORK_PRIMARY
            for agent_id in OLLAMA_AGENTS:
                agent = find_agent(config, agent_id)
                if agent:
                    agent.setdefault("model", {})["primary"] = work_model
            config.setdefault("agents", {}).setdefault("defaults", {}).setdefault("subagents", {}).setdefault("model", {})
            config["agents"]["defaults"]["subagents"]["model"]["primary"] = work_model
            # Strip the "ollama/" provider prefix to get the raw ollama model name.
            target_ollama_model = work_model.replace("ollama/", "")
        write_config(config)
        restarted = restart_gateway()
        # Swap VRAM: unload old, load+pin new
        if target_ollama_model:
            ps = ollama_ps()
            for m in ps.get("models", []):
                if m["name"] != target_ollama_model:
                    ollama_unload_model(m["name"])
            # Loading happens in the background; poll /api/ollama for progress.
            load_model_async(target_ollama_model)
        return jsonify({
            "ok": True,
            "mode": target_mode,
            "restarted": restarted,
            "loading_model": target_ollama_model,
        })
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/apply-model", methods=["POST"])
def apply_model():
    """Change the ollama model for agents in the current mode and swap VRAM.

    Body: {"model": "ollama/..."}. The mode is detected from the config on
    disk, not supplied by the client. Non-ollama models are rejected (400).
    """
    try:
        data = request.json or {}
        model = data.get("model", "")
        if not model or "ollama/" not in model:
            return jsonify({"ok": False, "error": "Must be an ollama model"}), 400
        config = read_config()
        mode = detect_mode(config)
        # NOTE(review): in "mixed" mode apply_model_to_agents is a no-op, but
        # the VRAM swap below still runs — confirm this is intended.
        apply_model_to_agents(config, model, mode)
        write_config(config)
        restarted = restart_gateway()
        # Swap VRAM
        ollama_model = model.replace("ollama/", "")
        ps = ollama_ps()
        for m in ps.get("models", []):
            if m["name"] != ollama_model:
                ollama_unload_model(m["name"])
        # Background load; poll /api/ollama for progress.
        load_model_async(ollama_model)
        return jsonify({
            "ok": True,
            "model": model,
            "mode": mode,
            "restarted": restarted,
            "loading_model": ollama_model,
        })
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/ollama/load", methods=["POST"])
def load_model():
    """Manually load/pin a model by name; the load runs in the background."""
    payload = request.json or {}
    target = payload.get("model", "")
    if not target:
        return jsonify({"ok": False, "error": "No model specified"}), 400
    load_model_async(target)
    return jsonify({"ok": True, "loading": target})
@app.route("/api/ollama/unload", methods=["POST"])
def unload_model():
    """Manually unload a model; responds synchronously with the outcome."""
    payload = request.json or {}
    target = payload.get("model", "")
    if not target:
        return jsonify({"ok": False, "error": "No model specified"}), 400
    succeeded = ollama_unload_model(target)
    return jsonify({"ok": succeeded, "unloaded": target})
if __name__ == "__main__":
    # Listen on all interfaces; port defaults to 8585, overridable via PORT.
    port = int(os.environ.get("PORT", 8585))
    print(f"🔀 Ollama GPU Switcher running on http://0.0.0.0:{port}")
    print(f"📄 Config: {CONFIG_PATH}")
    print(f"🦙 Ollama: {OLLAMA_URL}")
    app.run(host="0.0.0.0", port=port, debug=False)