diff --git a/app.py b/app.py index 3722da4..0112375 100644 --- a/app.py +++ b/app.py @@ -2,18 +2,21 @@ """ Ollama GPU Switcher โ€” Toggle OpenClaw agents between work mode (qwen3) and lab mode (GPU exclusive). No LLM involved. Reads/writes openclaw.json directly, then signals the gateway to restart. +Also manages ollama model loading/pinning via the ollama API. """ import json import os import signal import subprocess -import copy +import threading from flask import Flask, jsonify, request, send_from_directory +import requests as http_requests app = Flask(__name__, static_folder="static") CONFIG_PATH = os.environ.get("OPENCLAW_CONFIG", os.path.expanduser("~/.openclaw/openclaw.json")) +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://ollama.glenwood.schapira.nyc:11434") # Agents that use ollama and compete for GPU OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"] @@ -21,6 +24,10 @@ OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"] WORK_PRIMARY = "ollama/qwen3-128k:14b" LAB_PRIMARY = "groq/llama-3.3-70b-versatile" +# Model loading state (tracked in-process) +_loading_state = {"model": None, "status": "idle"} # idle | loading | done | error +_loading_lock = threading.Lock() + def read_config(): with open(CONFIG_PATH, "r") as f: @@ -39,7 +46,6 @@ def restart_gateway(): subprocess.run(["openclaw", "gateway", "restart"], timeout=10, capture_output=True) return True except Exception: - # Fallback: try SIGUSR1 to the gateway process try: result = subprocess.run(["pgrep", "-f", "openclaw.*gateway"], capture_output=True, text=True) if result.stdout.strip(): @@ -77,6 +83,82 @@ def detect_mode(config): return "mixed" +def ollama_ps(): + """Get currently loaded models from ollama.""" + try: + r = http_requests.get(f"{OLLAMA_URL}/api/ps", timeout=5) + r.raise_for_status() + data = r.json() + models = [] + for m in data.get("models", []): + size_gb = m.get("size_vram", 0) / (1024**3) + models.append({ + "name": m.get("name", "unknown"), + "size_vram_gb": round(size_gb, 
1), + "parameter_size": m.get("details", {}).get("parameter_size", ""), + "quantization": m.get("details", {}).get("quantization_level", ""), + "family": m.get("details", {}).get("family", ""), + "context_length": m.get("context_length", 0), + "expires_at": m.get("expires_at", ""), + }) + return {"ok": True, "models": models} + except Exception as e: + return {"ok": False, "models": [], "error": str(e)} + + +def ollama_load_model(model_name, keep_alive="-1m"): + """Load a model into VRAM and pin it. keep_alive=-1m means forever.""" + global _loading_state + with _loading_lock: + _loading_state = {"model": model_name, "status": "loading"} + + try: + # Use /api/generate with empty prompt to load & pin the model + r = http_requests.post( + f"{OLLAMA_URL}/api/generate", + json={ + "model": model_name, + "prompt": "", + "keep_alive": keep_alive, + }, + timeout=300, # models can take a while to load + ) + r.raise_for_status() + with _loading_lock: + _loading_state = {"model": model_name, "status": "done"} + return True + except Exception as e: + with _loading_lock: + _loading_state = {"model": model_name, "status": "error", "error": str(e)} + return False + + +def ollama_unload_model(model_name): + """Unload a model from VRAM.""" + try: + r = http_requests.post( + f"{OLLAMA_URL}/api/generate", + json={ + "model": model_name, + "prompt": "", + "keep_alive": "0", + }, + timeout=30, + ) + r.raise_for_status() + return True + except Exception: + return False + + +def load_model_async(model_name): + """Load model in background thread.""" + t = threading.Thread(target=ollama_load_model, args=(model_name,), daemon=True) + t.start() + + +# --- Routes --- + @app.route("/") def index(): return send_from_directory("static", "index.html") @@ -104,7 +186,6 @@ def status(): "model": lab.get("model", {}).get("primary", "unknown") if lab else "unknown", } - # Subagents default subagents_primary = ( config.get("agents", {}) .get("defaults", {}) @@ -124,6 +205,16 @@ def status(): return 
jsonify({"ok": False, "error": str(e)}), 500 +@app.route("/api/ollama") +def ollama_status(): + """Get ollama loaded models + loading state.""" + ps = ollama_ps() + with _loading_lock: + loading = dict(_loading_state) + ps["loading"] = loading + return jsonify(ps) + + @app.route("/api/switch", methods=["POST"]) def switch(): try: @@ -132,13 +223,23 @@ def switch(): if target_mode == "lab": new_primary = LAB_PRIMARY + target_ollama_model = None # lab model is managed separately elif target_mode == "work": new_primary = WORK_PRIMARY + target_ollama_model = "qwen3-128k:14b" else: return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400 config = read_config() + # Determine which ollama model to load based on mode + if target_mode == "lab": + lab = find_agent(config, "lab") + if lab: + lab_model = lab.get("model", {}).get("primary", "") + if "ollama/" in lab_model: + target_ollama_model = lab_model.replace("ollama/", "") + # Patch each agent's primary model for agent_id in OLLAMA_AGENTS: agent = find_agent(config, agent_id) @@ -154,10 +255,21 @@ def switch(): write_config(config) restarted = restart_gateway() + # Unload current models and load the target model + if target_ollama_model: + # First unload anything currently loaded + ps = ollama_ps() + for m in ps.get("models", []): + if m["name"] != target_ollama_model: + ollama_unload_model(m["name"]) + # Load and pin the target model async + load_model_async(target_ollama_model) + return jsonify({ "ok": True, "mode": target_mode, "restarted": restarted, + "loading_model": target_ollama_model, }) except Exception as e: return jsonify({"ok": False, "error": str(e)}), 500 @@ -183,13 +295,53 @@ def set_lab_model(): write_config(config) restarted = restart_gateway() - return jsonify({"ok": True, "model": model, "restarted": restarted}) + # If currently in lab mode, load the new model + mode = detect_mode(config) + ollama_model_name = None + if mode == "lab" and "ollama/" in model: + ollama_model_name = 
model.replace("ollama/", "") + # Unload old models first + ps = ollama_ps() + for m in ps.get("models", []): + if m["name"] != ollama_model_name: + ollama_unload_model(m["name"]) + load_model_async(ollama_model_name) + + return jsonify({ + "ok": True, + "model": model, + "restarted": restarted, + "loading_model": ollama_model_name, + }) except Exception as e: return jsonify({"ok": False, "error": str(e)}), 500 +@app.route("/api/ollama/load", methods=["POST"]) +def load_model(): + """Manually load/pin a model.""" + data = request.json or {} + model = data.get("model", "") + if not model: + return jsonify({"ok": False, "error": "No model specified"}), 400 + load_model_async(model) + return jsonify({"ok": True, "loading": model}) + + +@app.route("/api/ollama/unload", methods=["POST"]) +def unload_model(): + """Manually unload a model.""" + data = request.json or {} + model = data.get("model", "") + if not model: + return jsonify({"ok": False, "error": "No model specified"}), 400 + result = ollama_unload_model(model) + return jsonify({"ok": result, "unloaded": model}) + + if __name__ == "__main__": port = int(os.environ.get("PORT", 8585)) print(f"๐Ÿ”€ Ollama GPU Switcher running on http://0.0.0.0:{port}") print(f"๐Ÿ“„ Config: {CONFIG_PATH}") + print(f"๐Ÿฆ™ Ollama: {OLLAMA_URL}") app.run(host="0.0.0.0", port=port, debug=False) diff --git a/static/index.html b/static/index.html index c7d6054..5519319 100644 --- a/static/index.html +++ b/static/index.html @@ -18,6 +18,7 @@ --blue: #58a6ff; --red: #f85149; --purple: #bc8cff; + --yellow: #d29922; } * { box-sizing: border-box; margin: 0; padding: 0; } @@ -33,17 +34,8 @@ padding: 2rem 1rem; } - h1 { - font-size: 1.5rem; - font-weight: 600; - margin-bottom: 0.5rem; - } - - .subtitle { - color: var(--text-dim); - font-size: 0.875rem; - margin-bottom: 2rem; - } + h1 { font-size: 1.5rem; font-weight: 600; margin-bottom: 0.5rem; } + .subtitle { color: var(--text-dim); font-size: 0.875rem; margin-bottom: 2rem; } .card { background: 
var(--surface); @@ -51,7 +43,7 @@ border-radius: 12px; padding: 1.5rem; width: 100%; - max-width: 480px; + max-width: 520px; margin-bottom: 1rem; } @@ -73,8 +65,7 @@ } .mode-dot { - width: 12px; - height: 12px; + width: 12px; height: 12px; border-radius: 50%; flex-shrink: 0; } @@ -83,15 +74,11 @@ .mode-dot.lab { background: var(--orange); box-shadow: 0 0 8px var(--orange); } .mode-dot.mixed { background: var(--purple); box-shadow: 0 0 8px var(--purple); } - .mode-label { - font-size: 1.25rem; - font-weight: 600; - } + .mode-label { font-size: 1.25rem; font-weight: 600; } /* Toggle switch */ .toggle-container { display: flex; - gap: 0; border-radius: 8px; overflow: hidden; border: 1px solid var(--border); @@ -110,26 +97,12 @@ } .toggle-btn:hover { background: rgba(255,255,255,0.05); } - - .toggle-btn.active-work { - background: var(--green-dim); - color: white; - } - - .toggle-btn.active-lab { - background: var(--orange-dim); - color: white; - } - - .toggle-btn:disabled { - opacity: 0.5; - cursor: wait; - } + .toggle-btn.active-work { background: var(--green-dim); color: white; } + .toggle-btn.active-lab { background: var(--orange-dim); color: white; } + .toggle-btn:disabled { opacity: 0.5; cursor: wait; } /* Agent list */ - .agent-list { - list-style: none; - } + .agent-list { list-style: none; } .agent-item { display: flex; @@ -138,21 +111,110 @@ padding: 0.5rem 0; border-bottom: 1px solid var(--border); } - .agent-item:last-child { border-bottom: none; } - .agent-name { + .agent-name { font-weight: 500; } + + .model-tag { + font-size: 0.8rem; + font-family: 'SF Mono', SFMono-Regular, Consolas, monospace; + padding: 2px 8px; + border-radius: 4px; + background: rgba(255,255,255,0.05); + } + .model-tag.ollama { color: var(--green); border: 1px solid rgba(63,185,80,0.3); } + .model-tag.groq { color: var(--blue); border: 1px solid rgba(88,166,255,0.3); } + + /* Ollama status */ + .ollama-card { border-color: var(--green-dim); } + + .ollama-model { + display: flex; + 
justify-content: space-between; + align-items: center; + padding: 0.75rem; + background: rgba(255,255,255,0.03); + border-radius: 8px; + margin-bottom: 0.5rem; + } + + .ollama-model-info { display: flex; flex-direction: column; gap: 0.25rem; } + + .ollama-model-name { + font-weight: 600; + font-family: 'SF Mono', SFMono-Regular, Consolas, monospace; + font-size: 0.95rem; + } + + .ollama-model-meta { + font-size: 0.75rem; + color: var(--text-dim); + } + + .ollama-model-size { + font-size: 0.85rem; + font-weight: 600; + color: var(--blue); + } + + .ollama-empty { + text-align: center; + padding: 1rem; + color: var(--text-dim); + font-style: italic; + } + + /* Loading indicator */ + .loading-banner { + display: flex; + align-items: center; + gap: 0.75rem; + padding: 0.75rem 1rem; + background: rgba(210,153,34,0.1); + border: 1px solid rgba(210,153,34,0.3); + border-radius: 8px; + margin-bottom: 1rem; + } + + .loading-banner.done { + background: rgba(63,185,80,0.1); + border-color: rgba(63,185,80,0.3); + } + + .loading-banner.error { + background: rgba(248,81,73,0.1); + border-color: rgba(248,81,73,0.3); + } + + .loading-text { + font-size: 0.85rem; font-weight: 500; } - .agent-model { - font-size: 0.8rem; - color: var(--text-dim); - font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace; + .loading-banner .loading-text { color: var(--yellow); } + .loading-banner.done .loading-text { color: var(--green); } + .loading-banner.error .loading-text { color: var(--red); } + + @keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.4; } } - .agent-model.ollama { color: var(--green); } - .agent-model.groq { color: var(--blue); } + .pulse { animation: pulse 1.5s ease-in-out infinite; } + + @keyframes spin { + to { transform: rotate(360deg); } + } + + .spinner { + display: inline-block; + width: 16px; height: 16px; + border: 2px solid var(--border); + border-top-color: var(--yellow); + border-radius: 50%; + animation: spin 0.8s linear 
infinite; + flex-shrink: 0; + } /* Lab model selector */ .lab-model-row { @@ -170,10 +232,9 @@ background: var(--bg); color: var(--text); font-size: 0.85rem; - font-family: inherit; } - .lab-model-row button { + .lab-model-row button, .action-btn { padding: 0.5rem 1rem; border-radius: 6px; border: 1px solid var(--border); @@ -184,11 +245,8 @@ transition: all 0.2s; } - .lab-model-row button:hover { - background: rgba(255,255,255,0.1); - } + .lab-model-row button:hover, .action-btn:hover { background: rgba(255,255,255,0.1); } - /* Status bar */ .status-bar { text-align: center; font-size: 0.8rem; @@ -196,31 +254,30 @@ margin-top: 1rem; min-height: 1.2em; } - .status-bar.error { color: var(--red); } .status-bar.success { color: var(--green); } - /* Loading */ - .loading { - text-align: center; - padding: 2rem; + .vram-bar-container { + margin-top: 0.75rem; + background: rgba(255,255,255,0.05); + border-radius: 4px; + height: 8px; + overflow: hidden; + } + + .vram-bar { + height: 100%; + border-radius: 4px; + transition: width 0.5s ease; + background: var(--green); + } + + .vram-label { + display: flex; + justify-content: space-between; + font-size: 0.7rem; color: var(--text-dim); - } - - @keyframes spin { - to { transform: rotate(360deg); } - } - - .spinner { - display: inline-block; - width: 20px; - height: 20px; - border: 2px solid var(--border); - border-top-color: var(--blue); - border-radius: 50%; - animation: spin 0.8s linear infinite; - margin-right: 0.5rem; - vertical-align: middle; + margin-top: 0.25rem; } @@ -229,6 +286,12 @@

🔀 Ollama GPU Switcher

Toggle agents between work mode and lab experiments

+ + +

Current Mode

@@ -236,26 +299,36 @@ Loading...
- - + + +
+
+ +
+

🦙 Ollama VRAM

+
+
Checking...
+
+
+
+
+
+ 0 GB + 24 GB

GPU Agents

Lab Agent (Eric)

- loading... + loading...