feat: ollama VRAM status + model loading/pinning on switch
- Show loaded models with VRAM usage bar (24 GB 3090)
- On mode switch: unload the old model, load + pin the target model (keep_alive=-1m)
- Loading banner with spinner (polls faster, every 2s, while loading)
- Lab model changes also trigger a model swap when in lab mode
- Manual load/unload API endpoints
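
Note on the pinning mechanism: ollama's /api/generate accepts a keep_alive parameter; an empty prompt with keep_alive "-1m" loads the model into VRAM and keeps it resident indefinitely, and "0" evicts it immediately. A minimal sketch of the two calls, assuming a stock ollama server on localhost (the app itself reads the address from OLLAMA_URL):

    import requests

    OLLAMA = "http://localhost:11434"  # assumed address, for illustration only

    def pin(model):
        # Empty prompt + keep_alive="-1m" loads the model and pins it in VRAM
        r = requests.post(f"{OLLAMA}/api/generate",
                          json={"model": model, "prompt": "", "keep_alive": "-1m"},
                          timeout=300)
        r.raise_for_status()

    def unpin(model):
        # keep_alive="0" asks ollama to evict the model immediately
        r = requests.post(f"{OLLAMA}/api/generate",
                          json={"model": model, "prompt": "", "keep_alive": "0"},
                          timeout=30)
        r.raise_for_status()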
app.py (160 changed lines)
@@ -2,18 +2,21 @@
 """
 Ollama GPU Switcher — Toggle OpenClaw agents between work mode (qwen3) and lab mode (GPU exclusive).
 No LLM involved. Reads/writes openclaw.json directly, then signals the gateway to restart.
+Also manages ollama model loading/pinning via the ollama API.
 """
 
 import json
 import os
 import signal
 import subprocess
-import copy
+import threading
 from flask import Flask, jsonify, request, send_from_directory
+import requests as http_requests
 
 app = Flask(__name__, static_folder="static")
 
 CONFIG_PATH = os.environ.get("OPENCLAW_CONFIG", os.path.expanduser("~/.openclaw/openclaw.json"))
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://ollama.glenwood.schapira.nyc:11434")
 
 # Agents that use ollama and compete for GPU
 OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"]
@@ -21,6 +24,10 @@ OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"]
 WORK_PRIMARY = "ollama/qwen3-128k:14b"
 LAB_PRIMARY = "groq/llama-3.3-70b-versatile"
 
+# Model loading state (tracked in-process)
+_loading_state = {"model": None, "status": "idle"}  # idle | loading | done | error
+_loading_lock = threading.Lock()
+
 
 def read_config():
     with open(CONFIG_PATH, "r") as f:
@@ -39,7 +46,6 @@ def restart_gateway():
         subprocess.run(["openclaw", "gateway", "restart"], timeout=10, capture_output=True)
         return True
     except Exception:
-        # Fallback: try SIGUSR1 to the gateway process
        try:
            result = subprocess.run(["pgrep", "-f", "openclaw.*gateway"], capture_output=True, text=True)
            if result.stdout.strip():
@@ -77,6 +83,82 @@ def detect_mode(config):
     return "mixed"
 
 
+def ollama_ps():
+    """Get currently loaded models from ollama."""
+    try:
+        r = http_requests.get(f"{OLLAMA_URL}/api/ps", timeout=5)
+        r.raise_for_status()
+        data = r.json()
+        models = []
+        for m in data.get("models", []):
+            size_gb = m.get("size_vram", 0) / (1024**3)
+            models.append({
+                "name": m.get("name", "unknown"),
+                "size_vram_gb": round(size_gb, 1),
+                "parameter_size": m.get("details", {}).get("parameter_size", ""),
+                "quantization": m.get("details", {}).get("quantization_level", ""),
+                "family": m.get("details", {}).get("family", ""),
+                "context_length": m.get("context_length", 0),
+                "expires_at": m.get("expires_at", ""),
+            })
+        return {"ok": True, "models": models}
+    except Exception as e:
+        return {"ok": False, "models": [], "error": str(e)}
+
+
+def ollama_load_model(model_name, keep_alive="-1m"):
+    """Load a model into VRAM and pin it. keep_alive=-1m means forever."""
+    global _loading_state
+    with _loading_lock:
+        _loading_state = {"model": model_name, "status": "loading"}
+    try:
+        # Use /api/generate with an empty prompt to load & pin the model
+        r = http_requests.post(
+            f"{OLLAMA_URL}/api/generate",
+            json={
+                "model": model_name,
+                "prompt": "",
+                "keep_alive": keep_alive,
+            },
+            timeout=300,  # models can take a while to load
+        )
+        r.raise_for_status()
+        with _loading_lock:
+            _loading_state = {"model": model_name, "status": "done"}
+        return True
+    except Exception as e:
+        with _loading_lock:
+            _loading_state = {"model": model_name, "status": "error", "error": str(e)}
+        return False
+
+
+def ollama_unload_model(model_name):
+    """Unload a model from VRAM."""
+    try:
+        r = http_requests.post(
+            f"{OLLAMA_URL}/api/generate",
+            json={
+                "model": model_name,
+                "prompt": "",
+                "keep_alive": "0",
+            },
+            timeout=30,
+        )
+        r.raise_for_status()
+        return True
+    except Exception:
+        return False
+
+
+def load_model_async(model_name):
+    """Load a model in a background thread."""
+    t = threading.Thread(target=ollama_load_model, args=(model_name,), daemon=True)
+    t.start()
+
+
+# --- Routes ---
+
 @app.route("/")
 def index():
     return send_from_directory("static", "index.html")
@@ -104,7 +186,6 @@ def status():
             "model": lab.get("model", {}).get("primary", "unknown") if lab else "unknown",
         }
 
-        # Subagents default
        subagents_primary = (
            config.get("agents", {})
            .get("defaults", {})
@@ -124,6 +205,16 @@ def status():
         return jsonify({"ok": False, "error": str(e)}), 500
 
 
+@app.route("/api/ollama")
+def ollama_status():
+    """Get ollama loaded models + loading state."""
+    ps = ollama_ps()
+    with _loading_lock:
+        loading = dict(_loading_state)
+    ps["loading"] = loading
+    return jsonify(ps)
+
+
 @app.route("/api/switch", methods=["POST"])
 def switch():
     try:
@@ -132,13 +223,23 @@ def switch():
 
         if target_mode == "lab":
             new_primary = LAB_PRIMARY
+            target_ollama_model = None  # lab model is managed separately
         elif target_mode == "work":
             new_primary = WORK_PRIMARY
+            target_ollama_model = "qwen3-128k:14b"
         else:
             return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400
 
         config = read_config()
 
+        # Determine which ollama model to load based on mode
+        if target_mode == "lab":
+            lab = find_agent(config, "lab")
+            if lab:
+                lab_model = lab.get("model", {}).get("primary", "")
+                if "ollama/" in lab_model:
+                    target_ollama_model = lab_model.replace("ollama/", "")
+
         # Patch each agent's primary model
         for agent_id in OLLAMA_AGENTS:
             agent = find_agent(config, agent_id)
@@ -154,10 +255,21 @@ def switch():
         write_config(config)
         restarted = restart_gateway()
 
+        # Unload current models and load the target model
+        if target_ollama_model:
+            # First unload anything currently loaded
+            ps = ollama_ps()
+            for m in ps.get("models", []):
+                if m["name"] != target_ollama_model:
+                    ollama_unload_model(m["name"])
+            # Load and pin the target model async
+            load_model_async(target_ollama_model)
+
         return jsonify({
             "ok": True,
             "mode": target_mode,
             "restarted": restarted,
+            "loading_model": target_ollama_model,
         })
     except Exception as e:
         return jsonify({"ok": False, "error": str(e)}), 500
@@ -183,13 +295,53 @@ def set_lab_model():
         write_config(config)
         restarted = restart_gateway()
 
-        return jsonify({"ok": True, "model": model, "restarted": restarted})
+        # If currently in lab mode, load the new model
+        mode = detect_mode(config)
+        ollama_model_name = None
+        if mode == "lab" and "ollama/" in model:
+            ollama_model_name = model.replace("ollama/", "")
+            # Unload old models first
+            ps = ollama_ps()
+            for m in ps.get("models", []):
+                if m["name"] != ollama_model_name:
+                    ollama_unload_model(m["name"])
+            load_model_async(ollama_model_name)
+
+        return jsonify({
+            "ok": True,
+            "model": model,
+            "restarted": restarted,
+            "loading_model": ollama_model_name,
+        })
     except Exception as e:
         return jsonify({"ok": False, "error": str(e)}), 500
 
 
+@app.route("/api/ollama/load", methods=["POST"])
+def load_model():
+    """Manually load/pin a model."""
+    data = request.json or {}
+    model = data.get("model", "")
+    if not model:
+        return jsonify({"ok": False, "error": "No model specified"}), 400
+    load_model_async(model)
+    return jsonify({"ok": True, "loading": model})
+
+
+@app.route("/api/ollama/unload", methods=["POST"])
+def unload_model():
+    """Manually unload a model."""
+    data = request.json or {}
+    model = data.get("model", "")
+    if not model:
+        return jsonify({"ok": False, "error": "No model specified"}), 400
+    result = ollama_unload_model(model)
+    return jsonify({"ok": result, "unloaded": model})
+
+
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 8585))
     print(f"🔀 Ollama GPU Switcher running on http://0.0.0.0:{port}")
     print(f"📄 Config: {CONFIG_PATH}")
+    print(f"🦙 Ollama: {OLLAMA_URL}")
     app.run(host="0.0.0.0", port=port, debug=False)
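
The new manual endpoints can also be driven directly; a quick sketch, assuming the switcher is running on its default port 8585 (the host is hypothetical):

    import requests

    BASE = "http://localhost:8585"  # assumed host, for illustration only

    # Kick off a background load/pin; the endpoint returns immediately
    requests.post(f"{BASE}/api/ollama/load", json={"model": "qwen3-128k:14b"})

    # Loaded models, VRAM usage, and the in-process loading state
    print(requests.get(f"{BASE}/api/ollama").json())

    # Evict a model from VRAM
    requests.post(f"{BASE}/api/ollama/unload", json={"model": "qwen3-128k:14b"})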

static/index.html
@@ -18,6 +18,7 @@
   --blue: #58a6ff;
   --red: #f85149;
   --purple: #bc8cff;
+  --yellow: #d29922;
 }
 
 * { box-sizing: border-box; margin: 0; padding: 0; }
@@ -33,17 +34,8 @@
   padding: 2rem 1rem;
 }
 
-h1 {
-  font-size: 1.5rem;
-  font-weight: 600;
-  margin-bottom: 0.5rem;
-}
-
-.subtitle {
-  color: var(--text-dim);
-  font-size: 0.875rem;
-  margin-bottom: 2rem;
-}
+h1 { font-size: 1.5rem; font-weight: 600; margin-bottom: 0.5rem; }
+.subtitle { color: var(--text-dim); font-size: 0.875rem; margin-bottom: 2rem; }
 
 .card {
   background: var(--surface);
@@ -51,7 +43,7 @@
   border-radius: 12px;
   padding: 1.5rem;
   width: 100%;
-  max-width: 480px;
+  max-width: 520px;
   margin-bottom: 1rem;
 }
 
@@ -73,8 +65,7 @@
 }
 
 .mode-dot {
-  width: 12px;
-  height: 12px;
+  width: 12px; height: 12px;
   border-radius: 50%;
   flex-shrink: 0;
 }
@@ -83,15 +74,11 @@
 .mode-dot.lab { background: var(--orange); box-shadow: 0 0 8px var(--orange); }
 .mode-dot.mixed { background: var(--purple); box-shadow: 0 0 8px var(--purple); }
 
-.mode-label {
-  font-size: 1.25rem;
-  font-weight: 600;
-}
+.mode-label { font-size: 1.25rem; font-weight: 600; }
 
 /* Toggle switch */
 .toggle-container {
   display: flex;
-  gap: 0;
   border-radius: 8px;
   overflow: hidden;
   border: 1px solid var(--border);
@@ -110,26 +97,12 @@
 }
 
 .toggle-btn:hover { background: rgba(255,255,255,0.05); }
-
-.toggle-btn.active-work {
-  background: var(--green-dim);
-  color: white;
-}
-
-.toggle-btn.active-lab {
-  background: var(--orange-dim);
-  color: white;
-}
-
-.toggle-btn:disabled {
-  opacity: 0.5;
-  cursor: wait;
-}
+.toggle-btn.active-work { background: var(--green-dim); color: white; }
+.toggle-btn.active-lab { background: var(--orange-dim); color: white; }
+.toggle-btn:disabled { opacity: 0.5; cursor: wait; }
 
 /* Agent list */
-.agent-list {
-  list-style: none;
-}
+.agent-list { list-style: none; }
 
 .agent-item {
   display: flex;
@@ -138,21 +111,110 @@
   padding: 0.5rem 0;
   border-bottom: 1px solid var(--border);
 }
 
 .agent-item:last-child { border-bottom: none; }
 
-.agent-name {
+.agent-name { font-weight: 500; }
+
+.model-tag {
+  font-size: 0.8rem;
+  font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
+  padding: 2px 8px;
+  border-radius: 4px;
+  background: rgba(255,255,255,0.05);
+}
+.model-tag.ollama { color: var(--green); border: 1px solid rgba(63,185,80,0.3); }
+.model-tag.groq { color: var(--blue); border: 1px solid rgba(88,166,255,0.3); }
+
+/* Ollama status */
+.ollama-card { border-color: var(--green-dim); }
+
+.ollama-model {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  padding: 0.75rem;
+  background: rgba(255,255,255,0.03);
+  border-radius: 8px;
+  margin-bottom: 0.5rem;
+}
+
+.ollama-model-info { display: flex; flex-direction: column; gap: 0.25rem; }
+
+.ollama-model-name {
+  font-weight: 600;
+  font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
+  font-size: 0.95rem;
+}
+
+.ollama-model-meta {
+  font-size: 0.75rem;
+  color: var(--text-dim);
+}
+
+.ollama-model-size {
+  font-size: 0.85rem;
+  font-weight: 600;
+  color: var(--blue);
+}
+
+.ollama-empty {
+  text-align: center;
+  padding: 1rem;
+  color: var(--text-dim);
+  font-style: italic;
+}
+
+/* Loading indicator */
+.loading-banner {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+  padding: 0.75rem 1rem;
+  background: rgba(210,153,34,0.1);
+  border: 1px solid rgba(210,153,34,0.3);
+  border-radius: 8px;
+  margin-bottom: 1rem;
+}
+
+.loading-banner.done {
+  background: rgba(63,185,80,0.1);
+  border-color: rgba(63,185,80,0.3);
+}
+
+.loading-banner.error {
+  background: rgba(248,81,73,0.1);
+  border-color: rgba(248,81,73,0.3);
+}
+
+.loading-text {
+  font-size: 0.85rem;
   font-weight: 500;
 }
 
-.agent-model {
-  font-size: 0.8rem;
-  color: var(--text-dim);
-  font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
-}
+.loading-banner .loading-text { color: var(--yellow); }
+.loading-banner.done .loading-text { color: var(--green); }
+.loading-banner.error .loading-text { color: var(--red); }
 
-.agent-model.ollama { color: var(--green); }
-.agent-model.groq { color: var(--blue); }
+@keyframes pulse {
+  0%, 100% { opacity: 1; }
+  50% { opacity: 0.4; }
+}
+
+.pulse { animation: pulse 1.5s ease-in-out infinite; }
+
+@keyframes spin {
+  to { transform: rotate(360deg); }
+}
+
+.spinner {
+  display: inline-block;
+  width: 16px; height: 16px;
+  border: 2px solid var(--border);
+  border-top-color: var(--yellow);
+  border-radius: 50%;
+  animation: spin 0.8s linear infinite;
+  flex-shrink: 0;
+}
 
 /* Lab model selector */
 .lab-model-row {
@@ -170,10 +232,9 @@
   background: var(--bg);
   color: var(--text);
   font-size: 0.85rem;
-  font-family: inherit;
 }
 
-.lab-model-row button {
+.lab-model-row button, .action-btn {
   padding: 0.5rem 1rem;
   border-radius: 6px;
   border: 1px solid var(--border);
@@ -184,11 +245,8 @@
   transition: all 0.2s;
 }
 
-.lab-model-row button:hover {
-  background: rgba(255,255,255,0.1);
-}
+.lab-model-row button:hover, .action-btn:hover { background: rgba(255,255,255,0.1); }
 
-/* Status bar */
 .status-bar {
   text-align: center;
   font-size: 0.8rem;
@@ -196,31 +254,30 @@
   margin-top: 1rem;
   min-height: 1.2em;
 }
 
 .status-bar.error { color: var(--red); }
 .status-bar.success { color: var(--green); }
 
-/* Loading */
-.loading {
-  text-align: center;
-  padding: 2rem;
+.vram-bar-container {
+  margin-top: 0.75rem;
+  background: rgba(255,255,255,0.05);
+  border-radius: 4px;
+  height: 8px;
+  overflow: hidden;
+}
+
+.vram-bar {
+  height: 100%;
+  border-radius: 4px;
+  transition: width 0.5s ease;
+  background: var(--green);
+}
+
+.vram-label {
+  display: flex;
+  justify-content: space-between;
+  font-size: 0.7rem;
   color: var(--text-dim);
-}
-
-@keyframes spin {
-  to { transform: rotate(360deg); }
-}
-
-.spinner {
-  display: inline-block;
-  width: 20px;
-  height: 20px;
-  border: 2px solid var(--border);
-  border-top-color: var(--blue);
-  border-radius: 50%;
-  animation: spin 0.8s linear infinite;
-  margin-right: 0.5rem;
-  vertical-align: middle;
+  margin-top: 0.25rem;
 }
 </style>
 </head>
@@ -229,6 +286,12 @@
   <h1>🔀 Ollama GPU Switcher</h1>
   <p class="subtitle">Toggle agents between work mode and lab experiments</p>
 
+  <!-- Loading banner -->
+  <div id="loading-banner" class="loading-banner" style="display:none; max-width:520px; width:100%;">
+    <span class="spinner" id="loading-spinner"></span>
+    <span class="loading-text" id="loading-text">Loading model...</span>
+  </div>
+
   <div class="card">
     <h2>Current Mode</h2>
     <div class="mode-display">
@@ -236,26 +299,36 @@
       <span id="mode-label" class="mode-label">Loading...</span>
     </div>
     <div class="toggle-container">
-      <button id="btn-work" class="toggle-btn" onclick="switchMode('work')">
-        🛠️ Work Mode
-      </button>
-      <button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')">
-        🧪 Lab Mode
-      </button>
+      <button id="btn-work" class="toggle-btn" onclick="switchMode('work')">🛠️ Work Mode</button>
+      <button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')">🧪 Lab Mode</button>
+    </div>
+  </div>
+
+  <div class="card ollama-card">
+    <h2>🦙 Ollama VRAM</h2>
+    <div id="ollama-models">
+      <div class="ollama-empty">Checking...</div>
+    </div>
+    <div class="vram-bar-container">
+      <div id="vram-bar" class="vram-bar" style="width: 0%"></div>
+    </div>
+    <div class="vram-label">
+      <span id="vram-used">0 GB</span>
+      <span>24 GB</span>
     </div>
   </div>
 
   <div class="card">
     <h2>GPU Agents</h2>
     <ul id="agent-list" class="agent-list">
-      <li class="loading"><span class="spinner"></span> Loading...</li>
+      <li class="ollama-empty">Loading...</li>
     </ul>
   </div>
 
   <div class="card">
     <h2>Lab Agent (Eric)</h2>
     <div id="lab-info" style="margin-bottom: 0.5rem;">
-      <span class="agent-model">loading...</span>
+      <span class="model-tag">loading...</span>
     </div>
     <div class="lab-model-row">
       <select id="lab-model-select">
@@ -276,36 +349,39 @@
 <div id="status-bar" class="status-bar"></div>
 
 <script>
+const VRAM_TOTAL_GB = 24; // RTX 3090
 let currentMode = 'unknown';
 let switching = false;
+let pollInterval = null;
 
 async function fetchStatus() {
   try {
-    const r = await fetch('/api/status');
-    const data = await r.json();
-    if (!data.ok) throw new Error(data.error);
-    updateUI(data);
+    const [statusR, ollamaR] = await Promise.all([
+      fetch('/api/status'),
+      fetch('/api/ollama'),
+    ]);
+    const statusData = await statusR.json();
+    const ollamaData = await ollamaR.json();
+    if (statusData.ok) updateModeUI(statusData);
+    updateOllamaUI(ollamaData);
   } catch (e) {
     showStatus('Failed to fetch status: ' + e.message, 'error');
   }
 }
 
-function updateUI(data) {
+function updateModeUI(data) {
   currentMode = data.mode;
 
-  // Mode indicator
   const dot = document.getElementById('mode-dot');
   const label = document.getElementById('mode-label');
   dot.className = 'mode-dot ' + data.mode;
 
   const modeNames = {
     work: '🛠️ Work Mode — Agents on qwen3',
-    lab: '🧪 Lab Mode — Agents on groq, GPU free',
+    lab: '🧪 Lab Mode — GPU free for experiments',
     mixed: '⚠️ Mixed — Check agent config',
   };
   label.textContent = modeNames[data.mode] || data.mode;
 
-  // Toggle buttons
   const btnWork = document.getElementById('btn-work');
   const btnLab = document.getElementById('btn-lab');
   btnWork.className = 'toggle-btn' + (data.mode === 'work' ? ' active-work' : '');
@@ -319,7 +395,7 @@ function updateUI(data) {
     const shortModel = a.model.replace('ollama/', '').replace('groq/', '');
     return `<li class="agent-item">
       <span class="agent-name">${a.name}</span>
-      <span class="agent-model ${cls}">${shortModel}</span>
+      <span class="model-tag ${cls}">${shortModel}</span>
     </li>`;
   }).join('');
 
@@ -327,26 +403,95 @@ function updateUI(data) {
   const labInfo = document.getElementById('lab-info');
   const shortLab = data.lab.model.replace('ollama/', '').replace('groq/', '');
   const labCls = data.lab.model.includes('ollama/') ? 'ollama' : 'groq';
-  labInfo.innerHTML = `Current: <span class="agent-model ${labCls}">${shortLab}</span>`;
+  labInfo.innerHTML = `Current: <span class="model-tag ${labCls}">${shortLab}</span>`;
 
-  // Set select to current value
   const select = document.getElementById('lab-model-select');
   for (let opt of select.options) {
-    if (opt.value === data.lab.model) {
-      opt.selected = true;
-      break;
-    }
+    if (opt.value === data.lab.model) { opt.selected = true; break; }
   }
 }
+
+function updateOllamaUI(data) {
+  const container = document.getElementById('ollama-models');
+  const banner = document.getElementById('loading-banner');
+  const loadingText = document.getElementById('loading-text');
+  const spinner = document.getElementById('loading-spinner');
+
+  // Loading banner
+  if (data.loading && data.loading.status === 'loading') {
+    banner.style.display = 'flex';
+    banner.className = 'loading-banner';
+    spinner.style.display = 'inline-block';
+    loadingText.textContent = `Loading ${data.loading.model}...`;
+    // Poll faster while loading
+    startFastPoll();
+  } else if (data.loading && data.loading.status === 'done') {
+    banner.style.display = 'flex';
+    banner.className = 'loading-banner done';
+    spinner.style.display = 'none';
+    loadingText.textContent = `✅ ${data.loading.model} loaded and pinned`;
+    stopFastPoll();
+    setTimeout(() => { banner.style.display = 'none'; }, 5000);
+  } else if (data.loading && data.loading.status === 'error') {
+    banner.style.display = 'flex';
+    banner.className = 'loading-banner error';
+    spinner.style.display = 'none';
+    loadingText.textContent = `❌ Failed to load ${data.loading.model}`;
+    stopFastPoll();
+    setTimeout(() => { banner.style.display = 'none'; }, 8000);
+  } else {
+    banner.style.display = 'none';
+    stopFastPoll();
+  }
+
+  // Model list
+  if (!data.ok) {
+    container.innerHTML = `<div class="ollama-empty" style="color:var(--red)">⚠️ Ollama unreachable</div>`;
+    document.getElementById('vram-bar').style.width = '0%';
+    document.getElementById('vram-used').textContent = '? GB';
+    return;
+  }
+
+  if (data.models.length === 0) {
+    container.innerHTML = `<div class="ollama-empty">No models loaded in VRAM</div>`;
+    document.getElementById('vram-bar').style.width = '0%';
+    document.getElementById('vram-used').textContent = '0 GB';
+    return;
+  }
+
+  let totalVram = 0;
+  container.innerHTML = data.models.map(m => {
+    totalVram += m.size_vram_gb;
+    return `<div class="ollama-model">
+      <div class="ollama-model-info">
+        <span class="ollama-model-name">${m.name}</span>
+        <span class="ollama-model-meta">${m.parameter_size} · ${m.quantization} · ${m.family} · ctx ${m.context_length.toLocaleString()}</span>
+      </div>
+      <span class="ollama-model-size">${m.size_vram_gb} GB</span>
+    </div>`;
+  }).join('');
+
+  // VRAM bar
+  const pct = Math.min((totalVram / VRAM_TOTAL_GB) * 100, 100);
+  const bar = document.getElementById('vram-bar');
+  bar.style.width = pct + '%';
+  bar.style.background = pct > 85 ? 'var(--red)' : pct > 65 ? 'var(--orange)' : 'var(--green)';
+  document.getElementById('vram-used').textContent = totalVram.toFixed(1) + ' GB';
+}
+
+let fastPollId = null;
+function startFastPoll() {
+  if (fastPollId) return;
+  fastPollId = setInterval(fetchStatus, 2000);
+}
+function stopFastPoll() {
+  if (fastPollId) { clearInterval(fastPollId); fastPollId = null; }
+}
 
 async function switchMode(mode) {
-  if (switching) return;
-  if (mode === currentMode) return;
+  if (switching || mode === currentMode) return;
 
   switching = true;
-  const btns = document.querySelectorAll('.toggle-btn');
-  btns.forEach(b => b.disabled = true);
+  document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = true);
 
   showStatus('Switching to ' + mode + ' mode...', '');
 
   try {
@@ -357,28 +502,22 @@ async function switchMode(mode) {
     });
     const data = await r.json();
     if (!data.ok) throw new Error(data.error);
-    showStatus('Switched to ' + mode + ' mode. Gateway restarting...', 'success');
+    showStatus('Switched to ' + mode + ' mode. Loading model...', 'success');
 
-    // Wait for gateway to restart, then refresh
     setTimeout(async () => {
       await fetchStatus();
       switching = false;
-      btns.forEach(b => b.disabled = false);
-    }, 3000);
+      document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = false);
+    }, 2000);
   } catch (e) {
     showStatus('Switch failed: ' + e.message, 'error');
     switching = false;
-    btns.forEach(b => b.disabled = false);
+    document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = false);
   }
 }
 
 async function setLabModel() {
-  const select = document.getElementById('lab-model-select');
-  const model = select.value;
+  const model = document.getElementById('lab-model-select').value;
 
   showStatus('Setting lab model to ' + model + '...', '');
 
   try {
     const r = await fetch('/api/lab-model', {
       method: 'POST',
|
|||||||
});
|
});
|
||||||
const data = await r.json();
|
const data = await r.json();
|
||||||
if (!data.ok) throw new Error(data.error);
|
if (!data.ok) throw new Error(data.error);
|
||||||
|
showStatus('Lab model updated.', 'success');
|
||||||
showStatus('Lab model updated. Gateway restarting...', 'success');
|
setTimeout(fetchStatus, 2000);
|
||||||
setTimeout(fetchStatus, 3000);
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
showStatus('Failed: ' + e.message, 'error');
|
showStatus('Failed: ' + e.message, 'error');
|
||||||
}
|
}
|
||||||
@@ -401,10 +539,8 @@ function showStatus(msg, type) {
   bar.className = 'status-bar' + (type ? ' ' + type : '');
 }
 
-// Init
 fetchStatus();
-// Auto-refresh every 30s
-setInterval(fetchStatus, 30000);
+setInterval(fetchStatus, 10000);
 </script>
 </body>
 </html>
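
A script that needs to block until a pin finishes can poll /api/ollama the same way the UI does (every 2s while loading.status is "loading"); a sketch under the same localhost assumption:

    import time
    import requests

    BASE = "http://localhost:8585"  # assumed host, for illustration only

    def wait_for_load(timeout_s=300):
        # Poll the in-process loading state until it leaves "loading"
        # (2s matches the UI's fast-poll interval)
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            state = requests.get(f"{BASE}/api/ollama", timeout=5).json().get("loading", {})
            if state.get("status") != "loading":
                return state  # status is "done", "error", or "idle"
            time.sleep(2)
        raise TimeoutError("model load did not finish in time")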