feat: ollama VRAM status + model loading/pinning on switch

- Show loaded models with VRAM usage bar (24GB 3090)
- On mode switch: unload old model, load+pin target model (keep_alive=-1m)
- Loading banner with spinner (polls faster at 2s while loading)
- Lab model changes also trigger model swap when in lab mode
- Manual load/unload API endpoints
This commit is contained in:
2026-02-18 19:47:22 +00:00
parent 3366d6d9ec
commit d678a4d3d4
2 changed files with 411 additions and 123 deletions

160
app.py
View File

@@ -2,18 +2,21 @@
""" """
Ollama GPU Switcher — Toggle OpenClaw agents between work mode (qwen3) and lab mode (GPU exclusive). Ollama GPU Switcher — Toggle OpenClaw agents between work mode (qwen3) and lab mode (GPU exclusive).
No LLM involved. Reads/writes openclaw.json directly, then signals the gateway to restart. No LLM involved. Reads/writes openclaw.json directly, then signals the gateway to restart.
Also manages ollama model loading/pinning via the ollama API.
""" """
import json import json
import os import os
import signal import signal
import subprocess import subprocess
import copy import threading
from flask import Flask, jsonify, request, send_from_directory from flask import Flask, jsonify, request, send_from_directory
import requests as http_requests
app = Flask(__name__, static_folder="static") app = Flask(__name__, static_folder="static")
CONFIG_PATH = os.environ.get("OPENCLAW_CONFIG", os.path.expanduser("~/.openclaw/openclaw.json")) CONFIG_PATH = os.environ.get("OPENCLAW_CONFIG", os.path.expanduser("~/.openclaw/openclaw.json"))
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://ollama.glenwood.schapira.nyc:11434")
# Agents that use ollama and compete for GPU # Agents that use ollama and compete for GPU
OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"] OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"]
@@ -21,6 +24,10 @@ OLLAMA_AGENTS = ["rex", "maddy", "coder", "research"]
WORK_PRIMARY = "ollama/qwen3-128k:14b" WORK_PRIMARY = "ollama/qwen3-128k:14b"
LAB_PRIMARY = "groq/llama-3.3-70b-versatile" LAB_PRIMARY = "groq/llama-3.3-70b-versatile"
# Model loading state (tracked in-process)
_loading_state = {"model": None, "status": "idle"} # idle | loading | done | error
_loading_lock = threading.Lock()
def read_config(): def read_config():
with open(CONFIG_PATH, "r") as f: with open(CONFIG_PATH, "r") as f:
@@ -39,7 +46,6 @@ def restart_gateway():
subprocess.run(["openclaw", "gateway", "restart"], timeout=10, capture_output=True) subprocess.run(["openclaw", "gateway", "restart"], timeout=10, capture_output=True)
return True return True
except Exception: except Exception:
# Fallback: try SIGUSR1 to the gateway process
try: try:
result = subprocess.run(["pgrep", "-f", "openclaw.*gateway"], capture_output=True, text=True) result = subprocess.run(["pgrep", "-f", "openclaw.*gateway"], capture_output=True, text=True)
if result.stdout.strip(): if result.stdout.strip():
@@ -77,6 +83,82 @@ def detect_mode(config):
return "mixed" return "mixed"
def ollama_ps():
    """Return the models currently resident in ollama's VRAM.

    Queries the ollama ``/api/ps`` endpoint and normalizes each entry
    into a flat dict.  Always returns ``{"ok": bool, "models": [...]}``,
    with an ``"error"`` key added when the request fails.
    """
    try:
        resp = http_requests.get(f"{OLLAMA_URL}/api/ps", timeout=5)
        resp.raise_for_status()
        payload = resp.json()
        loaded = []
        for entry in payload.get("models", []):
            details = entry.get("details", {})
            # size_vram is reported in bytes; present it as GB.
            vram_gb = entry.get("size_vram", 0) / (1024 ** 3)
            loaded.append({
                "name": entry.get("name", "unknown"),
                "size_vram_gb": round(vram_gb, 1),
                "parameter_size": details.get("parameter_size", ""),
                "quantization": details.get("quantization_level", ""),
                "family": details.get("family", ""),
                "context_length": entry.get("context_length", 0),
                "expires_at": entry.get("expires_at", ""),
            })
        return {"ok": True, "models": loaded}
    except Exception as exc:
        # Ollama unreachable or returned junk — report, don't crash.
        return {"ok": False, "models": [], "error": str(exc)}
def ollama_load_model(model_name, keep_alive="-1m"):
    """Load *model_name* into VRAM and pin it there.

    A ``keep_alive`` of ``"-1m"`` tells ollama never to evict the model.
    Publishes progress through the shared ``_loading_state`` so the
    ``/api/ollama`` route can report it.  Returns True on success.
    """
    global _loading_state
    with _loading_lock:
        _loading_state = {"model": model_name, "status": "loading"}
    body = {"model": model_name, "prompt": "", "keep_alive": keep_alive}
    try:
        # An empty-prompt /api/generate call makes ollama load the model
        # without producing output; big models can take minutes, hence
        # the generous timeout.
        resp = http_requests.post(
            f"{OLLAMA_URL}/api/generate", json=body, timeout=300
        )
        resp.raise_for_status()
    except Exception as exc:
        with _loading_lock:
            _loading_state = {
                "model": model_name, "status": "error", "error": str(exc)
            }
        return False
    with _loading_lock:
        _loading_state = {"model": model_name, "status": "done"}
    return True
def ollama_unload_model(model_name):
    """Evict *model_name* from VRAM.

    Uses an empty-prompt generate call with ``keep_alive="0"``, which
    ollama interprets as "unload immediately".  Returns True if the
    request was acknowledged, False on any failure.
    """
    body = {"model": model_name, "prompt": "", "keep_alive": "0"}
    try:
        resp = http_requests.post(
            f"{OLLAMA_URL}/api/generate", json=body, timeout=30
        )
        resp.raise_for_status()
    except Exception:
        # Best-effort: callers only need a boolean outcome.
        return False
    return True
def load_model_async(model_name):
    """Start ollama_load_model on a daemon thread and return at once.

    The daemon flag keeps a slow model load from blocking interpreter
    shutdown; progress is observable via ``_loading_state``.
    """
    worker = threading.Thread(
        target=ollama_load_model, args=(model_name,), daemon=True
    )
    worker.start()
# --- Routes ---
@app.route("/") @app.route("/")
def index(): def index():
return send_from_directory("static", "index.html") return send_from_directory("static", "index.html")
@@ -104,7 +186,6 @@ def status():
"model": lab.get("model", {}).get("primary", "unknown") if lab else "unknown", "model": lab.get("model", {}).get("primary", "unknown") if lab else "unknown",
} }
# Subagents default
subagents_primary = ( subagents_primary = (
config.get("agents", {}) config.get("agents", {})
.get("defaults", {}) .get("defaults", {})
@@ -124,6 +205,16 @@ def status():
return jsonify({"ok": False, "error": str(e)}), 500 return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/ollama")
def ollama_status():
    """Report ollama's loaded models plus the in-process loading state."""
    result = ollama_ps()
    # Snapshot the shared state under the lock so a concurrent loader
    # thread can't mutate it mid-serialization.
    with _loading_lock:
        result["loading"] = dict(_loading_state)
    return jsonify(result)
@app.route("/api/switch", methods=["POST"]) @app.route("/api/switch", methods=["POST"])
def switch(): def switch():
try: try:
@@ -132,13 +223,23 @@ def switch():
if target_mode == "lab": if target_mode == "lab":
new_primary = LAB_PRIMARY new_primary = LAB_PRIMARY
target_ollama_model = None # lab model is managed separately
elif target_mode == "work": elif target_mode == "work":
new_primary = WORK_PRIMARY new_primary = WORK_PRIMARY
target_ollama_model = "qwen3-128k:14b"
else: else:
return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400 return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400
config = read_config() config = read_config()
# Determine which ollama model to load based on mode
if target_mode == "lab":
lab = find_agent(config, "lab")
if lab:
lab_model = lab.get("model", {}).get("primary", "")
if "ollama/" in lab_model:
target_ollama_model = lab_model.replace("ollama/", "")
# Patch each agent's primary model # Patch each agent's primary model
for agent_id in OLLAMA_AGENTS: for agent_id in OLLAMA_AGENTS:
agent = find_agent(config, agent_id) agent = find_agent(config, agent_id)
@@ -154,10 +255,21 @@ def switch():
write_config(config) write_config(config)
restarted = restart_gateway() restarted = restart_gateway()
# Unload current models and load the target model
if target_ollama_model:
# First unload anything currently loaded
ps = ollama_ps()
for m in ps.get("models", []):
if m["name"] != target_ollama_model:
ollama_unload_model(m["name"])
# Load and pin the target model async
load_model_async(target_ollama_model)
return jsonify({ return jsonify({
"ok": True, "ok": True,
"mode": target_mode, "mode": target_mode,
"restarted": restarted, "restarted": restarted,
"loading_model": target_ollama_model,
}) })
except Exception as e: except Exception as e:
return jsonify({"ok": False, "error": str(e)}), 500 return jsonify({"ok": False, "error": str(e)}), 500
@@ -183,13 +295,53 @@ def set_lab_model():
write_config(config) write_config(config)
restarted = restart_gateway() restarted = restart_gateway()
return jsonify({"ok": True, "model": model, "restarted": restarted}) # If currently in lab mode, load the new model
mode = detect_mode(config)
ollama_model_name = None
if mode == "lab" and "ollama/" in model:
ollama_model_name = model.replace("ollama/", "")
# Unload old models first
ps = ollama_ps()
for m in ps.get("models", []):
if m["name"] != ollama_model_name:
ollama_unload_model(m["name"])
load_model_async(ollama_model_name)
return jsonify({
"ok": True,
"model": model,
"restarted": restarted,
"loading_model": ollama_model_name,
})
except Exception as e: except Exception as e:
return jsonify({"ok": False, "error": str(e)}), 500 return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/ollama/load", methods=["POST"])
def load_model():
    """Manually load and pin a model.

    Expects JSON ``{"model": "<name>"}``.  Returns 400 when no model is
    given; otherwise kicks off the load in a background thread and
    returns immediately (poll ``/api/ollama`` for progress).
    """
    # get_json(silent=True) returns None instead of raising a 415 when
    # the client sends a missing/wrong Content-Type, so the `or {}`
    # fallback actually works and we answer with our own JSON error.
    data = request.get_json(silent=True) or {}
    model = data.get("model", "")
    if not model:
        return jsonify({"ok": False, "error": "No model specified"}), 400
    load_model_async(model)
    return jsonify({"ok": True, "loading": model})
@app.route("/api/ollama/unload", methods=["POST"])
def unload_model():
    """Manually unload a model from VRAM.

    Expects JSON ``{"model": "<name>"}``.  Blocks until ollama responds
    (short timeout) and reports whether the unload succeeded.
    """
    # silent=True avoids werkzeug's 415 on non-JSON bodies so we can
    # return a consistent JSON error payload instead.
    data = request.get_json(silent=True) or {}
    model = data.get("model", "")
    if not model:
        return jsonify({"ok": False, "error": "No model specified"}), 400
    result = ollama_unload_model(model)
    return jsonify({"ok": result, "unloaded": model})
if __name__ == "__main__": if __name__ == "__main__":
port = int(os.environ.get("PORT", 8585)) port = int(os.environ.get("PORT", 8585))
print(f"🔀 Ollama GPU Switcher running on http://0.0.0.0:{port}") print(f"🔀 Ollama GPU Switcher running on http://0.0.0.0:{port}")
print(f"📄 Config: {CONFIG_PATH}") print(f"📄 Config: {CONFIG_PATH}")
print(f"🦙 Ollama: {OLLAMA_URL}")
app.run(host="0.0.0.0", port=port, debug=False) app.run(host="0.0.0.0", port=port, debug=False)

View File

@@ -18,6 +18,7 @@
--blue: #58a6ff; --blue: #58a6ff;
--red: #f85149; --red: #f85149;
--purple: #bc8cff; --purple: #bc8cff;
--yellow: #d29922;
} }
* { box-sizing: border-box; margin: 0; padding: 0; } * { box-sizing: border-box; margin: 0; padding: 0; }
@@ -33,17 +34,8 @@
padding: 2rem 1rem; padding: 2rem 1rem;
} }
h1 { h1 { font-size: 1.5rem; font-weight: 600; margin-bottom: 0.5rem; }
font-size: 1.5rem; .subtitle { color: var(--text-dim); font-size: 0.875rem; margin-bottom: 2rem; }
font-weight: 600;
margin-bottom: 0.5rem;
}
.subtitle {
color: var(--text-dim);
font-size: 0.875rem;
margin-bottom: 2rem;
}
.card { .card {
background: var(--surface); background: var(--surface);
@@ -51,7 +43,7 @@
border-radius: 12px; border-radius: 12px;
padding: 1.5rem; padding: 1.5rem;
width: 100%; width: 100%;
max-width: 480px; max-width: 520px;
margin-bottom: 1rem; margin-bottom: 1rem;
} }
@@ -73,8 +65,7 @@
} }
.mode-dot { .mode-dot {
width: 12px; width: 12px; height: 12px;
height: 12px;
border-radius: 50%; border-radius: 50%;
flex-shrink: 0; flex-shrink: 0;
} }
@@ -83,15 +74,11 @@
.mode-dot.lab { background: var(--orange); box-shadow: 0 0 8px var(--orange); } .mode-dot.lab { background: var(--orange); box-shadow: 0 0 8px var(--orange); }
.mode-dot.mixed { background: var(--purple); box-shadow: 0 0 8px var(--purple); } .mode-dot.mixed { background: var(--purple); box-shadow: 0 0 8px var(--purple); }
.mode-label { .mode-label { font-size: 1.25rem; font-weight: 600; }
font-size: 1.25rem;
font-weight: 600;
}
/* Toggle switch */ /* Toggle switch */
.toggle-container { .toggle-container {
display: flex; display: flex;
gap: 0;
border-radius: 8px; border-radius: 8px;
overflow: hidden; overflow: hidden;
border: 1px solid var(--border); border: 1px solid var(--border);
@@ -110,26 +97,12 @@
} }
.toggle-btn:hover { background: rgba(255,255,255,0.05); } .toggle-btn:hover { background: rgba(255,255,255,0.05); }
.toggle-btn.active-work { background: var(--green-dim); color: white; }
.toggle-btn.active-work { .toggle-btn.active-lab { background: var(--orange-dim); color: white; }
background: var(--green-dim); .toggle-btn:disabled { opacity: 0.5; cursor: wait; }
color: white;
}
.toggle-btn.active-lab {
background: var(--orange-dim);
color: white;
}
.toggle-btn:disabled {
opacity: 0.5;
cursor: wait;
}
/* Agent list */ /* Agent list */
.agent-list { .agent-list { list-style: none; }
list-style: none;
}
.agent-item { .agent-item {
display: flex; display: flex;
@@ -138,21 +111,110 @@
padding: 0.5rem 0; padding: 0.5rem 0;
border-bottom: 1px solid var(--border); border-bottom: 1px solid var(--border);
} }
.agent-item:last-child { border-bottom: none; } .agent-item:last-child { border-bottom: none; }
.agent-name { .agent-name { font-weight: 500; }
.model-tag {
font-size: 0.8rem;
font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
padding: 2px 8px;
border-radius: 4px;
background: rgba(255,255,255,0.05);
}
.model-tag.ollama { color: var(--green); border: 1px solid rgba(63,185,80,0.3); }
.model-tag.groq { color: var(--blue); border: 1px solid rgba(88,166,255,0.3); }
/* Ollama status */
.ollama-card { border-color: var(--green-dim); }
.ollama-model {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.75rem;
background: rgba(255,255,255,0.03);
border-radius: 8px;
margin-bottom: 0.5rem;
}
.ollama-model-info { display: flex; flex-direction: column; gap: 0.25rem; }
.ollama-model-name {
font-weight: 600;
font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
font-size: 0.95rem;
}
.ollama-model-meta {
font-size: 0.75rem;
color: var(--text-dim);
}
.ollama-model-size {
font-size: 0.85rem;
font-weight: 600;
color: var(--blue);
}
.ollama-empty {
text-align: center;
padding: 1rem;
color: var(--text-dim);
font-style: italic;
}
/* Loading indicator */
.loading-banner {
display: flex;
align-items: center;
gap: 0.75rem;
padding: 0.75rem 1rem;
background: rgba(210,153,34,0.1);
border: 1px solid rgba(210,153,34,0.3);
border-radius: 8px;
margin-bottom: 1rem;
}
.loading-banner.done {
background: rgba(63,185,80,0.1);
border-color: rgba(63,185,80,0.3);
}
.loading-banner.error {
background: rgba(248,81,73,0.1);
border-color: rgba(248,81,73,0.3);
}
.loading-text {
font-size: 0.85rem;
font-weight: 500; font-weight: 500;
} }
.agent-model { .loading-banner .loading-text { color: var(--yellow); }
font-size: 0.8rem; .loading-banner.done .loading-text { color: var(--green); }
color: var(--text-dim); .loading-banner.error .loading-text { color: var(--red); }
font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
} }
.agent-model.ollama { color: var(--green); } .pulse { animation: pulse 1.5s ease-in-out infinite; }
.agent-model.groq { color: var(--blue); }
@keyframes spin {
to { transform: rotate(360deg); }
}
.spinner {
display: inline-block;
width: 16px; height: 16px;
border: 2px solid var(--border);
border-top-color: var(--yellow);
border-radius: 50%;
animation: spin 0.8s linear infinite;
flex-shrink: 0;
}
/* Lab model selector */ /* Lab model selector */
.lab-model-row { .lab-model-row {
@@ -170,10 +232,9 @@
background: var(--bg); background: var(--bg);
color: var(--text); color: var(--text);
font-size: 0.85rem; font-size: 0.85rem;
font-family: inherit;
} }
.lab-model-row button { .lab-model-row button, .action-btn {
padding: 0.5rem 1rem; padding: 0.5rem 1rem;
border-radius: 6px; border-radius: 6px;
border: 1px solid var(--border); border: 1px solid var(--border);
@@ -184,11 +245,8 @@
transition: all 0.2s; transition: all 0.2s;
} }
.lab-model-row button:hover { .lab-model-row button:hover, .action-btn:hover { background: rgba(255,255,255,0.1); }
background: rgba(255,255,255,0.1);
}
/* Status bar */
.status-bar { .status-bar {
text-align: center; text-align: center;
font-size: 0.8rem; font-size: 0.8rem;
@@ -196,31 +254,30 @@
margin-top: 1rem; margin-top: 1rem;
min-height: 1.2em; min-height: 1.2em;
} }
.status-bar.error { color: var(--red); } .status-bar.error { color: var(--red); }
.status-bar.success { color: var(--green); } .status-bar.success { color: var(--green); }
/* Loading */ .vram-bar-container {
.loading { margin-top: 0.75rem;
text-align: center; background: rgba(255,255,255,0.05);
padding: 2rem; border-radius: 4px;
height: 8px;
overflow: hidden;
}
.vram-bar {
height: 100%;
border-radius: 4px;
transition: width 0.5s ease;
background: var(--green);
}
.vram-label {
display: flex;
justify-content: space-between;
font-size: 0.7rem;
color: var(--text-dim); color: var(--text-dim);
} margin-top: 0.25rem;
@keyframes spin {
to { transform: rotate(360deg); }
}
.spinner {
display: inline-block;
width: 20px;
height: 20px;
border: 2px solid var(--border);
border-top-color: var(--blue);
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin-right: 0.5rem;
vertical-align: middle;
} }
</style> </style>
</head> </head>
@@ -229,6 +286,12 @@
<h1>🔀 Ollama GPU Switcher</h1> <h1>🔀 Ollama GPU Switcher</h1>
<p class="subtitle">Toggle agents between work mode and lab experiments</p> <p class="subtitle">Toggle agents between work mode and lab experiments</p>
<!-- Loading banner -->
<div id="loading-banner" class="loading-banner" style="display:none; max-width:520px; width:100%;">
<span class="spinner" id="loading-spinner"></span>
<span class="loading-text" id="loading-text">Loading model...</span>
</div>
<div class="card"> <div class="card">
<h2>Current Mode</h2> <h2>Current Mode</h2>
<div class="mode-display"> <div class="mode-display">
@@ -236,26 +299,36 @@
<span id="mode-label" class="mode-label">Loading...</span> <span id="mode-label" class="mode-label">Loading...</span>
</div> </div>
<div class="toggle-container"> <div class="toggle-container">
<button id="btn-work" class="toggle-btn" onclick="switchMode('work')"> <button id="btn-work" class="toggle-btn" onclick="switchMode('work')">🛠️ Work Mode</button>
🛠️ Work Mode <button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')">🧪 Lab Mode</button>
</button> </div>
<button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')"> </div>
🧪 Lab Mode
</button> <div class="card ollama-card">
<h2>🦙 Ollama VRAM</h2>
<div id="ollama-models">
<div class="ollama-empty">Checking...</div>
</div>
<div class="vram-bar-container">
<div id="vram-bar" class="vram-bar" style="width: 0%"></div>
</div>
<div class="vram-label">
<span id="vram-used">0 GB</span>
<span>24 GB</span>
</div> </div>
</div> </div>
<div class="card"> <div class="card">
<h2>GPU Agents</h2> <h2>GPU Agents</h2>
<ul id="agent-list" class="agent-list"> <ul id="agent-list" class="agent-list">
<li class="loading"><span class="spinner"></span> Loading...</li> <li class="ollama-empty">Loading...</li>
</ul> </ul>
</div> </div>
<div class="card"> <div class="card">
<h2>Lab Agent (Eric)</h2> <h2>Lab Agent (Eric)</h2>
<div id="lab-info" style="margin-bottom: 0.5rem;"> <div id="lab-info" style="margin-bottom: 0.5rem;">
<span class="agent-model">loading...</span> <span class="model-tag">loading...</span>
</div> </div>
<div class="lab-model-row"> <div class="lab-model-row">
<select id="lab-model-select"> <select id="lab-model-select">
@@ -276,36 +349,39 @@
<div id="status-bar" class="status-bar"></div> <div id="status-bar" class="status-bar"></div>
<script> <script>
const VRAM_TOTAL_GB = 24; // RTX 3090
let currentMode = 'unknown'; let currentMode = 'unknown';
let switching = false; let switching = false;
let pollInterval = null;
async function fetchStatus() { async function fetchStatus() {
try { try {
const r = await fetch('/api/status'); const [statusR, ollamaR] = await Promise.all([
const data = await r.json(); fetch('/api/status'),
if (!data.ok) throw new Error(data.error); fetch('/api/ollama'),
updateUI(data); ]);
const statusData = await statusR.json();
const ollamaData = await ollamaR.json();
if (statusData.ok) updateModeUI(statusData);
updateOllamaUI(ollamaData);
} catch (e) { } catch (e) {
showStatus('Failed to fetch status: ' + e.message, 'error'); showStatus('Failed to fetch status: ' + e.message, 'error');
} }
} }
function updateUI(data) { function updateModeUI(data) {
currentMode = data.mode; currentMode = data.mode;
// Mode indicator
const dot = document.getElementById('mode-dot'); const dot = document.getElementById('mode-dot');
const label = document.getElementById('mode-label'); const label = document.getElementById('mode-label');
dot.className = 'mode-dot ' + data.mode; dot.className = 'mode-dot ' + data.mode;
const modeNames = { const modeNames = {
work: '🛠️ Work Mode — Agents on qwen3', work: '🛠️ Work Mode — Agents on qwen3',
lab: '🧪 Lab Mode — Agents on groq, GPU free', lab: '🧪 Lab Mode — GPU free for experiments',
mixed: '⚠️ Mixed — Check agent config', mixed: '⚠️ Mixed — Check agent config',
}; };
label.textContent = modeNames[data.mode] || data.mode; label.textContent = modeNames[data.mode] || data.mode;
// Toggle buttons
const btnWork = document.getElementById('btn-work'); const btnWork = document.getElementById('btn-work');
const btnLab = document.getElementById('btn-lab'); const btnLab = document.getElementById('btn-lab');
btnWork.className = 'toggle-btn' + (data.mode === 'work' ? ' active-work' : ''); btnWork.className = 'toggle-btn' + (data.mode === 'work' ? ' active-work' : '');
@@ -319,7 +395,7 @@ function updateUI(data) {
const shortModel = a.model.replace('ollama/', '').replace('groq/', ''); const shortModel = a.model.replace('ollama/', '').replace('groq/', '');
return `<li class="agent-item"> return `<li class="agent-item">
<span class="agent-name">${a.name}</span> <span class="agent-name">${a.name}</span>
<span class="agent-model ${cls}">${shortModel}</span> <span class="model-tag ${cls}">${shortModel}</span>
</li>`; </li>`;
}).join(''); }).join('');
@@ -327,26 +403,95 @@ function updateUI(data) {
const labInfo = document.getElementById('lab-info'); const labInfo = document.getElementById('lab-info');
const shortLab = data.lab.model.replace('ollama/', '').replace('groq/', ''); const shortLab = data.lab.model.replace('ollama/', '').replace('groq/', '');
const labCls = data.lab.model.includes('ollama/') ? 'ollama' : 'groq'; const labCls = data.lab.model.includes('ollama/') ? 'ollama' : 'groq';
labInfo.innerHTML = `Current: <span class="agent-model ${labCls}">${shortLab}</span>`; labInfo.innerHTML = `Current: <span class="model-tag ${labCls}">${shortLab}</span>`;
// Set select to current value
const select = document.getElementById('lab-model-select'); const select = document.getElementById('lab-model-select');
for (let opt of select.options) { for (let opt of select.options) {
if (opt.value === data.lab.model) { if (opt.value === data.lab.model) { opt.selected = true; break; }
opt.selected = true;
break;
} }
} }
// Render the ollama VRAM card from /api/ollama data: loading banner,
// loaded-model list, and the VRAM usage bar.
function updateOllamaUI(data) {
  const container = document.getElementById('ollama-models');
  const banner = document.getElementById('loading-banner');
  const loadingText = document.getElementById('loading-text');
  const spinner = document.getElementById('loading-spinner');
  // Loading banner: reflects the server-side loading state machine
  // (loading -> done|error). Fast polling runs only while loading.
  if (data.loading && data.loading.status === 'loading') {
    banner.style.display = 'flex';
    banner.className = 'loading-banner';
    spinner.style.display = 'inline-block';
    loadingText.textContent = `Loading ${data.loading.model}...`;
    // Poll faster while loading
    startFastPoll();
  } else if (data.loading && data.loading.status === 'done') {
    banner.style.display = 'flex';
    banner.className = 'loading-banner done';
    spinner.style.display = 'none';
    loadingText.textContent = `${data.loading.model} loaded and pinned`;
    stopFastPoll();
    // Auto-hide the success banner after a short grace period.
    setTimeout(() => { banner.style.display = 'none'; }, 5000);
  } else if (data.loading && data.loading.status === 'error') {
    banner.style.display = 'flex';
    banner.className = 'loading-banner error';
    spinner.style.display = 'none';
    loadingText.textContent = `❌ Failed to load ${data.loading.model}`;
    stopFastPoll();
    // Errors linger a little longer than successes.
    setTimeout(() => { banner.style.display = 'none'; }, 8000);
  } else {
    banner.style.display = 'none';
    stopFastPoll();
  }
  // Model list
  if (!data.ok) {
    // Backend could not reach ollama at all.
    container.innerHTML = `<div class="ollama-empty" style="color:var(--red)">⚠️ Ollama unreachable</div>`;
    document.getElementById('vram-bar').style.width = '0%';
    document.getElementById('vram-used').textContent = '? GB';
    return;
  }
  if (data.models.length === 0) {
    container.innerHTML = `<div class="ollama-empty">No models loaded in VRAM</div>`;
    document.getElementById('vram-bar').style.width = '0%';
    document.getElementById('vram-used').textContent = '0 GB';
    return;
  }
  // Accumulate total VRAM while rendering each model row.
  let totalVram = 0;
  container.innerHTML = data.models.map(m => {
    totalVram += m.size_vram_gb;
    return `<div class="ollama-model">
      <div class="ollama-model-info">
        <span class="ollama-model-name">${m.name}</span>
        <span class="ollama-model-meta">${m.parameter_size} · ${m.quantization} · ${m.family} · ctx ${m.context_length.toLocaleString()}</span>
      </div>
      <span class="ollama-model-size">${m.size_vram_gb} GB</span>
    </div>`;
  }).join('');
  // VRAM bar: green below 65%, orange to 85%, red above (capped at 100%).
  const pct = Math.min((totalVram / VRAM_TOTAL_GB) * 100, 100);
  const bar = document.getElementById('vram-bar');
  bar.style.width = pct + '%';
  bar.style.background = pct > 85 ? 'var(--red)' : pct > 65 ? 'var(--orange)' : 'var(--green)';
  document.getElementById('vram-used').textContent = totalVram.toFixed(1) + ' GB';
}
let fastPollId = null;
function startFastPoll() {
if (fastPollId) return;
fastPollId = setInterval(fetchStatus, 2000);
}
function stopFastPoll() {
if (fastPollId) { clearInterval(fastPollId); fastPollId = null; }
} }
async function switchMode(mode) { async function switchMode(mode) {
if (switching) return; if (switching || mode === currentMode) return;
if (mode === currentMode) return;
switching = true; switching = true;
const btns = document.querySelectorAll('.toggle-btn'); document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = true);
btns.forEach(b => b.disabled = true);
showStatus('Switching to ' + mode + ' mode...', ''); showStatus('Switching to ' + mode + ' mode...', '');
try { try {
@@ -357,28 +502,22 @@ async function switchMode(mode) {
}); });
const data = await r.json(); const data = await r.json();
if (!data.ok) throw new Error(data.error); if (!data.ok) throw new Error(data.error);
showStatus('Switched to ' + mode + ' mode. Loading model...', 'success');
showStatus('Switched to ' + mode + ' mode. Gateway restarting...', 'success');
// Wait for gateway to restart, then refresh
setTimeout(async () => { setTimeout(async () => {
await fetchStatus(); await fetchStatus();
switching = false; switching = false;
btns.forEach(b => b.disabled = false); document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = false);
}, 3000); }, 2000);
} catch (e) { } catch (e) {
showStatus('Switch failed: ' + e.message, 'error'); showStatus('Switch failed: ' + e.message, 'error');
switching = false; switching = false;
btns.forEach(b => b.disabled = false); document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = false);
} }
} }
async function setLabModel() { async function setLabModel() {
const select = document.getElementById('lab-model-select'); const model = document.getElementById('lab-model-select').value;
const model = select.value;
showStatus('Setting lab model to ' + model + '...', ''); showStatus('Setting lab model to ' + model + '...', '');
try { try {
const r = await fetch('/api/lab-model', { const r = await fetch('/api/lab-model', {
method: 'POST', method: 'POST',
@@ -387,9 +526,8 @@ async function setLabModel() {
}); });
const data = await r.json(); const data = await r.json();
if (!data.ok) throw new Error(data.error); if (!data.ok) throw new Error(data.error);
showStatus('Lab model updated.', 'success');
showStatus('Lab model updated. Gateway restarting...', 'success'); setTimeout(fetchStatus, 2000);
setTimeout(fetchStatus, 3000);
} catch (e) { } catch (e) {
showStatus('Failed: ' + e.message, 'error'); showStatus('Failed: ' + e.message, 'error');
} }
@@ -401,10 +539,8 @@ function showStatus(msg, type) {
bar.className = 'status-bar' + (type ? ' ' + type : ''); bar.className = 'status-bar' + (type ? ' ' + type : '');
} }
// Init
fetchStatus(); fetchStatus();
// Auto-refresh every 30s setInterval(fetchStatus, 10000);
setInterval(fetchStatus, 30000);
</script> </script>
</body> </body>
</html> </html>