feat: ollama VRAM status + model loading/pinning on switch

- Show loaded models with VRAM usage bar (24GB 3090)
- On mode switch: unload old model, load+pin target model (keep_alive=-1m)
- Loading banner with spinner (polls faster at 2s while loading)
- Lab model changes also trigger model swap when in lab mode
- Manual load/unload API endpoints
This commit is contained in:
2026-02-18 19:47:22 +00:00
parent 3366d6d9ec
commit d678a4d3d4
2 changed files with 411 additions and 123 deletions

View File

@@ -18,6 +18,7 @@
--blue: #58a6ff;
--red: #f85149;
--purple: #bc8cff;
--yellow: #d29922;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
@@ -33,17 +34,8 @@
padding: 2rem 1rem;
}
h1 {
font-size: 1.5rem;
font-weight: 600;
margin-bottom: 0.5rem;
}
.subtitle {
color: var(--text-dim);
font-size: 0.875rem;
margin-bottom: 2rem;
}
h1 { font-size: 1.5rem; font-weight: 600; margin-bottom: 0.5rem; }
.subtitle { color: var(--text-dim); font-size: 0.875rem; margin-bottom: 2rem; }
.card {
background: var(--surface);
@@ -51,7 +43,7 @@
border-radius: 12px;
padding: 1.5rem;
width: 100%;
max-width: 480px;
max-width: 520px;
margin-bottom: 1rem;
}
@@ -73,8 +65,7 @@
}
.mode-dot {
width: 12px;
height: 12px;
width: 12px; height: 12px;
border-radius: 50%;
flex-shrink: 0;
}
@@ -83,15 +74,11 @@
.mode-dot.lab { background: var(--orange); box-shadow: 0 0 8px var(--orange); }
.mode-dot.mixed { background: var(--purple); box-shadow: 0 0 8px var(--purple); }
.mode-label {
font-size: 1.25rem;
font-weight: 600;
}
.mode-label { font-size: 1.25rem; font-weight: 600; }
/* Toggle switch */
.toggle-container {
display: flex;
gap: 0;
border-radius: 8px;
overflow: hidden;
border: 1px solid var(--border);
@@ -110,26 +97,12 @@
}
.toggle-btn:hover { background: rgba(255,255,255,0.05); }
.toggle-btn.active-work {
background: var(--green-dim);
color: white;
}
.toggle-btn.active-lab {
background: var(--orange-dim);
color: white;
}
.toggle-btn:disabled {
opacity: 0.5;
cursor: wait;
}
.toggle-btn.active-work { background: var(--green-dim); color: white; }
.toggle-btn.active-lab { background: var(--orange-dim); color: white; }
.toggle-btn:disabled { opacity: 0.5; cursor: wait; }
/* Agent list */
.agent-list {
list-style: none;
}
.agent-list { list-style: none; }
.agent-item {
display: flex;
@@ -138,21 +111,110 @@
padding: 0.5rem 0;
border-bottom: 1px solid var(--border);
}
.agent-item:last-child { border-bottom: none; }
.agent-name {
.agent-name { font-weight: 500; }
.model-tag {
font-size: 0.8rem;
font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
padding: 2px 8px;
border-radius: 4px;
background: rgba(255,255,255,0.05);
}
.model-tag.ollama { color: var(--green); border: 1px solid rgba(63,185,80,0.3); }
.model-tag.groq { color: var(--blue); border: 1px solid rgba(88,166,255,0.3); }
/* Ollama status */
.ollama-card { border-color: var(--green-dim); }
.ollama-model {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.75rem;
background: rgba(255,255,255,0.03);
border-radius: 8px;
margin-bottom: 0.5rem;
}
.ollama-model-info { display: flex; flex-direction: column; gap: 0.25rem; }
.ollama-model-name {
font-weight: 600;
font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
font-size: 0.95rem;
}
.ollama-model-meta {
font-size: 0.75rem;
color: var(--text-dim);
}
.ollama-model-size {
font-size: 0.85rem;
font-weight: 600;
color: var(--blue);
}
.ollama-empty {
text-align: center;
padding: 1rem;
color: var(--text-dim);
font-style: italic;
}
/* Loading indicator */
.loading-banner {
display: flex;
align-items: center;
gap: 0.75rem;
padding: 0.75rem 1rem;
background: rgba(210,153,34,0.1);
border: 1px solid rgba(210,153,34,0.3);
border-radius: 8px;
margin-bottom: 1rem;
}
.loading-banner.done {
background: rgba(63,185,80,0.1);
border-color: rgba(63,185,80,0.3);
}
.loading-banner.error {
background: rgba(248,81,73,0.1);
border-color: rgba(248,81,73,0.3);
}
.loading-text {
font-size: 0.85rem;
font-weight: 500;
}
.agent-model {
font-size: 0.8rem;
color: var(--text-dim);
font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
.loading-banner .loading-text { color: var(--yellow); }
.loading-banner.done .loading-text { color: var(--green); }
.loading-banner.error .loading-text { color: var(--red); }
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
}
.agent-model.ollama { color: var(--green); }
.agent-model.groq { color: var(--blue); }
.pulse { animation: pulse 1.5s ease-in-out infinite; }
@keyframes spin {
to { transform: rotate(360deg); }
}
.spinner {
display: inline-block;
width: 16px; height: 16px;
border: 2px solid var(--border);
border-top-color: var(--yellow);
border-radius: 50%;
animation: spin 0.8s linear infinite;
flex-shrink: 0;
}
/* Lab model selector */
.lab-model-row {
@@ -170,10 +232,9 @@
background: var(--bg);
color: var(--text);
font-size: 0.85rem;
font-family: inherit;
}
.lab-model-row button {
.lab-model-row button, .action-btn {
padding: 0.5rem 1rem;
border-radius: 6px;
border: 1px solid var(--border);
@@ -184,11 +245,8 @@
transition: all 0.2s;
}
.lab-model-row button:hover {
background: rgba(255,255,255,0.1);
}
.lab-model-row button:hover, .action-btn:hover { background: rgba(255,255,255,0.1); }
/* Status bar */
.status-bar {
text-align: center;
font-size: 0.8rem;
@@ -196,31 +254,30 @@
margin-top: 1rem;
min-height: 1.2em;
}
.status-bar.error { color: var(--red); }
.status-bar.success { color: var(--green); }
/* Loading */
.loading {
text-align: center;
padding: 2rem;
.vram-bar-container {
margin-top: 0.75rem;
background: rgba(255,255,255,0.05);
border-radius: 4px;
height: 8px;
overflow: hidden;
}
.vram-bar {
height: 100%;
border-radius: 4px;
transition: width 0.5s ease;
background: var(--green);
}
.vram-label {
display: flex;
justify-content: space-between;
font-size: 0.7rem;
color: var(--text-dim);
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.spinner {
display: inline-block;
width: 20px;
height: 20px;
border: 2px solid var(--border);
border-top-color: var(--blue);
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin-right: 0.5rem;
vertical-align: middle;
margin-top: 0.25rem;
}
</style>
</head>
@@ -229,6 +286,12 @@
<h1>🔀 Ollama GPU Switcher</h1>
<p class="subtitle">Toggle agents between work mode and lab experiments</p>
<!-- Loading banner -->
<div id="loading-banner" class="loading-banner" style="display:none; max-width:520px; width:100%;">
<span class="spinner" id="loading-spinner"></span>
<span class="loading-text" id="loading-text">Loading model...</span>
</div>
<div class="card">
<h2>Current Mode</h2>
<div class="mode-display">
@@ -236,26 +299,36 @@
<span id="mode-label" class="mode-label">Loading...</span>
</div>
<div class="toggle-container">
<button id="btn-work" class="toggle-btn" onclick="switchMode('work')">
🛠️ Work Mode
</button>
<button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')">
🧪 Lab Mode
</button>
<button id="btn-work" class="toggle-btn" onclick="switchMode('work')">🛠️ Work Mode</button>
<button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')">🧪 Lab Mode</button>
</div>
</div>
<div class="card ollama-card">
<h2>🦙 Ollama VRAM</h2>
<div id="ollama-models">
<div class="ollama-empty">Checking...</div>
</div>
<div class="vram-bar-container">
<div id="vram-bar" class="vram-bar" style="width: 0%"></div>
</div>
<div class="vram-label">
<span id="vram-used">0 GB</span>
<span>24 GB</span>
</div>
</div>
<div class="card">
<h2>GPU Agents</h2>
<ul id="agent-list" class="agent-list">
<li class="loading"><span class="spinner"></span> Loading...</li>
<li class="ollama-empty">Loading...</li>
</ul>
</div>
<div class="card">
<h2>Lab Agent (Eric)</h2>
<div id="lab-info" style="margin-bottom: 0.5rem;">
<span class="agent-model">loading...</span>
<span class="model-tag">loading...</span>
</div>
<div class="lab-model-row">
<select id="lab-model-select">
@@ -276,36 +349,39 @@
<div id="status-bar" class="status-bar"></div>
<script>
const VRAM_TOTAL_GB = 24; // RTX 3090
let currentMode = 'unknown';
let switching = false;
let pollInterval = null;
async function fetchStatus() {
try {
const r = await fetch('/api/status');
const data = await r.json();
if (!data.ok) throw new Error(data.error);
updateUI(data);
const [statusR, ollamaR] = await Promise.all([
fetch('/api/status'),
fetch('/api/ollama'),
]);
const statusData = await statusR.json();
const ollamaData = await ollamaR.json();
if (statusData.ok) updateModeUI(statusData);
updateOllamaUI(ollamaData);
} catch (e) {
showStatus('Failed to fetch status: ' + e.message, 'error');
}
}
function updateUI(data) {
function updateModeUI(data) {
currentMode = data.mode;
// Mode indicator
const dot = document.getElementById('mode-dot');
const label = document.getElementById('mode-label');
dot.className = 'mode-dot ' + data.mode;
const modeNames = {
work: '🛠️ Work Mode — Agents on qwen3',
lab: '🧪 Lab Mode — Agents on groq, GPU free',
lab: '🧪 Lab Mode — GPU free for experiments',
mixed: '⚠️ Mixed — Check agent config',
};
label.textContent = modeNames[data.mode] || data.mode;
// Toggle buttons
const btnWork = document.getElementById('btn-work');
const btnLab = document.getElementById('btn-lab');
btnWork.className = 'toggle-btn' + (data.mode === 'work' ? ' active-work' : '');
@@ -319,7 +395,7 @@ function updateUI(data) {
const shortModel = a.model.replace('ollama/', '').replace('groq/', '');
return `<li class="agent-item">
<span class="agent-name">${a.name}</span>
<span class="agent-model ${cls}">${shortModel}</span>
<span class="model-tag ${cls}">${shortModel}</span>
</li>`;
}).join('');
@@ -327,26 +403,95 @@ function updateUI(data) {
const labInfo = document.getElementById('lab-info');
const shortLab = data.lab.model.replace('ollama/', '').replace('groq/', '');
const labCls = data.lab.model.includes('ollama/') ? 'ollama' : 'groq';
labInfo.innerHTML = `Current: <span class="agent-model ${labCls}">${shortLab}</span>`;
labInfo.innerHTML = `Current: <span class="model-tag ${labCls}">${shortLab}</span>`;
// Set select to current value
const select = document.getElementById('lab-model-select');
for (let opt of select.options) {
if (opt.value === data.lab.model) {
opt.selected = true;
break;
}
if (opt.value === data.lab.model) { opt.selected = true; break; }
}
}
// Handle of the pending "auto-hide the banner" timer, or null when none is scheduled.
// Kept outside the function so a later refresh can cancel a timer set by an earlier one.
let ollamaBannerHideTimer = null;

/**
 * Escape a value so it can be interpolated into an HTML string as plain text.
 * @param {*} value - Any value; it is converted with String() first.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render the Ollama card: the loading banner, the list of loaded models and
 * the VRAM usage bar.
 *
 * @param {object} data - Parsed response of `/api/ollama`. Fields read here:
 *   `ok` (boolean), `loading` (optional `{status, model}` where status is
 *   'loading' | 'done' | 'error'), and `models` (array of objects with
 *   `name`, `parameter_size`, `quantization`, `family`, `context_length`,
 *   `size_vram_gb`). A missing `models` is treated as an empty list.
 */
function updateOllamaUI(data) {
  const container = document.getElementById('ollama-models');
  const banner = document.getElementById('loading-banner');
  const loadingText = document.getElementById('loading-text');
  const spinner = document.getElementById('loading-spinner');
  const bar = document.getElementById('vram-bar');
  const usedLabel = document.getElementById('vram-used');

  // Cancel any hide timer left over from a previous refresh. Without this,
  // a timer scheduled for an old "done"/"error" state could hide a banner
  // that has since switched back to "loading", and timers would pile up on
  // every poll that reports the same finished state.
  if (ollamaBannerHideTimer !== null) {
    clearTimeout(ollamaBannerHideTimer);
    ollamaBannerHideTimer = null;
  }
  const hideBannerAfter = (ms) => {
    ollamaBannerHideTimer = setTimeout(() => {
      ollamaBannerHideTimer = null;
      banner.style.display = 'none';
    }, ms);
  };

  // Loading banner
  const status = data.loading ? data.loading.status : null;
  if (status === 'loading') {
    banner.style.display = 'flex';
    banner.className = 'loading-banner';
    spinner.style.display = 'inline-block';
    loadingText.textContent = `Loading ${data.loading.model}...`;
    // Poll faster while loading
    startFastPoll();
  } else if (status === 'done') {
    banner.style.display = 'flex';
    banner.className = 'loading-banner done';
    spinner.style.display = 'none';
    loadingText.textContent = `${data.loading.model} loaded and pinned`;
    stopFastPoll();
    hideBannerAfter(5000);
  } else if (status === 'error') {
    banner.style.display = 'flex';
    banner.className = 'loading-banner error';
    spinner.style.display = 'none';
    loadingText.textContent = `❌ Failed to load ${data.loading.model}`;
    stopFastPoll();
    hideBannerAfter(8000);
  } else {
    banner.style.display = 'none';
    stopFastPoll();
  }

  // Model list
  if (!data.ok) {
    container.innerHTML = `<div class="ollama-empty" style="color:var(--red)">⚠️ Ollama unreachable</div>`;
    bar.style.width = '0%';
    usedLabel.textContent = '? GB';
    return;
  }

  // Tolerate a response that omits the list instead of throwing on `.length`.
  const models = Array.isArray(data.models) ? data.models : [];
  if (models.length === 0) {
    container.innerHTML = `<div class="ollama-empty">No models loaded in VRAM</div>`;
    bar.style.width = '0%';
    usedLabel.textContent = '0 GB';
    return;
  }

  // Model fields come from the API, so escape them before building HTML.
  container.innerHTML = models.map((m) => {
    const ctx = typeof m.context_length === 'number'
      ? m.context_length.toLocaleString()
      : '?';
    return `<div class="ollama-model">
<div class="ollama-model-info">
<span class="ollama-model-name">${escapeHtml(m.name)}</span>
<span class="ollama-model-meta">${escapeHtml(m.parameter_size)} · ${escapeHtml(m.quantization)} · ${escapeHtml(m.family)} · ctx ${escapeHtml(ctx)}</span>
</div>
<span class="ollama-model-size">${escapeHtml(m.size_vram_gb)} GB</span>
</div>`;
  }).join('');

  // VRAM bar: non-numeric sizes count as 0 so the total never becomes NaN.
  const totalVram = models.reduce((sum, m) => sum + (Number(m.size_vram_gb) || 0), 0);
  const pct = Math.min((totalVram / VRAM_TOTAL_GB) * 100, 100);
  bar.style.width = pct + '%';
  bar.style.background = pct > 85 ? 'var(--red)' : pct > 65 ? 'var(--orange)' : 'var(--green)';
  usedLabel.textContent = totalVram.toFixed(1) + ' GB';
}
// Interval handle of the fast poll; null while fast polling is off.
let fastPollId = null;

/**
 * Begin refreshing status every 2 seconds.
 * Does nothing when a fast poll is already running, so repeated calls
 * never create more than one interval.
 */
function startFastPoll() {
  if (!fastPollId) {
    fastPollId = setInterval(fetchStatus, 2000);
  }
}
/**
 * Stop the fast poll if one is running and forget its handle.
 * Calling it when no fast poll is active is a no-op.
 */
function stopFastPoll() {
  if (!fastPollId) return;
  clearInterval(fastPollId);
  fastPollId = null;
}
async function switchMode(mode) {
if (switching) return;
if (mode === currentMode) return;
if (switching || mode === currentMode) return;
switching = true;
const btns = document.querySelectorAll('.toggle-btn');
btns.forEach(b => b.disabled = true);
document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = true);
showStatus('Switching to ' + mode + ' mode...', '');
try {
@@ -357,28 +502,22 @@ async function switchMode(mode) {
});
const data = await r.json();
if (!data.ok) throw new Error(data.error);
showStatus('Switched to ' + mode + ' mode. Gateway restarting...', 'success');
// Wait for gateway to restart, then refresh
showStatus('Switched to ' + mode + ' mode. Loading model...', 'success');
setTimeout(async () => {
await fetchStatus();
switching = false;
btns.forEach(b => b.disabled = false);
}, 3000);
document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = false);
}, 2000);
} catch (e) {
showStatus('Switch failed: ' + e.message, 'error');
switching = false;
btns.forEach(b => b.disabled = false);
document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = false);
}
}
async function setLabModel() {
const select = document.getElementById('lab-model-select');
const model = select.value;
const model = document.getElementById('lab-model-select').value;
showStatus('Setting lab model to ' + model + '...', '');
try {
const r = await fetch('/api/lab-model', {
method: 'POST',
@@ -387,9 +526,8 @@ async function setLabModel() {
});
const data = await r.json();
if (!data.ok) throw new Error(data.error);
showStatus('Lab model updated. Gateway restarting...', 'success');
setTimeout(fetchStatus, 3000);
showStatus('Lab model updated.', 'success');
setTimeout(fetchStatus, 2000);
} catch (e) {
showStatus('Failed: ' + e.message, 'error');
}
@@ -401,10 +539,8 @@ function showStatus(msg, type) {
bar.className = 'status-bar' + (type ? ' ' + type : '');
}
// Init
fetchStatus();
// Auto-refresh every 30s
setInterval(fetchStatus, 30000);
setInterval(fetchStatus, 10000);
</script>
</body>
</html>