feat: unified model selector below mode toggle

- Model dropdown below mode switch applies to active agents
- In work mode: changes model for Rex/Maddy/Coder/Research + subagents
- In lab mode: changes model for Eric
- New /api/apply-model endpoint: changes the ollama model for the agents in the current mode and swaps VRAM to it
- /api/switch accepts optional model param
- Removed separate lab model card (consolidated into one selector)
This commit is contained in:
2026-02-18 19:51:02 +00:00
parent d678a4d3d4
commit 786a72c06d
2 changed files with 180 additions and 156 deletions

122
app.py
View File

@@ -194,12 +194,21 @@ def status():
.get("primary", "unknown")
)
# Determine active ollama model based on mode
if mode == "work":
active_ollama = agent_details[0]["model"] if agent_details else WORK_PRIMARY
elif mode == "lab":
active_ollama = lab_info["model"]
else:
active_ollama = "unknown"
return jsonify({
"ok": True,
"mode": mode,
"lab": lab_info,
"agents": agent_details,
"subagentsPrimary": subagents_primary,
"activeOllamaModel": active_ollama,
})
except Exception as e:
return jsonify({"ok": False, "error": str(e)}), 500
@@ -215,54 +224,76 @@ def ollama_status():
return jsonify(ps)
def apply_model_to_agents(config, ollama_model, mode):
    """Point the agents that belong to ``mode`` at ``ollama_model``.

    ``config`` is mutated in place; nothing is returned.

    * ``"work"``: every agent id in ``OLLAMA_AGENTS`` that ``find_agent``
      resolves gets ``model.primary`` set, and so does the
      ``agents.defaults.subagents`` entry.
    * ``"lab"``: only the agent looked up as ``"lab"`` is updated, if found.
    * Any other mode leaves ``config`` untouched.
    """
    if mode == "lab":
        lab_agent = find_agent(config, "lab")
        if lab_agent:
            lab_agent.setdefault("model", {})["primary"] = ollama_model
        return

    if mode != "work":
        return

    for agent_id in OLLAMA_AGENTS:
        entry = find_agent(config, agent_id)
        if not entry:
            continue
        entry.setdefault("model", {})["primary"] = ollama_model

    # Walk down to agents.defaults.subagents.model, creating any missing
    # section on the way, then set the subagents default.
    node = config
    for key in ("agents", "defaults", "subagents", "model"):
        node = node.setdefault(key, {})
    node["primary"] = ollama_model
@app.route("/api/switch", methods=["POST"])
def switch():
try:
data = request.json or {}
target_mode = data.get("mode", "work")
selected_model = data.get("model", None)
if target_mode == "lab":
new_primary = LAB_PRIMARY
target_ollama_model = None # lab model is managed separately
elif target_mode == "work":
new_primary = WORK_PRIMARY
target_ollama_model = "qwen3-128k:14b"
else:
if target_mode not in ("work", "lab"):
return jsonify({"ok": False, "error": f"Unknown mode: {target_mode}"}), 400
config = read_config()
# Determine which ollama model to load based on mode
if target_mode == "lab":
# Move work agents to groq
for agent_id in OLLAMA_AGENTS:
agent = find_agent(config, agent_id)
if agent:
agent.setdefault("model", {})["primary"] = LAB_PRIMARY
config.setdefault("agents", {}).setdefault("defaults", {}).setdefault("subagents", {}).setdefault("model", {})
config["agents"]["defaults"]["subagents"]["model"]["primary"] = LAB_PRIMARY
# If a model was selected, set it as the lab model
if selected_model and "ollama/" in selected_model:
lab = find_agent(config, "lab")
if lab:
lab.setdefault("model", {})["primary"] = selected_model
# Determine which ollama model to load for lab
lab = find_agent(config, "lab")
if lab:
lab_model = lab.get("model", {}).get("primary", "")
if "ollama/" in lab_model:
target_ollama_model = lab_model.replace("ollama/", "")
lab_model = lab.get("model", {}).get("primary", "") if lab else ""
target_ollama_model = lab_model.replace("ollama/", "") if "ollama/" in lab_model else None
# Patch each agent's primary model
for agent_id in OLLAMA_AGENTS:
agent = find_agent(config, agent_id)
if agent:
if "model" not in agent:
agent["model"] = {}
agent["model"]["primary"] = new_primary
# Patch subagents default
config.setdefault("agents", {}).setdefault("defaults", {}).setdefault("subagents", {}).setdefault("model", {})
config["agents"]["defaults"]["subagents"]["model"]["primary"] = new_primary
elif target_mode == "work":
# Use selected model or default
work_model = selected_model if selected_model and "ollama/" in selected_model else WORK_PRIMARY
for agent_id in OLLAMA_AGENTS:
agent = find_agent(config, agent_id)
if agent:
agent.setdefault("model", {})["primary"] = work_model
config.setdefault("agents", {}).setdefault("defaults", {}).setdefault("subagents", {}).setdefault("model", {})
config["agents"]["defaults"]["subagents"]["model"]["primary"] = work_model
target_ollama_model = work_model.replace("ollama/", "")
write_config(config)
restarted = restart_gateway()
# Unload current models and load the target model
# Swap VRAM: unload old, load+pin new
if target_ollama_model:
# First unload anything currently loaded
ps = ollama_ps()
for m in ps.get("models", []):
if m["name"] != target_ollama_model:
ollama_unload_model(m["name"])
# Load and pin the target model async
load_model_async(target_ollama_model)
return jsonify({
@@ -275,43 +306,36 @@ def switch():
return jsonify({"ok": False, "error": str(e)}), 500
@app.route("/api/lab-model", methods=["POST"])
def set_lab_model():
@app.route("/api/apply-model", methods=["POST"])
def apply_model():
"""Change the ollama model for agents in the current mode and swap VRAM."""
try:
data = request.json or {}
model = data.get("model", "")
if not model:
return jsonify({"ok": False, "error": "No model specified"}), 400
if not model or "ollama/" not in model:
return jsonify({"ok": False, "error": "Must be an ollama model"}), 400
config = read_config()
lab = find_agent(config, "lab")
if not lab:
return jsonify({"ok": False, "error": "Lab agent not found"}), 404
if "model" not in lab:
lab["model"] = {}
lab["model"]["primary"] = model
mode = detect_mode(config)
apply_model_to_agents(config, model, mode)
write_config(config)
restarted = restart_gateway()
# If currently in lab mode, load the new model
mode = detect_mode(config)
ollama_model_name = None
if mode == "lab" and "ollama/" in model:
ollama_model_name = model.replace("ollama/", "")
# Unload old models first
ps = ollama_ps()
for m in ps.get("models", []):
if m["name"] != ollama_model_name:
ollama_unload_model(m["name"])
load_model_async(ollama_model_name)
# Swap VRAM
ollama_model = model.replace("ollama/", "")
ps = ollama_ps()
for m in ps.get("models", []):
if m["name"] != ollama_model:
ollama_unload_model(m["name"])
load_model_async(ollama_model)
return jsonify({
"ok": True,
"model": model,
"mode": mode,
"restarted": restarted,
"loading_model": ollama_model_name,
"loading_model": ollama_model,
})
except Exception as e:
return jsonify({"ok": False, "error": str(e)}), 500

View File

@@ -56,7 +56,6 @@
margin-bottom: 1rem;
}
/* Mode indicator */
.mode-display {
display: flex;
align-items: center;
@@ -76,7 +75,6 @@
.mode-label { font-size: 1.25rem; font-weight: 600; }
/* Toggle switch */
.toggle-container {
display: flex;
border-radius: 8px;
@@ -101,6 +99,49 @@
.toggle-btn.active-lab { background: var(--orange-dim); color: white; }
.toggle-btn:disabled { opacity: 0.5; cursor: wait; }
/* Model selector row */
.model-select-row {
display: flex;
gap: 0.5rem;
align-items: center;
margin-top: 1.25rem;
padding-top: 1.25rem;
border-top: 1px solid var(--border);
}
.model-select-row label {
font-size: 0.8rem;
font-weight: 600;
color: var(--text-dim);
white-space: nowrap;
}
.model-select-row select {
flex: 1;
padding: 0.5rem;
border-radius: 6px;
border: 1px solid var(--border);
background: var(--bg);
color: var(--text);
font-size: 0.85rem;
font-family: 'SF Mono', SFMono-Regular, Consolas, monospace;
}
.model-select-row button {
padding: 0.5rem 1rem;
border-radius: 6px;
border: 1px solid var(--border);
background: var(--surface);
color: var(--text);
cursor: pointer;
font-size: 0.85rem;
transition: all 0.2s;
white-space: nowrap;
}
.model-select-row button:hover { background: rgba(255,255,255,0.1); }
.model-select-row button:disabled { opacity: 0.5; cursor: wait; }
/* Agent list */
.agent-list { list-style: none; }
@@ -174,6 +215,8 @@
border: 1px solid rgba(210,153,34,0.3);
border-radius: 8px;
margin-bottom: 1rem;
max-width: 520px;
width: 100%;
}
.loading-banner.done {
@@ -186,25 +229,12 @@
border-color: rgba(248,81,73,0.3);
}
.loading-text {
font-size: 0.85rem;
font-weight: 500;
}
.loading-text { font-size: 0.85rem; font-weight: 500; }
.loading-banner .loading-text { color: var(--yellow); }
.loading-banner.done .loading-text { color: var(--green); }
.loading-banner.error .loading-text { color: var(--red); }
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
}
.pulse { animation: pulse 1.5s ease-in-out infinite; }
@keyframes spin {
to { transform: rotate(360deg); }
}
@keyframes spin { to { transform: rotate(360deg); } }
.spinner {
display: inline-block;
@@ -216,47 +246,6 @@
flex-shrink: 0;
}
/* Lab model selector */
.lab-model-row {
display: flex;
gap: 0.5rem;
align-items: center;
margin-top: 0.75rem;
}
.lab-model-row select {
flex: 1;
padding: 0.5rem;
border-radius: 6px;
border: 1px solid var(--border);
background: var(--bg);
color: var(--text);
font-size: 0.85rem;
}
.lab-model-row button, .action-btn {
padding: 0.5rem 1rem;
border-radius: 6px;
border: 1px solid var(--border);
background: var(--surface);
color: var(--text);
cursor: pointer;
font-size: 0.85rem;
transition: all 0.2s;
}
.lab-model-row button:hover, .action-btn:hover { background: rgba(255,255,255,0.1); }
.status-bar {
text-align: center;
font-size: 0.8rem;
color: var(--text-dim);
margin-top: 1rem;
min-height: 1.2em;
}
.status-bar.error { color: var(--red); }
.status-bar.success { color: var(--green); }
.vram-bar-container {
margin-top: 0.75rem;
background: rgba(255,255,255,0.05);
@@ -279,6 +268,16 @@
color: var(--text-dim);
margin-top: 0.25rem;
}
.status-bar {
text-align: center;
font-size: 0.8rem;
color: var(--text-dim);
margin-top: 1rem;
min-height: 1.2em;
}
.status-bar.error { color: var(--red); }
.status-bar.success { color: var(--green); }
</style>
</head>
<body>
@@ -286,8 +285,7 @@
<h1>🔀 Ollama GPU Switcher</h1>
<p class="subtitle">Toggle agents between work mode and lab experiments</p>
<!-- Loading banner -->
<div id="loading-banner" class="loading-banner" style="display:none; max-width:520px; width:100%;">
<div id="loading-banner" class="loading-banner" style="display:none;">
<span class="spinner" id="loading-spinner"></span>
<span class="loading-text" id="loading-text">Loading model...</span>
</div>
@@ -302,6 +300,20 @@
<button id="btn-work" class="toggle-btn" onclick="switchMode('work')">🛠️ Work Mode</button>
<button id="btn-lab" class="toggle-btn" onclick="switchMode('lab')">🧪 Lab Mode</button>
</div>
<div class="model-select-row">
<label>Ollama model:</label>
<select id="model-select">
<option value="ollama/qwen3-128k:14b">qwen3-128k:14b</option>
<option value="ollama/granite4:32b-a9b-h">granite4:32b-a9b-h</option>
<option value="ollama/qwen3:14b">qwen3:14b</option>
<option value="ollama/gpt-oss:20b">gpt-oss:20b</option>
<option value="ollama/gpt-oss:20b-64k">gpt-oss:20b-64k</option>
<option value="ollama/gemma3:27b">gemma3:27b</option>
<option value="ollama/gemma3:12b">gemma3:12b</option>
<option value="ollama/granite3.3:latest">granite3.3:latest</option>
</select>
<button id="btn-apply-model" onclick="applyModel()">Apply</button>
</div>
</div>
<div class="card ollama-card">
@@ -319,40 +331,19 @@
</div>
<div class="card">
<h2>GPU Agents</h2>
<h2>Agents</h2>
<ul id="agent-list" class="agent-list">
<li class="ollama-empty">Loading...</li>
</ul>
</div>
<div class="card">
<h2>Lab Agent (Eric)</h2>
<div id="lab-info" style="margin-bottom: 0.5rem;">
<span class="model-tag">loading...</span>
</div>
<div class="lab-model-row">
<select id="lab-model-select">
<option value="ollama/granite4:32b-a9b-h">granite4:32b-a9b-h</option>
<option value="ollama/qwen3-128k:14b">qwen3-128k:14b</option>
<option value="ollama/qwen3:14b">qwen3:14b</option>
<option value="ollama/gpt-oss:20b">gpt-oss:20b</option>
<option value="ollama/gpt-oss:20b-64k">gpt-oss:20b-64k</option>
<option value="ollama/gemma3:27b">gemma3:27b</option>
<option value="ollama/gemma3:12b">gemma3:12b</option>
<option value="ollama/granite3.3:latest">granite3.3:latest</option>
<option value="groq/llama-3.3-70b-versatile">groq (cloud)</option>
</select>
<button onclick="setLabModel()">Apply</button>
</div>
</div>
<div id="status-bar" class="status-bar"></div>
<script>
const VRAM_TOTAL_GB = 24; // RTX 3090
const VRAM_TOTAL_GB = 24;
let currentMode = 'unknown';
let switching = false;
let pollInterval = null;
let fastPollId = null;
async function fetchStatus() {
try {
@@ -376,7 +367,7 @@ function updateModeUI(data) {
dot.className = 'mode-dot ' + data.mode;
const modeNames = {
work: '🛠️ Work Mode — Agents on qwen3',
work: '🛠️ Work Mode — Agents on ollama',
lab: '🧪 Lab Mode — GPU free for experiments',
mixed: '⚠️ Mixed — Check agent config',
};
@@ -387,9 +378,16 @@ function updateModeUI(data) {
btnWork.className = 'toggle-btn' + (data.mode === 'work' ? ' active-work' : '');
btnLab.className = 'toggle-btn' + (data.mode === 'lab' ? ' active-lab' : '');
// Agent list
// All agents (including lab)
const list = document.getElementById('agent-list');
list.innerHTML = data.agents.map(a => {
const allAgents = [...data.agents];
allAgents.push({
id: 'lab',
name: data.lab.name + ' (Lab)',
model: data.lab.model,
});
list.innerHTML = allAgents.map(a => {
const isOllama = a.model.includes('ollama/');
const cls = isOllama ? 'ollama' : 'groq';
const shortModel = a.model.replace('ollama/', '').replace('groq/', '');
@@ -399,15 +397,12 @@ function updateModeUI(data) {
</li>`;
}).join('');
// Lab info
const labInfo = document.getElementById('lab-info');
const shortLab = data.lab.model.replace('ollama/', '').replace('groq/', '');
const labCls = data.lab.model.includes('ollama/') ? 'ollama' : 'groq';
labInfo.innerHTML = `Current: <span class="model-tag ${labCls}">${shortLab}</span>`;
const select = document.getElementById('lab-model-select');
// Update model selector to reflect current active ollama model
const select = document.getElementById('model-select');
// In work mode, show the work model; in lab mode, show the lab model
const activeModel = data.mode === 'lab' ? data.lab.model : data.activeOllamaModel || 'ollama/qwen3-128k:14b';
for (let opt of select.options) {
if (opt.value === data.lab.model) { opt.selected = true; break; }
if (opt.value === activeModel) { opt.selected = true; break; }
}
}
@@ -417,13 +412,11 @@ function updateOllamaUI(data) {
const loadingText = document.getElementById('loading-text');
const spinner = document.getElementById('loading-spinner');
// Loading banner
if (data.loading && data.loading.status === 'loading') {
banner.style.display = 'flex';
banner.className = 'loading-banner';
spinner.style.display = 'inline-block';
loadingText.textContent = `Loading ${data.loading.model}...`;
// Poll faster while loading
startFastPoll();
} else if (data.loading && data.loading.status === 'done') {
banner.style.display = 'flex';
@@ -444,7 +437,6 @@ function updateOllamaUI(data) {
stopFastPoll();
}
// Model list
if (!data.ok) {
container.innerHTML = `<div class="ollama-empty" style="color:var(--red)">⚠️ Ollama unreachable</div>`;
document.getElementById('vram-bar').style.width = '0%';
@@ -471,7 +463,6 @@ function updateOllamaUI(data) {
</div>`;
}).join('');
// VRAM bar
const pct = Math.min((totalVram / VRAM_TOTAL_GB) * 100, 100);
const bar = document.getElementById('vram-bar');
bar.style.width = pct + '%';
@@ -479,7 +470,6 @@ function updateOllamaUI(data) {
document.getElementById('vram-used').textContent = totalVram.toFixed(1) + ' GB';
}
let fastPollId = null;
function startFastPoll() {
if (fastPollId) return;
fastPollId = setInterval(fetchStatus, 2000);
@@ -494,11 +484,14 @@ async function switchMode(mode) {
document.querySelectorAll('.toggle-btn').forEach(b => b.disabled = true);
showStatus('Switching to ' + mode + ' mode...', '');
// Use currently selected model
const selectedModel = document.getElementById('model-select').value;
try {
const r = await fetch('/api/switch', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({mode}),
body: JSON.stringify({mode, model: selectedModel}),
});
const data = await r.json();
if (!data.ok) throw new Error(data.error);
@@ -515,21 +508,28 @@ async function switchMode(mode) {
}
}
async function setLabModel() {
const model = document.getElementById('lab-model-select').value;
showStatus('Setting lab model to ' + model + '...', '');
async function applyModel() {
const model = document.getElementById('model-select').value;
const btn = document.getElementById('btn-apply-model');
btn.disabled = true;
showStatus('Applying model ' + model.replace('ollama/', '') + '...', '');
try {
const r = await fetch('/api/lab-model', {
const r = await fetch('/api/apply-model', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({model}),
});
const data = await r.json();
if (!data.ok) throw new Error(data.error);
showStatus('Lab model updated.', 'success');
setTimeout(fetchStatus, 2000);
showStatus('Model applied. Loading...', 'success');
setTimeout(async () => {
await fetchStatus();
btn.disabled = false;
}, 2000);
} catch (e) {
showStatus('Failed: ' + e.message, 'error');
btn.disabled = false;
}
}