feat(runtime): validate fallback config and add /clearcooldown

Two operator-facing safety nets for the provider-fallback feature
shipped earlier today:

1. Startup config check: when LLM_FALLBACK_PROVIDER is set, verify that the
   matching API key (OPENROUTER_API_KEY etc.) is present. Without this check,
   the fallback would silently fail the moment the primary provider hits
   its usage cap — which is exactly when the operator can least afford to
   find out. Also warns when LLM_FALLBACK_PROVIDER is set without
   LLM_FALLBACK_MODEL, since the primary model name rarely exists on the
   fallback provider.

2. /clearcooldown admin command: manually release a provider cooldown
   if the cap is lifted early or if the cap-error parser ever
   false-trips. Lists active cooldowns when called without args; takes
   <provider> or "all". Persists immediately so the cleared state
   survives restart.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---
Build: pass | Tests: pass — Tests  1941 passed (1941)

---
Build: pass | Tests: pass — Tests  1944 passed (1944)
This commit is contained in:
Operator & claude 2026-04-26 08:44:37 +02:00 committed by Test
parent b4996f732f
commit 3d33482c14
3 changed files with 103 additions and 1 deletions

View file

@ -58,6 +58,7 @@ import {
handleUsageCommand,
handleUpdatesCommand,
handleBudgetResetCommand,
handleClearCooldownCommand,
handleWhoamiCommand,
handleResumeCommand,
handlePolicyCommand,
@ -134,6 +135,7 @@ export class TelegramChannel implements Channel {
'/tokens — Show runtime token burn per agent\n' +
'/updates — Show FreeBSD base/ports updates\n' +
'/budgetreset — Reset agent token budget (admin)\n' +
'/clearcooldown — Clear provider fallback cooldown (admin)\n' +
'/tts — Control voice replies (on/off/status)\n' +
'/stop — Stop running agent\n' +
'/new — Reset session, start fresh\n' +
@ -333,6 +335,11 @@ export class TelegramChannel implements Channel {
await handleBudgetResetCommand(ctx, chatJid);
});
// Register the /clearcooldown command. Authorization is not checked here:
// handleClearCooldownCommand runs its own admin and ops-chat checks first.
this.bot.command('clearcooldown', async (ctx) => {
// Chat identifier in the `tg:<chat id>` form that the command handlers take.
const chatJid = `tg:${ctx.chat.id}`;
await handleClearCooldownCommand(ctx, chatJid);
});
this.bot.command('rescue', async (ctx) => {
const chatJid = `tg:${ctx.chat.id}`;
await handleBudgetResetCommand(ctx, chatJid);

View file

@ -124,7 +124,10 @@ import { startSchedulerLoop } from './task-scheduler.js';
import { bridgeTelegramMessage } from './controlplane-telegram.js';
import { Channel, NewMessage, RegisteredGroup } from './types.js';
import { logger } from './logger.js';
import { loadProviderCooldowns } from './provider-fallback.js';
import {
getFallbackPolicy,
loadProviderCooldowns,
} from './provider-fallback.js';
import { formatSkillsListText } from './skills-list.js';
import { shouldApplyTts, stripTtsMarker, synthesize } from './tts.js';
import { getTtsModeForChat, setCommandContext } from './telegram-commands.js';
@ -996,6 +999,38 @@ async function main(): Promise<void> {
if (AGENT_ENGINE === 'pi-tui' && llmKey) criticalConfig.push(llmKey);
if (sttKey) criticalConfig.push(sttKey);
// If a provider fallback is configured, make sure the fallback's API key is
// present too — otherwise the fallback silently fails the moment the primary
// provider hits its cap, which is exactly when we can least afford it.
const fallbackPolicy = getFallbackPolicy();
// Normalize so the provider comparisons below ignore surrounding whitespace
// and letter case; an unset provider collapses to the empty string.
const fallbackProvider = (fallbackPolicy.fallbackProvider || '')
.trim()
.toLowerCase();
if (fallbackProvider) {
// Map the provider to the API-key setting it depends on. A provider that is
// not listed here yields null and gets no key check at all.
// NOTE(review): unrecognized providers are skipped silently, so a typo in
// LLM_FALLBACK_PROVIDER is not reported at startup — consider warning here.
const fallbackKey =
fallbackProvider === 'openrouter'
? { name: 'OPENROUTER_API_KEY', value: OPENROUTER_API_KEY }
: fallbackProvider === 'zai'
? { name: 'ZAI_API_KEY', value: ZAI_API_KEY }
: fallbackProvider === 'anthropic'
? { name: 'ANTHROPIC_API_KEY', value: ANTHROPIC_API_KEY }
: fallbackProvider === 'google'
? { name: 'GOOGLE_API_KEY', value: GOOGLE_API_KEY }
: null;
if (fallbackKey) {
// The key is queued, not checked here: the loop over criticalConfig that
// follows logs a warning for every entry whose value is falsy.
// NOTE(review): that loop renders `Missing ${c.name}${c.hint}` with no
// separator, and this hint has no leading space or dash — confirm the
// message does not come out as "Missing OPENROUTER_API_KEYLLM_FALLBACK_...".
criticalConfig.push({
...fallbackKey,
hint: `LLM_FALLBACK_PROVIDER=${fallbackProvider} — fallback will fail when primary provider hits its cap`,
});
}
// Advisory only: a missing fallback model is logged but not added to
// criticalConfig.
if (!fallbackPolicy.fallbackModel) {
logger.warn(
{ fallbackProvider },
'LLM_FALLBACK_PROVIDER set without LLM_FALLBACK_MODEL — fallback will reuse the primary model name, which may not exist on the fallback provider',
);
}
}
for (const c of criticalConfig) {
if (!c.value) {
logger.warn({ config: c.name }, `Missing ${c.name}${c.hint}`);

View file

@ -59,8 +59,10 @@ import { buildDiskReport, renderDiskReport } from './reports/disk-report.js';
import { buildBudgetReport, renderBudgetReport } from './reports/budget-report.js';
import { buildTasksReport, renderTasksReport } from './reports/tasks-report.js';
import {
clearProviderCooldown,
getFallbackPolicy,
listProviderCooldowns,
persistProviderCooldowns,
} from './provider-fallback.js';
import {
buildPublishReport,
@ -965,6 +967,64 @@ export async function handleBudgetResetCallback(
});
}
// ── /clearcooldown ───────────────────────────────────────────────────────
/**
 * Persist the provider-cooldown state after a manual clear.
 *
 * Persistence stays best-effort (a failure never throws and never undoes the
 * in-memory clear), but the failure is reported to the caller instead of
 * being dropped, so the operator knows the cleared state may not survive a
 * restart.
 *
 * @returns An empty string on success; otherwise a warning suffix to append
 *   to the reply sent to the operator.
 */
async function persistClearedCooldowns(): Promise<string> {
  try {
    await persistProviderCooldowns();
    return '';
  } catch (err: unknown) {
    const detail = err instanceof Error ? err.message : String(err);
    return `\n\nWarning: failed to persist cooldown state (${detail}); the cleared cooldown may reappear after a restart.`;
  }
}

/**
 * Handle the `/clearcooldown` admin command.
 *
 * - No argument: list the active provider cooldowns plus a usage hint.
 * - `all`: clear every active cooldown.
 * - `<provider>`: clear that provider's cooldown; the name is matched
 *   case-insensitively against the active cooldowns.
 *
 * After a successful clear the state is persisted right away. If that fails,
 * the reply carries a warning rather than claiming plain success.
 *
 * @param ctxArg Bot command context; this function reads `message.text` and
 *   calls `reply` on it.
 * @param chatJid Identifier of the chat the command was sent from, forwarded
 *   to the ops-chat check.
 * @returns Resolves once the reply has been sent, or immediately when the
 *   admin or ops-chat check does not pass.
 */
export async function handleClearCooldownCommand(
  ctxArg: any,
  chatJid: string,
): Promise<void> {
  if (!(await requireAdmin(ctxArg))) return;
  if (!(await requireOpsChat(ctxArg, chatJid))) return;

  // The first token is the command itself (possibly "/clearcooldown@bot");
  // the second token, if present, is the provider name or "all".
  const text = (ctxArg.message?.text || '').trim();
  const target = (text.split(/\s+/)[1] || '').toLowerCase();
  const active = listProviderCooldowns();

  if (!target) {
    if (active.length === 0) {
      await ctxArg.reply('No provider cooldowns active.');
      return;
    }
    const lines = active
      .map((c) => `- ${c.provider} until ${c.until.toISOString()} (${c.reason})`)
      .join('\n');
    await ctxArg.reply(
      `Active provider cooldowns:\n${lines}\n\nUsage: /clearcooldown <provider|all>`,
    );
    return;
  }

  if (target === 'all') {
    if (active.length === 0) {
      await ctxArg.reply('No provider cooldowns to clear.');
      return;
    }
    for (const c of active) clearProviderCooldown(c.provider);
    const persistWarning = await persistClearedCooldowns();
    const names = active.map((c) => c.provider).join(', ');
    await ctxArg.reply(
      `Cleared ${active.length} provider cooldown${active.length === 1 ? '' : 's'}: ${names}${persistWarning}`,
    );
    return;
  }

  // `target` was lowercased above, so compare against the lowercased stored
  // name; a strict comparison would miss a provider stored with capitals.
  const match = active.find((c) => c.provider.toLowerCase() === target);
  if (!match) {
    await ctxArg.reply(
      `No active cooldown for "${target}". Active: ${
        active.length === 0 ? 'none' : active.map((c) => c.provider).join(', ')
      }`,
    );
    return;
  }

  // Clear by the stored provider name (as the "all" branch does), not by the
  // normalized user input.
  clearProviderCooldown(match.provider);
  const persistWarning = await persistClearedCooldowns();
  await ctxArg.reply(`Cleared provider cooldown: ${match.provider}${persistWarning}`);
}
function splitTextChunks(text: string, maxLen: number): string[] {
if (!text) return [];
if (maxLen <= 0) return [text];