feat(runtime): validate fallback config and add /clearcooldown
Two operator-facing safety nets for the provider-fallback feature shipped earlier today: (1) Startup config check: when LLM_FALLBACK_PROVIDER is set, verify that the matching API key (OPENROUTER_API_KEY etc.) is present. Without this, the fallback would silently fail the moment the primary provider hits its cap — which is exactly when the operator can least afford to find out. It also warns when LLM_FALLBACK_PROVIDER is set without LLM_FALLBACK_MODEL, since the primary model name rarely exists on the fallback provider. (2) /clearcooldown admin command: manually release a provider cooldown if the cap is lifted early or if the cap-error parser ever trips falsely. It lists active cooldowns when called without args and takes <provider> or "all". It persists immediately so the cleared state survives a restart. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- Build: pass | Tests: pass — Tests 1941 passed (1941) --- Build: pass | Tests: pass — Tests 1944 passed (1944)
This commit is contained in:
parent
b4996f732f
commit
3d33482c14
3 changed files with 103 additions and 1 deletions
|
|
@ -58,6 +58,7 @@ import {
|
|||
handleUsageCommand,
|
||||
handleUpdatesCommand,
|
||||
handleBudgetResetCommand,
|
||||
handleClearCooldownCommand,
|
||||
handleWhoamiCommand,
|
||||
handleResumeCommand,
|
||||
handlePolicyCommand,
|
||||
|
|
@ -134,6 +135,7 @@ export class TelegramChannel implements Channel {
|
|||
'/tokens — Show runtime token burn per agent\n' +
|
||||
'/updates — Show FreeBSD base/ports updates\n' +
|
||||
'/budgetreset — Reset agent token budget (admin)\n' +
|
||||
'/clearcooldown — Clear provider fallback cooldown (admin)\n' +
|
||||
'/tts — Control voice replies (on/off/status)\n' +
|
||||
'/stop — Stop running agent\n' +
|
||||
'/new — Reset session, start fresh\n' +
|
||||
|
|
@ -333,6 +335,11 @@ export class TelegramChannel implements Channel {
|
|||
await handleBudgetResetCommand(ctx, chatJid);
|
||||
});
|
||||
|
||||
this.bot.command('clearcooldown', async (ctx) => {
|
||||
const chatJid = `tg:${ctx.chat.id}`;
|
||||
await handleClearCooldownCommand(ctx, chatJid);
|
||||
});
|
||||
|
||||
this.bot.command('rescue', async (ctx) => {
|
||||
const chatJid = `tg:${ctx.chat.id}`;
|
||||
await handleBudgetResetCommand(ctx, chatJid);
|
||||
|
|
|
|||
37
src/index.ts
37
src/index.ts
|
|
@ -124,7 +124,10 @@ import { startSchedulerLoop } from './task-scheduler.js';
|
|||
import { bridgeTelegramMessage } from './controlplane-telegram.js';
|
||||
import { Channel, NewMessage, RegisteredGroup } from './types.js';
|
||||
import { logger } from './logger.js';
|
||||
import { loadProviderCooldowns } from './provider-fallback.js';
|
||||
import {
|
||||
getFallbackPolicy,
|
||||
loadProviderCooldowns,
|
||||
} from './provider-fallback.js';
|
||||
import { formatSkillsListText } from './skills-list.js';
|
||||
import { shouldApplyTts, stripTtsMarker, synthesize } from './tts.js';
|
||||
import { getTtsModeForChat, setCommandContext } from './telegram-commands.js';
|
||||
|
|
@ -996,6 +999,38 @@ async function main(): Promise<void> {
|
|||
if (AGENT_ENGINE === 'pi-tui' && llmKey) criticalConfig.push(llmKey);
|
||||
if (sttKey) criticalConfig.push(sttKey);
|
||||
|
||||
// If a provider fallback is configured, make sure the fallback's API key is
|
||||
// present too — otherwise the fallback silently fails the moment the primary
|
||||
// provider hits its cap, which is exactly when we can least afford it.
|
||||
const fallbackPolicy = getFallbackPolicy();
|
||||
const fallbackProvider = (fallbackPolicy.fallbackProvider || '')
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
if (fallbackProvider) {
|
||||
const fallbackKey =
|
||||
fallbackProvider === 'openrouter'
|
||||
? { name: 'OPENROUTER_API_KEY', value: OPENROUTER_API_KEY }
|
||||
: fallbackProvider === 'zai'
|
||||
? { name: 'ZAI_API_KEY', value: ZAI_API_KEY }
|
||||
: fallbackProvider === 'anthropic'
|
||||
? { name: 'ANTHROPIC_API_KEY', value: ANTHROPIC_API_KEY }
|
||||
: fallbackProvider === 'google'
|
||||
? { name: 'GOOGLE_API_KEY', value: GOOGLE_API_KEY }
|
||||
: null;
|
||||
if (fallbackKey) {
|
||||
criticalConfig.push({
|
||||
...fallbackKey,
|
||||
hint: `LLM_FALLBACK_PROVIDER=${fallbackProvider} — fallback will fail when primary provider hits its cap`,
|
||||
});
|
||||
}
|
||||
if (!fallbackPolicy.fallbackModel) {
|
||||
logger.warn(
|
||||
{ fallbackProvider },
|
||||
'LLM_FALLBACK_PROVIDER set without LLM_FALLBACK_MODEL — fallback will reuse the primary model name, which may not exist on the fallback provider',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (const c of criticalConfig) {
|
||||
if (!c.value) {
|
||||
logger.warn({ config: c.name }, `Missing ${c.name} — ${c.hint}`);
|
||||
|
|
|
|||
|
|
@ -59,8 +59,10 @@ import { buildDiskReport, renderDiskReport } from './reports/disk-report.js';
|
|||
import { buildBudgetReport, renderBudgetReport } from './reports/budget-report.js';
|
||||
import { buildTasksReport, renderTasksReport } from './reports/tasks-report.js';
|
||||
import {
|
||||
clearProviderCooldown,
|
||||
getFallbackPolicy,
|
||||
listProviderCooldowns,
|
||||
persistProviderCooldowns,
|
||||
} from './provider-fallback.js';
|
||||
import {
|
||||
buildPublishReport,
|
||||
|
|
@ -965,6 +967,64 @@ export async function handleBudgetResetCallback(
|
|||
});
|
||||
}
|
||||
|
||||
// ── /clearcooldown ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Handles the `/clearcooldown [provider|all]` command.
 *
 * Behavior by argument:
 * - no argument: replies with the active provider cooldowns and a usage hint;
 * - `all`: clears every active cooldown;
 * - `<provider>`: clears that provider's cooldown (matched case-insensitively).
 *
 * After clearing, the cooldown state is persisted. If persisting fails, the
 * in-memory clear is kept and the reply carries an explicit warning instead of
 * silently reporting success.
 *
 * @param ctxArg - Command context; `message.text` and `reply()` are used here.
 * @param chatJid - Chat identifier forwarded to the ops-chat check.
 * @returns Resolves once a reply has been sent, or immediately when the
 *   admin or ops-chat check resolves falsy.
 */
export async function handleClearCooldownCommand(
  ctxArg: any,
  chatJid: string,
): Promise<void> {
  if (!(await requireAdmin(ctxArg))) return;
  if (!(await requireOpsChat(ctxArg, chatJid))) return;

  // First whitespace-separated token after the command itself, lower-cased.
  const text = (ctxArg.message?.text || '').trim();
  const target = (text.split(/\s+/)[1] || '').toLowerCase();

  const active = listProviderCooldowns();

  if (!target) {
    if (active.length === 0) {
      await ctxArg.reply('No provider cooldowns active.');
      return;
    }
    const lines = active
      .map((c) => `- ${c.provider} until ${c.until.toISOString()} (${c.reason})`)
      .join('\n');
    await ctxArg.reply(
      `Active provider cooldowns:\n${lines}\n\nUsage: /clearcooldown <provider|all>`,
    );
    return;
  }

  const clearAll = target === 'all';
  let toClear = active;

  if (clearAll) {
    if (active.length === 0) {
      await ctxArg.reply('No provider cooldowns to clear.');
      return;
    }
  } else {
    // The target was lower-cased above, so compare case-insensitively;
    // otherwise a provider stored with different casing could never be matched.
    const match = active.find((c) => c.provider.toLowerCase() === target);
    if (!match) {
      await ctxArg.reply(
        `No active cooldown for "${target}". Active: ${
          active.length === 0 ? 'none' : active.map((c) => c.provider).join(', ')
        }`,
      );
      return;
    }
    toClear = [match];
  }

  // Clear using the stored provider key, not the user-typed spelling.
  for (const c of toClear) clearProviderCooldown(c.provider);

  // Persisting stays best-effort, but a failure must be visible to the operator.
  let persisted = true;
  try {
    await persistProviderCooldowns();
  } catch {
    persisted = false;
  }

  const names = toClear.map((c) => c.provider).join(', ');
  const summary = clearAll
    ? `Cleared ${toClear.length} provider cooldown${toClear.length === 1 ? '' : 's'}: ${names}`
    : `Cleared provider cooldown: ${names}`;
  const warning = persisted
    ? ''
    : '\n\nWarning: failed to persist cooldown state — the cleared state may not survive a restart.';

  await ctxArg.reply(`${summary}${warning}`);
}
|
||||
|
||||
function splitTextChunks(text: string, maxLen: number): string[] {
|
||||
if (!text) return [];
|
||||
if (maxLen <= 0) return [text];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue