feat(runtime): capture actual chat model usage

---
Build: FAIL | Tests: FAIL
This commit is contained in:
Operator & Codex 2026-04-26 07:27:57 +02:00
parent eb69e32eef
commit a73f211307
5 changed files with 243 additions and 3 deletions

View file

@ -5,6 +5,7 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
computeEffectiveSessionMaxTokens,
extractRuntimeFromPiSession,
isSessionOversize,
writeGroupsSnapshot,
writeTasksSnapshot,
@ -207,3 +208,74 @@ describe('computeEffectiveSessionMaxTokens', () => {
).toBe(200_000);
});
});
describe('extractRuntimeFromPiSession', () => {
let tmpDir: string;
beforeEach(() => {
const base = path.resolve(process.cwd(), 'tmp');
fs.mkdirSync(base, { recursive: true });
tmpDir = fs.mkdtempSync(path.join(base, 'runtime-test-'));
});
afterEach(() => {
fs.rmSync(tmpDir, { recursive: true, force: true });
});
it('prefers the latest assistant provider/model/tokens', () => {
const sessionFile = path.join(tmpDir, 'session.jsonl');
fs.writeFileSync(
sessionFile,
[
JSON.stringify({
type: 'model_change',
provider: 'zai',
modelId: 'glm-5',
}),
JSON.stringify({
type: 'message',
message: {
role: 'assistant',
provider: 'zai',
model: 'glm-5',
usage: { totalTokens: 1234 },
},
}),
].join('\n'),
'utf-8',
);
expect(extractRuntimeFromPiSession(sessionFile)).toEqual({
actualProvider: 'zai',
actualModel: 'glm-5',
tokensUsed: 1234,
});
});
it('falls back to model_change when assistant metadata is absent', () => {
const sessionFile = path.join(tmpDir, 'session.jsonl');
fs.writeFileSync(
sessionFile,
JSON.stringify({
type: 'model_change',
provider: 'openrouter',
modelId: 'openai/gpt-5.1-codex',
}),
'utf-8',
);
expect(extractRuntimeFromPiSession(sessionFile)).toEqual({
actualProvider: 'openrouter',
actualModel: 'openai/gpt-5.1-codex',
tokensUsed: null,
});
});
it('returns nulls for missing or unreadable files', () => {
expect(extractRuntimeFromPiSession(path.join(tmpDir, 'missing.jsonl'))).toEqual({
actualProvider: null,
actualModel: null,
tokensUsed: null,
});
});
});

View file

@ -76,6 +76,9 @@ export interface AgentOutput {
status: 'success' | 'error';
result: string | null;
newSessionId?: string;
actualProvider?: string | null;
actualModel?: string | null;
tokensUsed?: number | null;
error?: string;
}
@ -133,6 +136,87 @@ function newestSessionFile(sessionDir: string): string | undefined {
}
}
export function extractRuntimeFromPiSession(sessionFile: string): {
actualProvider: string | null;
actualModel: string | null;
tokensUsed: number | null;
} {
try {
const raw = fs.readFileSync(sessionFile, 'utf-8');
if (!raw.trim()) {
return {
actualProvider: null,
actualModel: null,
tokensUsed: null,
};
}
const lines = raw.split('\n').filter(Boolean);
let actualProvider: string | null = null;
let actualModel: string | null = null;
let tokensUsed: number | null = null;
for (let i = lines.length - 1; i >= 0; i -= 1) {
try {
const parsed = JSON.parse(lines[i]) as {
type?: string;
provider?: unknown;
modelId?: unknown;
message?: {
role?: unknown;
provider?: unknown;
model?: unknown;
usage?: { totalTokens?: unknown };
};
};
const message = parsed.message;
if (
message?.role === 'assistant' &&
(typeof message.provider === 'string' ||
typeof message.model === 'string' ||
typeof message.usage?.totalTokens === 'number')
) {
actualProvider =
typeof message.provider === 'string' ? message.provider : null;
actualModel = typeof message.model === 'string' ? message.model : null;
tokensUsed =
typeof message.usage?.totalTokens === 'number'
? message.usage.totalTokens
: null;
break;
}
if (
parsed.type === 'model_change' &&
(!actualProvider || !actualModel)
) {
if (!actualProvider && typeof parsed.provider === 'string') {
actualProvider = parsed.provider;
}
if (!actualModel && typeof parsed.modelId === 'string') {
actualModel = parsed.modelId;
}
}
} catch {
continue;
}
}
return {
actualProvider,
actualModel,
tokensUsed,
};
} catch {
return {
actualProvider: null,
actualModel: null,
tokensUsed: null,
};
}
}
export function isSessionOversize(
sessionDir: string,
sessionId: string,
@ -581,6 +665,13 @@ export async function runJailAgent(
const sessionFile = PI_TUI_NO_SESSION
? undefined
: newestSessionFile(sessionDir);
const runtime = sessionFile
? extractRuntimeFromPiSession(path.join(sessionDir, sessionFile))
: {
actualProvider: null,
actualModel: null,
tokensUsed: null,
};
if (code === 0) {
void finish(
@ -588,6 +679,9 @@ export async function runJailAgent(
status: 'success',
result: stdout.trim() || null,
newSessionId: sessionFile,
actualProvider: runtime.actualProvider,
actualModel: runtime.actualModel,
tokensUsed: runtime.tokensUsed,
},
code,
);
@ -598,6 +692,9 @@ export async function runJailAgent(
result: null,
error: stderr.trim() || `pi exited with code ${code ?? 'null'}`,
newSessionId: sessionFile,
actualProvider: runtime.actualProvider,
actualModel: runtime.actualModel,
tokensUsed: runtime.tokensUsed,
},
code,
);

View file

@ -39,11 +39,14 @@ import {
} from './config.js';
import { getControlplaneSessionDir } from './controlplane-paths.js';
import { incCounter, registerGauge, startMetricsServer } from './metrics.js';
import {
insertActivity,
recordChatSpend,
} from './controlplane-db.js';
import {
buildStartupReport,
buildStartupReportWithDiagnostics,
} from './startup-report.js';
import { recordChatSpend } from './controlplane-db.js';
import { recordTokenSpend } from './controlplane-budget.js';
// ── Metrics prefix — derived from agent name for multi-install dashboards ──
@ -632,6 +635,24 @@ async function runAgent(
new Set(Object.keys(registeredGroups)),
);
const persistRuntimeMetadata = async (output: JailOutput): Promise<void> => {
if (!output.actualProvider || !output.actualModel) return;
group.jailConfig = {
...(group.jailConfig ?? {}),
lastRuntimeProvider: output.actualProvider,
lastRuntimeModel: output.actualModel,
lastRuntimeAt: new Date().toISOString(),
};
try {
await setRegisteredGroup(chatJid, group);
} catch {
logger.debug(
{ chatJid, groupFolder: group.folder },
'Failed to persist chat runtime metadata',
);
}
};
// Wrap onOutput to track session ID from streamed results
const wrappedOnOutput = onOutput
? async (output: JailOutput) => {
@ -639,6 +660,7 @@ async function runAgent(
sessions[group.folder] = output.newSessionId;
void setSession(group.folder, output.newSessionId);
}
await persistRuntimeMetadata(output);
await onOutput(output);
}
: undefined;
@ -667,6 +689,7 @@ async function runAgent(
sessions[group.folder] = output.newSessionId;
void setSession(group.folder, output.newSessionId);
}
await persistRuntimeMetadata(output);
if (output.status === 'error') {
logger.error(
@ -676,6 +699,16 @@ async function runAgent(
return 'error';
}
if (output.actualProvider && output.actualModel) {
group.jailConfig = {
...(group.jailConfig || {}),
lastRuntimeProvider: output.actualProvider,
lastRuntimeModel: output.actualModel,
lastRuntimeAt: new Date().toISOString(),
};
void setRegisteredGroup(chatJid, group);
}
// Store only the agent's reply as memory — not the full prompt (which can be huge)
if (output.result) {
void storeSessionSummary(output.result, {
@ -702,17 +735,35 @@ async function runAgent(
}
// Record estimated spend against the agent's global daily budget (used by /usage)
// since pi doesn't currently return token usage for chat runs.
// and persist one activity row so runtime analytics can report actual
// provider/model when available for the main chat path.
try {
const inputTokensEst = Math.round(prompt.length / 4);
const outputTokensEst = output.result
? Math.round(output.result.length / 4)
: 0;
const tokensUsed = output.tokensUsed ?? inputTokensEst + outputTokensEst;
await recordTokenSpend(
getMemoryPool(),
TENANT_ID,
inputTokensEst + outputTokensEst,
tokensUsed,
);
await insertActivity(getMemoryPool(), {
agent_id: TENANT_ID,
event_type: 'chat_run',
payload: {
chat_jid: chatJid,
group_folder: group.folder,
session_id: output.newSessionId ?? null,
configured_provider: PI_TUI_PROVIDER || null,
configured_model: PI_TUI_MODEL || null,
override_provider: group.jailConfig?.provider || null,
override_model: group.jailConfig?.model || null,
actual_provider: output.actualProvider ?? null,
actual_model: output.actualModel ?? null,
},
tokens_used: tokensUsed,
});
} catch {
logger.debug({ chatJid }, 'Agent budget spend recording failed');
}

View file

@ -154,6 +154,16 @@ function getChatModelOverrideLine(chatJid: string): string | null {
return `Chat override: ${effectiveProvider} / ${effectiveModel}`;
}
function getChatLastActualRuntimeLine(chatJid: string): string | null {
const c = ctx;
if (!c) return null;
const group = c.registeredGroups()[chatJid];
const provider = (group?.jailConfig?.lastRuntimeProvider || '').trim();
const model = (group?.jailConfig?.lastRuntimeModel || '').trim();
if (!provider || !model) return null;
return `Last actual runtime: ${provider} / ${model}`;
}
async function replyAuthFailure(ctxArg: any, message: string): Promise<void> {
try {
if (ctxArg.callbackQuery) {
@ -571,6 +581,8 @@ export async function handlePolicyCommand(
);
const overrideLine = getChatModelOverrideLine(chatJid);
if (overrideLine) lines.push(overrideLine);
const actualLine = getChatLastActualRuntimeLine(chatJid);
if (actualLine) lines.push(actualLine);
if (used !== null && limit !== null) {
lines.push(
`Budget: ${used.toLocaleString()}/${limit.toLocaleString()} tokens (${remainingPct ?? '?'}% remaining)`,
@ -1778,6 +1790,8 @@ export async function handleStatusCommand(
const overrideProvider = (group?.jailConfig?.provider || '').trim();
const overrideModel = (group?.jailConfig?.model || '').trim();
const actualProvider = (group?.jailConfig?.lastRuntimeProvider || '').trim();
const actualModel = (group?.jailConfig?.lastRuntimeModel || '').trim();
const hasOverride = Boolean(overrideProvider || overrideModel);
const effectiveProvider =
overrideProvider || (defaultProvider || '').trim();
@ -1792,6 +1806,9 @@ export async function handleStatusCommand(
`Default model: <code>${defaultProvider}/${defaultModel}</code>`,
);
}
if (actualProvider && actualModel) {
lines.push(`Last actual: <code>${actualProvider}/${actualModel}</code>`);
}
}
const active = c.queue.getActiveCount();

View file

@ -32,6 +32,9 @@ export interface JailConfig {
timeout?: number; // Default: 300000 (5 minutes)
provider?: string; // Per-group LLM provider override
model?: string; // Per-group LLM model override
lastRuntimeProvider?: string; // Last actual provider observed from pi session metadata
lastRuntimeModel?: string; // Last actual model observed from pi session metadata
lastRuntimeAt?: string; // ISO timestamp for last observed actual runtime
}
export interface RegisteredGroup {