feat(runtime): capture actual chat model usage
--- Build: FAIL | Tests: FAIL
This commit is contained in:
parent
eb69e32eef
commit
a73f211307
5 changed files with 243 additions and 3 deletions
|
|
@ -5,6 +5,7 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|||
|
||||
import {
|
||||
computeEffectiveSessionMaxTokens,
|
||||
extractRuntimeFromPiSession,
|
||||
isSessionOversize,
|
||||
writeGroupsSnapshot,
|
||||
writeTasksSnapshot,
|
||||
|
|
@ -207,3 +208,74 @@ describe('computeEffectiveSessionMaxTokens', () => {
|
|||
).toBe(200_000);
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractRuntimeFromPiSession', () => {
|
||||
let tmpDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
const base = path.resolve(process.cwd(), 'tmp');
|
||||
fs.mkdirSync(base, { recursive: true });
|
||||
tmpDir = fs.mkdtempSync(path.join(base, 'runtime-test-'));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('prefers the latest assistant provider/model/tokens', () => {
|
||||
const sessionFile = path.join(tmpDir, 'session.jsonl');
|
||||
fs.writeFileSync(
|
||||
sessionFile,
|
||||
[
|
||||
JSON.stringify({
|
||||
type: 'model_change',
|
||||
provider: 'zai',
|
||||
modelId: 'glm-5',
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: 'message',
|
||||
message: {
|
||||
role: 'assistant',
|
||||
provider: 'zai',
|
||||
model: 'glm-5',
|
||||
usage: { totalTokens: 1234 },
|
||||
},
|
||||
}),
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
expect(extractRuntimeFromPiSession(sessionFile)).toEqual({
|
||||
actualProvider: 'zai',
|
||||
actualModel: 'glm-5',
|
||||
tokensUsed: 1234,
|
||||
});
|
||||
});
|
||||
|
||||
it('falls back to model_change when assistant metadata is absent', () => {
|
||||
const sessionFile = path.join(tmpDir, 'session.jsonl');
|
||||
fs.writeFileSync(
|
||||
sessionFile,
|
||||
JSON.stringify({
|
||||
type: 'model_change',
|
||||
provider: 'openrouter',
|
||||
modelId: 'openai/gpt-5.1-codex',
|
||||
}),
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
expect(extractRuntimeFromPiSession(sessionFile)).toEqual({
|
||||
actualProvider: 'openrouter',
|
||||
actualModel: 'openai/gpt-5.1-codex',
|
||||
tokensUsed: null,
|
||||
});
|
||||
});
|
||||
|
||||
it('returns nulls for missing or unreadable files', () => {
|
||||
expect(extractRuntimeFromPiSession(path.join(tmpDir, 'missing.jsonl'))).toEqual({
|
||||
actualProvider: null,
|
||||
actualModel: null,
|
||||
tokensUsed: null,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -76,6 +76,9 @@ export interface AgentOutput {
|
|||
status: 'success' | 'error';
|
||||
result: string | null;
|
||||
newSessionId?: string;
|
||||
actualProvider?: string | null;
|
||||
actualModel?: string | null;
|
||||
tokensUsed?: number | null;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
|
|
@ -133,6 +136,87 @@ function newestSessionFile(sessionDir: string): string | undefined {
|
|||
}
|
||||
}
|
||||
|
||||
export function extractRuntimeFromPiSession(sessionFile: string): {
|
||||
actualProvider: string | null;
|
||||
actualModel: string | null;
|
||||
tokensUsed: number | null;
|
||||
} {
|
||||
try {
|
||||
const raw = fs.readFileSync(sessionFile, 'utf-8');
|
||||
if (!raw.trim()) {
|
||||
return {
|
||||
actualProvider: null,
|
||||
actualModel: null,
|
||||
tokensUsed: null,
|
||||
};
|
||||
}
|
||||
|
||||
const lines = raw.split('\n').filter(Boolean);
|
||||
let actualProvider: string | null = null;
|
||||
let actualModel: string | null = null;
|
||||
let tokensUsed: number | null = null;
|
||||
|
||||
for (let i = lines.length - 1; i >= 0; i -= 1) {
|
||||
try {
|
||||
const parsed = JSON.parse(lines[i]) as {
|
||||
type?: string;
|
||||
provider?: unknown;
|
||||
modelId?: unknown;
|
||||
message?: {
|
||||
role?: unknown;
|
||||
provider?: unknown;
|
||||
model?: unknown;
|
||||
usage?: { totalTokens?: unknown };
|
||||
};
|
||||
};
|
||||
|
||||
const message = parsed.message;
|
||||
if (
|
||||
message?.role === 'assistant' &&
|
||||
(typeof message.provider === 'string' ||
|
||||
typeof message.model === 'string' ||
|
||||
typeof message.usage?.totalTokens === 'number')
|
||||
) {
|
||||
actualProvider =
|
||||
typeof message.provider === 'string' ? message.provider : null;
|
||||
actualModel = typeof message.model === 'string' ? message.model : null;
|
||||
tokensUsed =
|
||||
typeof message.usage?.totalTokens === 'number'
|
||||
? message.usage.totalTokens
|
||||
: null;
|
||||
break;
|
||||
}
|
||||
|
||||
if (
|
||||
parsed.type === 'model_change' &&
|
||||
(!actualProvider || !actualModel)
|
||||
) {
|
||||
if (!actualProvider && typeof parsed.provider === 'string') {
|
||||
actualProvider = parsed.provider;
|
||||
}
|
||||
if (!actualModel && typeof parsed.modelId === 'string') {
|
||||
actualModel = parsed.modelId;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
actualProvider,
|
||||
actualModel,
|
||||
tokensUsed,
|
||||
};
|
||||
} catch {
|
||||
return {
|
||||
actualProvider: null,
|
||||
actualModel: null,
|
||||
tokensUsed: null,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export function isSessionOversize(
|
||||
sessionDir: string,
|
||||
sessionId: string,
|
||||
|
|
@ -581,6 +665,13 @@ export async function runJailAgent(
|
|||
const sessionFile = PI_TUI_NO_SESSION
|
||||
? undefined
|
||||
: newestSessionFile(sessionDir);
|
||||
const runtime = sessionFile
|
||||
? extractRuntimeFromPiSession(path.join(sessionDir, sessionFile))
|
||||
: {
|
||||
actualProvider: null,
|
||||
actualModel: null,
|
||||
tokensUsed: null,
|
||||
};
|
||||
|
||||
if (code === 0) {
|
||||
void finish(
|
||||
|
|
@ -588,6 +679,9 @@ export async function runJailAgent(
|
|||
status: 'success',
|
||||
result: stdout.trim() || null,
|
||||
newSessionId: sessionFile,
|
||||
actualProvider: runtime.actualProvider,
|
||||
actualModel: runtime.actualModel,
|
||||
tokensUsed: runtime.tokensUsed,
|
||||
},
|
||||
code,
|
||||
);
|
||||
|
|
@ -598,6 +692,9 @@ export async function runJailAgent(
|
|||
result: null,
|
||||
error: stderr.trim() || `pi exited with code ${code ?? 'null'}`,
|
||||
newSessionId: sessionFile,
|
||||
actualProvider: runtime.actualProvider,
|
||||
actualModel: runtime.actualModel,
|
||||
tokensUsed: runtime.tokensUsed,
|
||||
},
|
||||
code,
|
||||
);
|
||||
|
|
|
|||
57
src/index.ts
57
src/index.ts
|
|
@ -39,11 +39,14 @@ import {
|
|||
} from './config.js';
|
||||
import { getControlplaneSessionDir } from './controlplane-paths.js';
|
||||
import { incCounter, registerGauge, startMetricsServer } from './metrics.js';
|
||||
import {
|
||||
insertActivity,
|
||||
recordChatSpend,
|
||||
} from './controlplane-db.js';
|
||||
import {
|
||||
buildStartupReport,
|
||||
buildStartupReportWithDiagnostics,
|
||||
} from './startup-report.js';
|
||||
import { recordChatSpend } from './controlplane-db.js';
|
||||
import { recordTokenSpend } from './controlplane-budget.js';
|
||||
|
||||
// ── Metrics prefix — derived from agent name for multi-install dashboards ──
|
||||
|
|
@ -632,6 +635,24 @@ async function runAgent(
|
|||
new Set(Object.keys(registeredGroups)),
|
||||
);
|
||||
|
||||
const persistRuntimeMetadata = async (output: JailOutput): Promise<void> => {
|
||||
if (!output.actualProvider || !output.actualModel) return;
|
||||
group.jailConfig = {
|
||||
...(group.jailConfig ?? {}),
|
||||
lastRuntimeProvider: output.actualProvider,
|
||||
lastRuntimeModel: output.actualModel,
|
||||
lastRuntimeAt: new Date().toISOString(),
|
||||
};
|
||||
try {
|
||||
await setRegisteredGroup(chatJid, group);
|
||||
} catch {
|
||||
logger.debug(
|
||||
{ chatJid, groupFolder: group.folder },
|
||||
'Failed to persist chat runtime metadata',
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Wrap onOutput to track session ID from streamed results
|
||||
const wrappedOnOutput = onOutput
|
||||
? async (output: JailOutput) => {
|
||||
|
|
@ -639,6 +660,7 @@ async function runAgent(
|
|||
sessions[group.folder] = output.newSessionId;
|
||||
void setSession(group.folder, output.newSessionId);
|
||||
}
|
||||
await persistRuntimeMetadata(output);
|
||||
await onOutput(output);
|
||||
}
|
||||
: undefined;
|
||||
|
|
@ -667,6 +689,7 @@ async function runAgent(
|
|||
sessions[group.folder] = output.newSessionId;
|
||||
void setSession(group.folder, output.newSessionId);
|
||||
}
|
||||
await persistRuntimeMetadata(output);
|
||||
|
||||
if (output.status === 'error') {
|
||||
logger.error(
|
||||
|
|
@ -676,6 +699,16 @@ async function runAgent(
|
|||
return 'error';
|
||||
}
|
||||
|
||||
if (output.actualProvider && output.actualModel) {
|
||||
group.jailConfig = {
|
||||
...(group.jailConfig || {}),
|
||||
lastRuntimeProvider: output.actualProvider,
|
||||
lastRuntimeModel: output.actualModel,
|
||||
lastRuntimeAt: new Date().toISOString(),
|
||||
};
|
||||
void setRegisteredGroup(chatJid, group);
|
||||
}
|
||||
|
||||
// Store only the agent's reply as memory — not the full prompt (which can be huge)
|
||||
if (output.result) {
|
||||
void storeSessionSummary(output.result, {
|
||||
|
|
@ -702,17 +735,35 @@ async function runAgent(
|
|||
}
|
||||
|
||||
// Record estimated spend against the agent's global daily budget (used by /usage)
|
||||
// since pi doesn't currently return token usage for chat runs.
|
||||
// and persist one activity row so runtime analytics can report actual
|
||||
// provider/model when available for the main chat path.
|
||||
try {
|
||||
const inputTokensEst = Math.round(prompt.length / 4);
|
||||
const outputTokensEst = output.result
|
||||
? Math.round(output.result.length / 4)
|
||||
: 0;
|
||||
const tokensUsed = output.tokensUsed ?? inputTokensEst + outputTokensEst;
|
||||
await recordTokenSpend(
|
||||
getMemoryPool(),
|
||||
TENANT_ID,
|
||||
inputTokensEst + outputTokensEst,
|
||||
tokensUsed,
|
||||
);
|
||||
await insertActivity(getMemoryPool(), {
|
||||
agent_id: TENANT_ID,
|
||||
event_type: 'chat_run',
|
||||
payload: {
|
||||
chat_jid: chatJid,
|
||||
group_folder: group.folder,
|
||||
session_id: output.newSessionId ?? null,
|
||||
configured_provider: PI_TUI_PROVIDER || null,
|
||||
configured_model: PI_TUI_MODEL || null,
|
||||
override_provider: group.jailConfig?.provider || null,
|
||||
override_model: group.jailConfig?.model || null,
|
||||
actual_provider: output.actualProvider ?? null,
|
||||
actual_model: output.actualModel ?? null,
|
||||
},
|
||||
tokens_used: tokensUsed,
|
||||
});
|
||||
} catch {
|
||||
logger.debug({ chatJid }, 'Agent budget spend recording failed');
|
||||
}
|
||||
|
|
|
|||
|
|
@ -154,6 +154,16 @@ function getChatModelOverrideLine(chatJid: string): string | null {
|
|||
return `Chat override: ${effectiveProvider} / ${effectiveModel}`;
|
||||
}
|
||||
|
||||
function getChatLastActualRuntimeLine(chatJid: string): string | null {
|
||||
const c = ctx;
|
||||
if (!c) return null;
|
||||
const group = c.registeredGroups()[chatJid];
|
||||
const provider = (group?.jailConfig?.lastRuntimeProvider || '').trim();
|
||||
const model = (group?.jailConfig?.lastRuntimeModel || '').trim();
|
||||
if (!provider || !model) return null;
|
||||
return `Last actual runtime: ${provider} / ${model}`;
|
||||
}
|
||||
|
||||
async function replyAuthFailure(ctxArg: any, message: string): Promise<void> {
|
||||
try {
|
||||
if (ctxArg.callbackQuery) {
|
||||
|
|
@ -571,6 +581,8 @@ export async function handlePolicyCommand(
|
|||
);
|
||||
const overrideLine = getChatModelOverrideLine(chatJid);
|
||||
if (overrideLine) lines.push(overrideLine);
|
||||
const actualLine = getChatLastActualRuntimeLine(chatJid);
|
||||
if (actualLine) lines.push(actualLine);
|
||||
if (used !== null && limit !== null) {
|
||||
lines.push(
|
||||
`Budget: ${used.toLocaleString()}/${limit.toLocaleString()} tokens (${remainingPct ?? '?'}% remaining)`,
|
||||
|
|
@ -1778,6 +1790,8 @@ export async function handleStatusCommand(
|
|||
|
||||
const overrideProvider = (group?.jailConfig?.provider || '').trim();
|
||||
const overrideModel = (group?.jailConfig?.model || '').trim();
|
||||
const actualProvider = (group?.jailConfig?.lastRuntimeProvider || '').trim();
|
||||
const actualModel = (group?.jailConfig?.lastRuntimeModel || '').trim();
|
||||
const hasOverride = Boolean(overrideProvider || overrideModel);
|
||||
const effectiveProvider =
|
||||
overrideProvider || (defaultProvider || '').trim();
|
||||
|
|
@ -1792,6 +1806,9 @@ export async function handleStatusCommand(
|
|||
`Default model: <code>${defaultProvider}/${defaultModel}</code>`,
|
||||
);
|
||||
}
|
||||
if (actualProvider && actualModel) {
|
||||
lines.push(`Last actual: <code>${actualProvider}/${actualModel}</code>`);
|
||||
}
|
||||
}
|
||||
|
||||
const active = c.queue.getActiveCount();
|
||||
|
|
|
|||
|
|
@ -32,6 +32,9 @@ export interface JailConfig {
|
|||
timeout?: number; // Default: 300000 (5 minutes)
|
||||
provider?: string; // Per-group LLM provider override
|
||||
model?: string; // Per-group LLM model override
|
||||
lastRuntimeProvider?: string; // Last actual provider observed from pi session metadata
|
||||
lastRuntimeModel?: string; // Last actual model observed from pi session metadata
|
||||
lastRuntimeAt?: string; // ISO timestamp for last observed actual runtime
|
||||
}
|
||||
|
||||
export interface RegisteredGroup {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue