Improve doctor signal and pg query safety
- Avoid concurrent pg Client queries (fix pg@9 deprecation warning).
- Treat old pipeline failures as stale, and ignore them once a newer reply has succeeded.
- Make doctor warn (not fail) when only the skills artifact is missing (expected in dev).
- Align the .env.example PI_TUI_MODEL default with current ZAI usage (glm-5-turbo).

---
Build: pass | Tests: not run
This commit is contained in:
parent
0898c6d6e0
commit
6cd78dddf7
4 changed files with 46 additions and 27 deletions
|
|
@ -18,7 +18,7 @@ PI_TUI_BIN=pi
|
|||
# Explicit provider/model values below override the profile defaults.
|
||||
PI_TUI_PROFILE=operator
|
||||
PI_TUI_PROVIDER=zai
|
||||
PI_TUI_MODEL=glm-5
|
||||
PI_TUI_MODEL=glm-5-turbo
|
||||
|
||||
# Primary API key for pi
|
||||
ZAI_API_KEY=
|
||||
|
|
|
|||
|
|
@ -67,7 +67,16 @@ async function main(): Promise<void> {
|
|||
const splitBrainIssues = collectSplitBrainIssues(splitBrain);
|
||||
const splitBrainStatus = deriveSplitBrainReadiness(splitBrain);
|
||||
const issues = [...health.issues, ...splitBrainIssues];
|
||||
const status = splitBrainIssues.length > 0 ? 'error' : health.status;
|
||||
const criticalSplitBrain =
|
||||
splitBrain.skillsDb !== 'available' ||
|
||||
splitBrain.memoryDb !== 'available' ||
|
||||
splitBrain.skillsRuntimeLookup !== 'present';
|
||||
const status: 'ok' | 'warn' | 'error' =
|
||||
health.status === 'error' || criticalSplitBrain
|
||||
? 'error'
|
||||
: issues.length > 0
|
||||
? 'warn'
|
||||
: 'ok';
|
||||
const { snapshot } = health;
|
||||
|
||||
console.log(`STATUS: ${status}`);
|
||||
|
|
|
|||
|
|
@ -271,6 +271,9 @@ export function assessHealth(snapshot = readHealthSnapshot()): HealthAssessment
|
|||
|
||||
const messageLoopAge = ageMs(snapshot.host.message_loop_heartbeat_at);
|
||||
const schedulerAge = ageMs(snapshot.host.scheduler_heartbeat_at);
|
||||
const lastFailureAge = ageMs(snapshot.pipeline.last_reply_failed_at);
|
||||
const lastReplySentAge = ageMs(snapshot.pipeline.last_reply_sent_at);
|
||||
const failureStaleMs = 6 * 60 * 60 * 1000;
|
||||
|
||||
if (!snapshot.host.process_started_at) {
|
||||
issues.push('host process has not recorded startup');
|
||||
|
|
@ -299,10 +302,17 @@ export function assessHealth(snapshot = readHealthSnapshot()): HealthAssessment
|
|||
snapshot.pipeline.last_failure_code &&
|
||||
snapshot.pipeline.last_reply_failed_at
|
||||
) {
|
||||
issues.push(
|
||||
`last pipeline failure: ${snapshot.pipeline.last_failure_code} (${snapshot.pipeline.last_failure_message || 'no message'})`,
|
||||
);
|
||||
if (status === 'ok') status = 'warn';
|
||||
const isStale = lastFailureAge !== null && lastFailureAge > failureStaleMs;
|
||||
const wasFollowedBySuccess =
|
||||
lastFailureAge !== null &&
|
||||
lastReplySentAge !== null &&
|
||||
lastReplySentAge < lastFailureAge;
|
||||
if (!isStale && !wasFollowedBySuccess) {
|
||||
issues.push(
|
||||
`last pipeline failure: ${snapshot.pipeline.last_failure_code} (${snapshot.pipeline.last_failure_message || 'no message'})`,
|
||||
);
|
||||
if (status === 'ok') status = 'warn';
|
||||
}
|
||||
}
|
||||
|
||||
return { status, issues, snapshot };
|
||||
|
|
|
|||
|
|
@ -100,21 +100,18 @@ export async function collectSplitBrainStatus(): Promise<SplitBrainStatus> {
|
|||
});
|
||||
await client.connect();
|
||||
try {
|
||||
const [artifactRows, documentRows, chunkRows, latestArtifact] =
|
||||
await Promise.all([
|
||||
client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM builtin_knowledge_artifacts',
|
||||
),
|
||||
client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM builtin_knowledge_documents',
|
||||
),
|
||||
client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM builtin_knowledge_chunks',
|
||||
),
|
||||
client.query<{ artifact_version: string }>(
|
||||
'SELECT artifact_version FROM builtin_knowledge_artifacts ORDER BY created_at DESC LIMIT 1',
|
||||
),
|
||||
]);
|
||||
const artifactRows = await client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM builtin_knowledge_artifacts',
|
||||
);
|
||||
const documentRows = await client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM builtin_knowledge_documents',
|
||||
);
|
||||
const chunkRows = await client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM builtin_knowledge_chunks',
|
||||
);
|
||||
const latestArtifact = await client.query<{ artifact_version: string }>(
|
||||
'SELECT artifact_version FROM builtin_knowledge_artifacts ORDER BY created_at DESC LIMIT 1',
|
||||
);
|
||||
|
||||
status.skillsArtifactRows = Number(artifactRows.rows[0]?.count ?? 0);
|
||||
status.skillsDocumentRows = Number(documentRows.rows[0]?.count ?? 0);
|
||||
|
|
@ -160,12 +157,15 @@ export async function collectSplitBrainStatus(): Promise<SplitBrainStatus> {
|
|||
});
|
||||
await client.connect();
|
||||
try {
|
||||
const [memoryRows, memoryChunkRows, memoryEmbeddingRows] =
|
||||
await Promise.all([
|
||||
client.query<{ count: string }>('SELECT count(*) FROM memories'),
|
||||
client.query<{ count: string }>('SELECT count(*) FROM memory_chunks'),
|
||||
client.query<{ count: string }>('SELECT count(*) FROM memory_embeddings'),
|
||||
]);
|
||||
const memoryRows = await client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM memories',
|
||||
);
|
||||
const memoryChunkRows = await client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM memory_chunks',
|
||||
);
|
||||
const memoryEmbeddingRows = await client.query<{ count: string }>(
|
||||
'SELECT count(*) FROM memory_embeddings',
|
||||
);
|
||||
|
||||
status.memoryRows = Number(memoryRows.rows[0]?.count ?? 0);
|
||||
status.memoryChunkRows = Number(memoryChunkRows.rows[0]?.count ?? 0);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue