// Score a predictions file against ground-truth DOM bboxes.
// Usage: node score.mjs results/predictions-<model>.json
//
// Outputs a per-target table + an aggregate report:
//   - pass rate at 30 px tolerance (center distance)
//   - mean / median / max pixel distance
//   - in-bbox rate (predicted point falls inside the ground-truth bbox)
//
// A target is reported as an error, and left out of the aggregate numbers,
// if either its ground truth or its prediction is missing.

import { readFileSync, readdirSync, writeFileSync } from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// Maximum distance, in pixels, between the predicted point and the
// ground-truth center for a prediction to count as a pass.
const TOLERANCE_PX = 30;

// Directory containing this script. `import.meta.dirname` is only defined on
// newer Node releases (20.11+ / 21.2+); on older runtimes it is undefined and
// path.resolve(undefined) would throw, so derive the directory from
// `import.meta.url` instead.
const ROOT = path.resolve(
  import.meta.dirname ?? path.dirname(fileURLToPath(import.meta.url)),
);

// The predictions file is the single required CLI argument; without it there
// is nothing to score, so print usage and exit with a non-zero status.
const predPath = process.argv[2];
if (!predPath) {
  console.error('usage: node score.mjs <predictions.json>');
  process.exit(1);
}

// Parse the predictions file named on the command line.
const preds = JSON.parse(readFileSync(predPath, 'utf8'));

// Ground-truth files live in `<ROOT>/results` and are recognised by a
// two-digit prefix followed by a dash (e.g. `01-<name>.json`).
const truthFiles = readdirSync(path.join(ROOT, 'results')).filter((name) =>
  /^\d{2}-.+\.json$/.test(name),
);

// Index the ground truth as truth[fixture][targetId] -> target record.
const truth = {};
truthFiles.forEach((name) => {
  const raw = readFileSync(path.join(ROOT, 'results', name), 'utf8');
  const { fixture, targets } = JSON.parse(raw);
  const pairs = targets.map((target) => [target.id, target]);
  truth[fixture] = Object.fromEntries(pairs);
});

/**
 * Build the result row for one predicted target.
 *
 * @param {string} fixture - Fixture key the prediction belongs to.
 * @param {object} p - Prediction entry; reads `p.id` and `p.pred` ({ x, y }).
 * @param {object} [t] - Ground-truth record for the same id, if any; reads
 *   `missing`, `label`, `center` ({ x, y }) and `bbox` ({ x, y, w, h }).
 * @returns {object} An error row (`error` set) when the ground truth or the
 *   prediction is unusable, otherwise a scored row with `dist`, `inside`
 *   and `pass`.
 */
function scoreTarget(fixture, p, t) {
  const { id } = p;
  if (!t || t.missing) {
    return { fixture, id, error: 'no ground truth' };
  }

  const { pred } = p;
  const hasPoint =
    pred && typeof pred.x === 'number' && typeof pred.y === 'number';
  if (!hasPoint) {
    return { fixture, id, label: t.label, error: 'no prediction' };
  }

  const { center, bbox } = t;
  const dx = pred.x - center.x;
  const dy = pred.y - center.y;
  // Distance is rounded to whole pixels *before* the tolerance comparison.
  const dist = Math.round(Math.sqrt(dx * dx + dy * dy));

  // Edges of the bbox count as inside.
  const withinX = pred.x >= bbox.x && pred.x <= bbox.x + bbox.w;
  const withinY = pred.y >= bbox.y && pred.y <= bbox.y + bbox.h;

  return {
    fixture,
    id,
    label: t.label,
    pred,
    truth_center: center,
    bbox,
    dist,
    inside: withinX && withinY,
    pass: dist <= TOLERANCE_PX,
  };
}

// One row per predicted target, in the order fixtures and targets appear in
// the predictions file.
const rows = [];
for (const [fixture, predTargets] of Object.entries(preds.fixtures)) {
  for (const p of predTargets) {
    rows.push(scoreTarget(fixture, p, truth[fixture]?.[p.id]));
  }
}

// Only rows that produced a distance feed the aggregate statistics; rows
// carrying an `error` are excluded here and reported via `error_count`.
const scored = rows.filter((r) => !r.error);
const passes = scored.filter((r) => r.pass).length;
const insides = scored.filter((r) => r.inside).length;
const dists = scored.map((r) => r.dist).sort((a, b) => a - b);

// With nothing scored every metric is undefined. Report explicit nulls rather
// than letting 0/0 produce NaN (serialised as null) and out-of-range indexing
// produce undefined (silently dropped from the JSON output).
const hasScores = dists.length > 0;

/** Fraction of scored rows, rounded to 3 decimals; null when nothing was scored. */
const rateOf = (count) =>
  hasScores ? +(count / scored.length).toFixed(3) : null;

/** Median of an ascending-sorted array; averages the two middle values for even lengths. */
const medianOf = (sorted) => {
  if (sorted.length === 0) return null;
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 === 1
    ? sorted[mid]
    : (sorted[mid - 1] + sorted[mid]) / 2;
};

const mean = hasScores
  ? dists.reduce((a, b) => a + b, 0) / dists.length
  : null;
const median = medianOf(dists);
const max = hasScores ? dists[dists.length - 1] : null;

const summary = {
  model: preds.model,
  tolerance_px: TOLERANCE_PX,
  total: scored.length,
  // Targets dropped from the statistics (no ground truth / no prediction).
  error_count: rows.length - scored.length,
  pass_count: passes,
  pass_rate: rateOf(passes),
  inside_bbox_count: insides,
  inside_bbox_rate: rateOf(insides),
  mean_dist_px: mean === null ? null : Math.round(mean),
  median_dist_px: median,
  max_dist_px: max,
};

// Horizontal rule framing the per-target table.
const divider = '─'.repeat(96);

/**
 * Render one result row as a single console line.
 * Error rows show only the fixture, id and error text; scored rows show the
 * verdict, rounded distance, optional in-bbox marker, both points and label.
 */
function formatRow(r) {
  if (r.error) {
    return ` [${r.fixture}] ${r.id} ✗ ${r.error}`;
  }
  const verdict = r.pass ? 'PASS' : 'FAIL';
  const bboxNote = r.inside ? ' in-bbox' : '';
  const { pred, truth_center: center } = r;
  return [
    ` [${r.fixture}]`,
    r.id.padEnd(18),
    verdict,
    `dist=${String(r.dist).padStart(4)}px${bboxNote}`,
    `pred=(${pred.x},${pred.y})`,
    `truth=(${center.x},${center.y})`,
    `"${r.label}"`,
  ].join(' ');
}

console.log('Per-target results:');
console.log(divider);
rows.forEach((r) => {
  console.log(formatRow(r));
});
console.log(divider);
console.log('Summary:');
console.log(JSON.stringify(summary, null, 2));

// Persist the summary and the per-target rows as
// `<ROOT>/results/score-<predictions basename>.json`.
const scoreFileName = `score-${path.basename(predPath, '.json')}.json`;
const out = path.join(ROOT, 'results', scoreFileName);
const report = { summary, rows };
writeFileSync(out, JSON.stringify(report, null, 2));
console.log(`\nWrote ${out}`);