2026-06-05 13:22:07 +02:00
6 changed files with 0 additions and 805 deletions
--- a/src/browser-operator.test.ts
+++ b/src/browser-operator.test.ts
@@ -1,115 +0,0 @@
-import { describe, expect, it, vi } from 'vitest';
-
-import { createClawdieBrowserOperator, type ClawdieBrowserOperator } from './browser-operator.js';
-import type { BackendCaller } from './browser-orchestrator.js';
-import type { BrowserCloneRow } from './browser-session-registry.js';
-
-const baseTime = new Date('2026-05-11T12:00:00.000Z');
-
-class OperatorPool {
-  row: BrowserCloneRow = {
-    session_id: '00000000-0000-4000-8000-000000000150',
-    tenant_id: 'tenant-a',
-    clone_name: 'browsertask150',
-    ip: '192.168.72.150',
-    status: 'open',
-    credential_mode: 'clean',
-    operator_grant_token_jti: null,
-    created_at: baseTime,
-    updated_at: baseTime,
-    opened_at: baseTime,
-    closed_at: null,
-    last_action_at: null,
-    expires_at: null,
-    error_code: null,
-    error_message: null,
-  };
-
-  async query(sql: string, params: unknown[] = []): Promise<{ rows: any[] }> {
-    const normalized = sql.replace(/\s+/g, ' ').trim();
-    if (normalized.startsWith('SELECT session_id::text')) {
-      return { rows: this.row.session_id === params[0] ? [this.row] : [] };
-    }
-    if (normalized.startsWith('UPDATE browser_clones')) {
-      const [, status, updatedAt, , closedAt, lastActionAt] = params;
-      this.row.status = status as any;
-      this.row.updated_at = updatedAt as Date;
-      if (closedAt) this.row.closed_at = closedAt as Date;
-      if (lastActionAt) this.row.last_action_at = lastActionAt as Date;
-    }
-    return { rows: [] };
-  }
-}
-
-describe('ClawdieBrowserOperator', () => {
-  it('exports the UI-TARS-compatible screenshot/execute shape', () => {
-    const operator: ClawdieBrowserOperator = createClawdieBrowserOperator(new OperatorPool() as any, 's1');
-    expect(typeof operator.screenshot).toBe('function');
-    expect(typeof operator.execute).toBe('function');
-    expect(typeof operator.finished).toBe('function');
-    expect(typeof operator.close).toBe('function');
-  });
-
-  it('adapts screenshot responses to imageBase64', async () => {
-    const backend: BackendCaller = vi.fn(async () => ({
-      ok: true,
-      status: 200,
-      data: { ok: true, image_base64: 'iVBORw0=', width: 1024, height: 768 },
-    }));
-    const operator = createClawdieBrowserOperator(
-      new OperatorPool() as any,
-      '00000000-0000-4000-8000-000000000150',
-      { backend, now: () => baseTime },
-    );
-    await expect(operator.screenshot()).resolves.toEqual({
-      imageBase64: 'iVBORw0=',
-      width: 1024,
-      height: 768,
-    });
-    expect(backend).toHaveBeenCalledWith('192.168.72.150', 'POST', '/screenshot', {}, 30000);
-  });
-
-  it('translates basic predictions to browser action helpers', async () => {
-    const backend: BackendCaller = vi.fn(async (_ip, _method, path, body) => ({
-      ok: true,
-      status: 200,
-      data: { ok: true, path, body },
-    }));
-    const operator = createClawdieBrowserOperator(
-      new OperatorPool() as any,
-      '00000000-0000-4000-8000-000000000150',
-      { backend, now: () => baseTime },
-    );
-    await expect(operator.execute({ action: 'navigate', url: 'https://example.com/' })).resolves.toMatchObject({
-      path: '/navigate',
-      body: { url: 'https://example.com/' },
-    });
-    await expect(operator.execute({ action: 'click', selector: '#go' })).resolves.toMatchObject({
-      path: '/click',
-      body: { selector: '#go' },
-    });
-    await expect(operator.execute({ action: 'type', text: 'hello' })).resolves.toMatchObject({
-      path: '/type',
-      body: { text: 'hello' },
-    });
-  });
-
-  it('tracks finish and closes the browser session', async () => {
-    const pool = new OperatorPool();
-    const hostd = vi.fn(async () => ({ id: 'x', ok: true, output: 'ok' }));
-    const operator = createClawdieBrowserOperator(
-      pool as any,
-      '00000000-0000-4000-8000-000000000150',
-      { hostd, now: () => baseTime },
-    );
-    await expect(operator.execute({ action: 'finish' })).resolves.toEqual({ ok: true, finished: true });
-    expect(await operator.finished?.()).toBe(true);
-    await operator.close?.();
-    expect(hostd).toHaveBeenCalledWith('browser-clone-reap', {
-      clone: 'browsertask150',
-      ip: '192.168.72.150',
-      suffix: 'bt150',
-    });
-    expect(pool.row.status).toBe('closed');
-  });
-});
--- a/src/browser-operator.ts
+++ b/src/browser-operator.ts
@@ -1,161 +0,0 @@
-import type pg from 'pg';
-
-import {
-  clickBrowserSession,
-  closeBrowserSession,
-  navigateBrowserSession,
-  readDomBrowserSession,
-  screenshotBrowserSession,
-  scrollBrowserSession,
-  typeBrowserSession,
-  type BrowserOrchestratorDeps,
-} from './browser-orchestrator.js';
-
-export interface ClawdieBrowserScreenshot {
-  imageBase64: string;
-  width?: number;
-  height?: number;
-}
-
-export interface ClawdieBrowserOperator {
-  screenshot(): Promise<ClawdieBrowserScreenshot>;
-  execute(prediction: unknown): Promise<unknown>;
-  finished?(): Promise<boolean> | boolean;
-  close?(): Promise<void>;
-}
-
-type Prediction = {
-  action?: string;
-  type?: string;
-  url?: string;
-  x?: number;
-  y?: number;
-  selector?: string;
-  text?: string;
-  dx?: number;
-  dy?: number;
-  full_page?: boolean;
-  max_bytes?: number;
-};
-
-function predictionAction(prediction: Prediction): string {
-  return String(prediction.action ?? prediction.type ?? '').trim().toLowerCase();
-}
-
-function ensureObject(prediction: unknown): Prediction {
-  if (!prediction || typeof prediction !== 'object') {
-    throw new Error('browser operator prediction must be an object');
-  }
-  return prediction as Prediction;
-}
-
-function unwrap<T>(result: { ok: true; value: T } | { ok: false; code: string; error: string }): T {
-  if (!result.ok) throw Object.assign(new Error(result.error), { code: result.code });
-  return result.value;
-}
-
-export class BrowserSessionOperator implements ClawdieBrowserOperator {
-  private done = false;
-
-  constructor(
-    private readonly pool: pg.Pool,
-    private readonly sessionId: string,
-    private readonly deps: BrowserOrchestratorDeps = {},
-  ) {}
-
-  async screenshot(): Promise<ClawdieBrowserScreenshot> {
-    const result = unwrap(await screenshotBrowserSession(this.pool, this.sessionId, {}, this.deps)) as {
-      image_base64?: string;
-      width?: number;
-      height?: number;
-    };
-    return {
-      imageBase64: result.image_base64 ?? '',
-      width: result.width,
-      height: result.height,
-    };
-  }
-
-  async execute(rawPrediction: unknown): Promise<unknown> {
-    const prediction = ensureObject(rawPrediction);
-    const action = predictionAction(prediction);
-    switch (action) {
-      case 'navigate':
-      case 'open':
-        if (!prediction.url) throw new Error('navigate action requires url');
-        return unwrap(await navigateBrowserSession(this.pool, this.sessionId, { url: prediction.url }, this.deps));
-      case 'click':
-        return unwrap(
-          await clickBrowserSession(
-            this.pool,
-            this.sessionId,
-            prediction.selector
-              ? { selector: prediction.selector }
-              : { x: prediction.x, y: prediction.y },
-            this.deps,
-          ),
-        );
-      case 'type':
-      case 'input':
-        return unwrap(
-          await typeBrowserSession(
-            this.pool,
-            this.sessionId,
-            { text: prediction.text ?? '', selector: prediction.selector },
-            this.deps,
-          ),
-        );
-      case 'scroll':
-        return unwrap(
-          await scrollBrowserSession(
-            this.pool,
-            this.sessionId,
-            { dx: prediction.dx, dy: prediction.dy, selector: prediction.selector },
-            this.deps,
-          ),
-        );
-      case 'read_dom':
-      case 'read':
-        return unwrap(
-          await readDomBrowserSession(
-            this.pool,
-            this.sessionId,
-            { max_bytes: prediction.max_bytes },
-            this.deps,
-          ),
-        );
-      case 'screenshot':
-        return unwrap(
-          await screenshotBrowserSession(
-            this.pool,
-            this.sessionId,
-            { full_page: prediction.full_page },
-            this.deps,
-          ),
-        );
-      case 'finish':
-      case 'done':
-        this.done = true;
-        return { ok: true, finished: true };
-      default:
-        throw new Error(`unsupported browser operator action: ${action || 'unknown'}`);
-    }
-  }
-
-  finished(): boolean {
-    return this.done;
-  }
-
-  async close(): Promise<void> {
-    await closeBrowserSession(this.pool, this.sessionId, this.deps);
-    this.done = true;
-  }
-}
-
-export function createClawdieBrowserOperator(
-  pool: pg.Pool,
-  sessionId: string,
-  deps: BrowserOrchestratorDeps = {},
-): ClawdieBrowserOperator {
-  return new BrowserSessionOperator(pool, sessionId, deps);
-}
--- a/src/tts.test.ts
+++ b/src/tts.test.ts
@@ -1,115 +0,0 @@
-import { describe, it, expect } from 'vitest';
-
-import { shouldApplyTts, stripTtsMarker, stripMarkdown } from './tts.js';
-
-describe('shouldApplyTts', () => {
-  it('returns false for off mode', () => {
-    expect(
-      shouldApplyTts({ mode: 'off', hadInboundAudio: false, text: 'hello' }),
-    ).toBe(false);
-  });
-
-  it('returns true for always mode', () => {
-    expect(
-      shouldApplyTts({ mode: 'always', hadInboundAudio: false, text: 'hello' }),
-    ).toBe(true);
-  });
-
-  it('returns true for inbound mode when inbound was audio', () => {
-    expect(
-      shouldApplyTts({ mode: 'inbound', hadInboundAudio: true, text: 'hello' }),
-    ).toBe(true);
-  });
-
-  it('returns false for inbound mode when inbound was not audio', () => {
-    expect(
-      shouldApplyTts({
-        mode: 'inbound',
-        hadInboundAudio: false,
-        text: 'hello',
-      }),
-    ).toBe(false);
-  });
-
-  it('returns true for tagged mode when [[tts]] is present', () => {
-    expect(
-      shouldApplyTts({
-        mode: 'tagged',
-        hadInboundAudio: false,
-        text: 'hello [[tts]] world',
-      }),
-    ).toBe(true);
-  });
-
-  it('returns false for tagged mode without marker', () => {
-    expect(
-      shouldApplyTts({
-        mode: 'tagged',
-        hadInboundAudio: false,
-        text: 'hello world',
-      }),
-    ).toBe(false);
-  });
-
-  it('[[tts]] is case-insensitive', () => {
-    expect(
-      shouldApplyTts({
-        mode: 'tagged',
-        hadInboundAudio: false,
-        text: '[[TTS]]',
-      }),
-    ).toBe(true);
-  });
-});
-
-describe('stripTtsMarker', () => {
-  it('removes [[tts]] from text', () => {
-    expect(stripTtsMarker('hello [[tts]] world')).toBe('hello  world');
-  });
-
-  it('returns text unchanged when no marker', () => {
-    expect(stripTtsMarker('hello world')).toBe('hello world');
-  });
-
-  it('removes multiple markers', () => {
-    expect(stripTtsMarker('[[tts]] a [[tts]] b')).toBe('a  b');
-  });
-
-  it('is case-insensitive', () => {
-    expect(stripTtsMarker('[[TTS]]hello')).toBe('hello');
-  });
-});
-
-describe('stripMarkdown', () => {
-  it('removes bold markers', () => {
-    expect(stripMarkdown('**hello**')).toBe('hello');
-  });
-
-  it('removes italic markers', () => {
-    expect(stripMarkdown('*hello*')).toBe('hello');
-  });
-
-  it('removes code fences', () => {
-    expect(stripMarkdown('```js\nconsole.log("hi")\n```')).toBe('');
-  });
-
-  it('removes inline code', () => {
-    expect(stripMarkdown('use `foo` here')).toBe('use foo here');
-  });
-
-  it('removes links keeping text', () => {
-    expect(stripMarkdown('[click](https://example.com)')).toBe('click');
-  });
-
-  it('removes heading markers', () => {
-    expect(stripMarkdown('## Title')).toBe('Title');
-  });
-
-  it('removes list markers', () => {
-    expect(stripMarkdown('- item')).toBe('item');
-  });
-
-  it('removes strikethrough', () => {
-    expect(stripMarkdown('~~deleted~~')).toBe('deleted');
-  });
-});
--- a/src/tts.ts
+++ b/src/tts.ts
@@ -1,171 +0,0 @@
-import fs from 'fs';
-import path from 'path';
-import { spawn } from 'child_process';
-
-import { logger } from './logger.js';
-import {
-  TTS_PROVIDER,
-  TTS_MAX_TEXT_LENGTH,
-  TTS_OUTPUT_FORMAT,
-  TTS_VOICE,
-  TMP_DIR,
-} from './config.js';
-
-export type TtsAutoMode = 'always' | 'inbound' | 'tagged' | 'off';
-
-interface TtsResult {
-  audioPath: string;
-}
-
-export function stripMarkdown(text: string): string {
-  let out = text;
-  out = out.replace(/\[Vision OCR\][\s\S]*?\[\/Vision OCR\]/g, '');
-  out = out.replace(/\[System note\][\s\S]*?\[\/System note\]/g, '');
-  out = out.replace(/```[\s\S]*?```/g, '');
-  out = out.replace(/`([^`]+)`/g, '$1');
-  out = out.replace(/\*\*([^*]+)\*\*/g, '$1');
-  out = out.replace(/\*([^*]+)\*/g, '$1');
-  out = out.replace(/__([^_]+)__/g, '$1');
-  out = out.replace(/_([^_]+)_/g, '$1');
-  out = out.replace(/~~([^~]+)~~/g, '$1');
-  out = out.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
-  out = out.replace(/^#{1,6}\s+/gm, '');
-  out = out.replace(/^[-*+]\s+/gm, '');
-  out = out.replace(/^\d+\.\s+/gm, '');
-  out = out.replace(/^>\s+/gm, '');
-  out = out.replace(/---+/g, '');
-  out = out.replace(/\n{3,}/g, '\n\n');
-  return out.trim();
-}
-
-export function shouldApplyTts(opts: {
-  mode: TtsAutoMode;
-  hadInboundAudio: boolean;
-  text: string;
-}): boolean {
-  if (opts.mode === 'off') return false;
-  if (opts.mode === 'always') return true;
-  if (opts.mode === 'inbound') return opts.hadInboundAudio;
-  if (opts.mode === 'tagged') return /\[\[tts\]\]/i.test(opts.text);
-  return false;
-}
-
-export function stripTtsMarker(text: string): string {
-  return text.replace(/\[\[tts\]\]/gi, '').trim();
-}
-
-export async function synthesize(
-  text: string,
-  opts?: {
-    voice?: string;
-    outputFormat?: string;
-  },
-): Promise<TtsResult> {
-  const voice = opts?.voice || TTS_VOICE;
-  const outputFormat = opts?.outputFormat || TTS_OUTPUT_FORMAT;
-
-  const cleanText = stripMarkdown(text);
-  if (!cleanText) {
-    throw new Error('TTS: no text to synthesize after stripping markdown');
-  }
-
-  const truncated =
-    cleanText.length > TTS_MAX_TEXT_LENGTH
-      ? cleanText.slice(0, TTS_MAX_TEXT_LENGTH)
-      : cleanText;
-
-  const ttsDir = path.join(TMP_DIR, 'tts');
-  fs.mkdirSync(ttsDir, { recursive: true });
-
-  const ext = inferExtension(outputFormat);
-  const fileName = `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}${ext}`;
-  const audioPath = path.join(ttsDir, fileName);
-
-  try {
-    if (TTS_PROVIDER === 'azure') {
-      throw new Error('TTS: azure provider not implemented yet');
-    } else {
-      await runEdgeTtsCli({
-        text: truncated,
-        voice,
-        audioPath,
-      });
-    }
-  } catch (err) {
-    try {
-      fs.unlinkSync(audioPath);
-    } catch {}
-    throw err;
-  }
-
-  if (!fs.existsSync(audioPath)) {
-    throw new Error('TTS: edge-tts completed but no audio file produced');
-  }
-  const stat = fs.statSync(audioPath);
-  if (!stat.isFile()) {
-    throw new Error('TTS: synthesized audio path is not a file');
-  }
-  if (stat.size < 1024) {
-    throw new Error(`TTS: synthesized audio file too small (${stat.size} bytes)`);
-  }
-
-  logger.info(
-    {
-      voice,
-      format: outputFormat,
-      chars: truncated.length,
-      bytes: stat.size,
-      path: audioPath,
-    },
-    'TTS synthesized',
-  );
-
-  return { audioPath };
-}
-
-function runEdgeTtsCli(opts: {
-  text: string;
-  voice: string;
-  audioPath: string;
-}): Promise<void> {
-  return new Promise((resolve, reject) => {
-    const args = [
-      '--text',
-      opts.text,
-      '--voice',
-      opts.voice,
-      '--write-media',
-      opts.audioPath,
-    ];
-    const binPath = path.join(
-      path.dirname(path.dirname(new URL(import.meta.url).pathname)),
-      'bin',
-      'edge-tts',
-    );
-    const proc = spawn(binPath, args, { stdio: 'ignore' });
-    proc.on('error', (err: any) => {
-      if (err && err.code === 'ENOENT') {
-        reject(
-          new Error(
-            'TTS: edge-tts not found. Install with: pip install edge-tts',
-          ),
-        );
-        return;
-      }
-      reject(err);
-    });
-    proc.on('exit', (code) => {
-      if (code === 0) resolve();
-      else reject(new Error(`TTS: edge-tts exited with code ${code}`));
-    });
-  });
-}
-
-function inferExtension(outputFormat: string): string {
-  const n = outputFormat.toLowerCase();
-  if (n.includes('ogg') || n.includes('opus')) return '.ogg';
-  if (n.includes('webm')) return '.webm';
-  if (n.includes('wav') || n.includes('riff') || n.includes('pcm'))
-    return '.wav';
-  return '.mp3';
-}
--- a/src/vision.test.ts
+++ b/src/vision.test.ts
@@ -1,60 +0,0 @@
-import fs from 'fs';
-import path from 'path';
-
-import { afterEach, describe, expect, it, vi } from 'vitest';
-
-import { TMP_TESTS_DIR } from './config.js';
-
-function writeTinyJpeg(filePath: string): void {
-  // Minimal JPEG header + EOI (enough for base64 packaging; model may ignore).
-  fs.writeFileSync(filePath, Buffer.from([0xff, 0xd8, 0xff, 0xd9]));
-}
-
-describe('augmentPromptWithVision', () => {
-  const originalEnv = { ...process.env };
-
-  afterEach(() => {
-    process.env = { ...originalEnv };
-    vi.restoreAllMocks();
-    vi.resetModules();
-  });
-
-  it('injects [Vision OCR] block for saved Telegram photos under TMP_DIR', async () => {
-    const dir = fs.mkdtempSync(path.join(TMP_TESTS_DIR, 'vision-test-'));
-    const imagePath = path.join(dir, 'photo.jpg');
-    writeTinyJpeg(imagePath);
-
-    process.env.VISION_PROVIDER = 'openrouter';
-    process.env.VISION_MODEL = 'nvidia/nemotron-nano-12b-v2-vl:free';
-    process.env.OPENROUTER_API_KEY = 'test-key';
-
-    const fetchMock = vi.fn(async () => {
-      return {
-        ok: true,
-        json: async () => ({
-          choices: [
-            {
-              message: { content: 'OCR:\nHELLO\n\nSUMMARY:\n- hi' },
-            },
-          ],
-        }),
-      } as any;
-    });
-    // @ts-expect-error vitest runtime override
-    globalThis.fetch = fetchMock;
-
-    // config.ts evaluates VISION_PROVIDER at module load; the static
-    // import of TMP_TESTS_DIR above already cached an empty value.
-    // Reset so the dynamic import below re-reads env we just set.
-    vi.resetModules();
-    const { augmentPromptWithVision } = await import('./vision.js');
-    const input = `User: test\n[Photo saved: ${imagePath}]`;
-    const out = await augmentPromptWithVision(input);
-
-    expect(fetchMock).toHaveBeenCalled();
-    expect(out).toContain('[Vision OCR]');
-    expect(out).toContain('OCR:\nHELLO');
-    expect(out).toContain('Do not claim you "cannot see"');
-  });
-});
-
--- a/src/vision.ts
+++ b/src/vision.ts
@@ -1,183 +0,0 @@
-import fs from 'fs';
-import path from 'path';
-
-import { readEnvFile } from './env.js';
-import {
-  RUNTIME_ID,
-  TMP_DIR,
-  VISION_MAX_CHARS_PER_IMAGE,
-  VISION_MAX_IMAGES,
-  VISION_MAX_TOTAL_CHARS,
-  VISION_MODEL,
-  VISION_PROVIDER,
-} from './config.js';
-import { logger } from './logger.js';
-
-type OpenRouterResponse = {
-  choices?: Array<{
-    message?: { content?: string };
-  }>;
-};
-
-let cachedOpenRouterKey: string | null | undefined;
-function getOpenRouterKey(): string | null {
-  if (cachedOpenRouterKey !== undefined) return cachedOpenRouterKey;
-  const fromEnv = process.env.OPENROUTER_API_KEY;
-  if (fromEnv) {
-    cachedOpenRouterKey = fromEnv;
-    return cachedOpenRouterKey;
-  }
-  const parsed = readEnvFile(['OPENROUTER_API_KEY']);
-  cachedOpenRouterKey = parsed.OPENROUTER_API_KEY || null;
-  return cachedOpenRouterKey;
-}
-
-function clampText(text: string, maxChars: number): string {
-  if (text.length <= maxChars) return text;
-  return `${text.slice(0, maxChars)}\n…(truncated)…`;
-}
-
-function isSafeTmpPath(filePath: string): boolean {
-  const resolved = path.resolve(filePath);
-  const safeRoot = path.resolve(TMP_DIR) + path.sep;
-  return resolved.startsWith(safeRoot);
-}
-
-async function describeImageOpenRouter(imagePath: string): Promise<string> {
-  const key = getOpenRouterKey();
-  if (!key) throw new Error('OPENROUTER_API_KEY not configured');
-
-  const resolved = path.resolve(imagePath);
-  const bytes = fs.readFileSync(resolved);
-  const base64 = bytes.toString('base64');
-  const ext = path.extname(resolved).toLowerCase().replace('.', '') || 'png';
-  const mime = ext === 'jpg' ? 'jpeg' : ext;
-  const dataUrl = `data:image/${mime};base64,${base64}`;
-
-  const prompt =
-    'You are an OCR + screenshot/meme helper.\n' +
-    '\n' +
-    'Task:\n' +
-    '1) Extract ALL readable text verbatim (including meme overlay text).\n' +
-    '2) If some text is unclear, write [unclear] for that span.\n' +
-    '3) After OCR, add a short bulleted summary (max 5 bullets) of what the image shows.\n' +
-    '\n' +
-    'Output format:\n' +
-    'OCR:\n' +
-    '<verbatim text>\n' +
-    '\n' +
-    'SUMMARY:\n' +
-    '- ...';
-
-  const body = {
-    model: VISION_MODEL,
-    messages: [
-      {
-        role: 'user',
-        content: [
-          { type: 'text', text: prompt },
-          { type: 'image_url', image_url: { url: dataUrl } },
-        ],
-      },
-    ],
-    max_tokens: 1200,
-    temperature: 0,
-  };
-
-  const resp = await fetch('https://openrouter.ai/api/v1/chat/completions', {
-    method: 'POST',
-    headers: {
-      Authorization: `Bearer ${key}`,
-      'Content-Type': 'application/json',
-      'HTTP-Referer': 'https://codeberg.org/Clawdie/Clawdie-AI',
-      'X-Title': `${RUNTIME_ID}-ai`,
-    },
-    body: JSON.stringify(body),
-  });
-
-  if (!resp.ok) {
-    const txt = await resp.text().catch(() => '');
-    throw new Error(`OpenRouter vision failed: ${resp.status} ${txt}`);
-  }
-
-  const data = (await resp.json()) as OpenRouterResponse;
-  const content = data.choices?.[0]?.message?.content?.trim() || '';
-  if (!content) throw new Error('OpenRouter vision returned empty content');
-  return content;
-}
-
-async function describeImage(imagePath: string): Promise<string> {
-  if (VISION_PROVIDER !== 'openrouter') {
-    throw new Error(`Unsupported vision provider: ${VISION_PROVIDER || '(empty)'}`);
-  }
-  return describeImageOpenRouter(imagePath);
-}
-
-/**
- * Replace `[Photo saved: /path]` placeholders with an OCR/description block.
- * Only runs when `VISION_PROVIDER` is set and `VISION_MAX_IMAGES > 0`.
- */
-export async function augmentPromptWithVision(text: string): Promise<string> {
-  if (!VISION_PROVIDER) return text;
-  if (VISION_MAX_IMAGES <= 0) return text;
-
-  const re = /\[Photo saved:\s*([^\]\n]+)\]/g;
-  const matches = Array.from(text.matchAll(re))
-    .map((m) => m[1]?.trim())
-    .filter(Boolean) as string[];
-  if (matches.length === 0) return text;
-
-  const unique: string[] = [];
-  for (const m of matches) {
-    if (!unique.includes(m)) unique.push(m);
-    if (unique.length >= VISION_MAX_IMAGES) break;
-  }
-
-  let totalAdded = 0;
-  let out = text;
-  let injectedAny = false;
-  for (const imagePath of unique) {
-    if (!isSafeTmpPath(imagePath)) {
-      logger.warn({ imagePath }, 'Vision skipped (path outside TMP_DIR)');
-      continue;
-    }
-    if (!fs.existsSync(imagePath)) {
-      logger.warn({ imagePath }, 'Vision skipped (file missing)');
-      continue;
-    }
-
-    try {
-      logger.info({ imagePath, model: VISION_MODEL }, 'Vision OCR started');
-      const ocr = await describeImage(imagePath);
-      logger.info(
-        { imagePath, snippet: clampText(ocr, 240) },
-        'Vision OCR raw output (snippet)',
-      );
-      const clipped = clampText(ocr, VISION_MAX_CHARS_PER_IMAGE);
-      const block = `\n\n[Vision OCR]\n${clipped}\n[/Vision OCR]\n`;
-      if (totalAdded + block.length > VISION_MAX_TOTAL_CHARS) {
-        logger.warn(
-          { imagePath, totalAdded, maxTotal: VISION_MAX_TOTAL_CHARS },
-          'Vision OCR skipped (budget exceeded)',
-        );
-        continue;
-      }
-      totalAdded += block.length;
-      out = out.replace(`[Photo saved: ${imagePath}]`, `[Photo saved: ${imagePath}]${block}`);
-      injectedAny = true;
-      logger.info({ imagePath }, 'Vision OCR injected into prompt');
-    } catch (err) {
-      logger.warn({ imagePath, err }, 'Vision OCR failed');
-    }
-  }
-
-  if (!injectedAny) return out;
-  return (
-    '[System note]\n' +
-    'One or more images were OCR’d by a vision helper model. Use the content inside\n' +
-    '`[Vision OCR]...[/Vision OCR]` as authoritative input. Do not claim you "cannot see"\n' +
-    'the image; answer using the OCR/summary provided.\n' +
-    '[/System note]\n\n' +
-    out
-  );
-}