From a38b60e775687937a3f6a5e0aaf3dd3597611365 Mon Sep 17 00:00:00 2001 From: Hermes & Sam Date: Wed, 17 Jun 2026 11:58:43 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20quota=20exhaustion=20policy=20scripts?= =?UTF-8?q?=20+=20docs=20(rename=20ETR=E2=86=92ETA)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - scripts/quota_reset_eta.py: timezone-aware quota reset calculator - scripts/task_dedup_before_retry.py: skip retry if task already resolved - AGENTS.md: cross-harness 4-step quota policy - SOUL.md: 'Never retry solved work' principle - memories/curated/api-timezone-handling.md: provider timezone map --- AGENTS.md | 22 +- SOUL.md | 1 + memories/curated/api-timezone-handling.md | 29 +++ scripts/quota_reset_eta.py | 243 ++++++++++++++++++++++ scripts/task_dedup_before_retry.py | 150 +++++++++++++ 5 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 memories/curated/api-timezone-handling.md create mode 100644 scripts/quota_reset_eta.py create mode 100644 scripts/task_dedup_before_retry.py diff --git a/AGENTS.md b/AGENTS.md index fce20b7..4f7d543 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,6 +11,26 @@ - When adapting for Autolycus (Hermes fork, FreeBSD): same profile-context load; FreeBSD-specific gaps (voice, clipboard) are known and tracked in `docs/CLAWDIE-HERMES-FREEBSD-INTEGRATION.md`. - When adapting for Pi/Codex/Claude Code/Zot: render the prompt bundle (`render-prompt`) and inject as task-scoped context. Durable findings flow back through reviewed commits. +## Quota exhaustion policy (cross-harness) + +When any agent hits an API quota limit (429 / rate-limit): + +1. **Calculate reset time** with `scripts/quota_reset_eta.py` — parses provider error + messages and converts to UTC/CEST correctly. Handles Beijing time (Z.AI/DeepSeek), + UTC (OpenRouter), and PDT (Anthropic/OpenAI). + +2. 
**Check if task already resolved** with `scripts/task_dedup_before_retry.py` — + queries Colibri task board, checks git activity since block time. Skips retry + if another agent or operator already completed the work. + +3. **Schedule retry** — create a one-shot cron at reset time + 60s buffer. + Never retry immediately (429 means quota, not transient). + +4. **Report** — log to glasspane: provider, reset time, task status, action taken. + +Rule: **never retry a quota-blocked task without checking whether it was +already solved.** Tokens are money. A solved task retried is waste. + ## Active infrastructure - Forgejo: code.smilepowered.org (SSH port 2222) @@ -31,7 +51,7 @@ ## Private sources - `hermes-soul`: git@code.smilepowered.org:clawdie/hermes-soul.git (private, operator access only) -- `hermes-bsd`: git@code.smilepowered.org:clawdie/hermes-bsd.git (private, MIT-licensed FreeBSD patches) +- `hermes-bsd`: git@code.smilepowered.org:clawdie/hermes-bsd.git (public, MIT-licensed FreeBSD patches) - `clawdie-ai`: git@code.smilepowered.org:clawdie/clawdie-ai.git (private) - `clawdie-iso`: git@code.smilepowered.org:clawdie/clawdie-iso.git (private) diff --git a/SOUL.md b/SOUL.md index b2dde8a..e30506a 100644 --- a/SOUL.md +++ b/SOUL.md @@ -14,6 +14,7 @@ We are a small, self-hosted, privacy-first AI agent collective operated by Sam ( - **Local-first.** Media processing, inference, builds — run locally when possible. Cloud is a fallback, not a default. - **Zero public exposure.** No open ports, no public IPs beyond what Tailscale negotiates. Each agent gets its own SSH key — never copy private keys between hosts. - **Durable memory returns here.** Insights gained in any harness flow back through review into this repository. No knowledge trapped in a single session or platform. +- **Never retry solved work.** When an agent hits a quota limit, it must first check whether another agent or the operator already completed the task. 
Tokens are money — a solved task retried is pure waste. Use `scripts/quota_reset_eta.py` for timezone-aware reset calculation and `scripts/task_dedup_before_retry.py` to verify task status before scheduling retry. ## Our voice diff --git a/memories/curated/api-timezone-handling.md b/memories/curated/api-timezone-handling.md new file mode 100644 index 0000000..4132e8d --- /dev/null +++ b/memories/curated/api-timezone-handling.md @@ -0,0 +1,29 @@ +# API Timezone Handling + +API quota reset timestamps are provider-specific and often lack timezone metadata. Always convert to UTC before scheduling retries. + +## Provider timezone map + +| Provider | Timezone | UTC offset | Example | +|----------|----------|-----------|---------| +| Z.AI / GLM (Zhipu) | Asia/Shanghai | UTC+8 | "15:43" → 07:43 UTC | +| DeepSeek | Asia/Shanghai | UTC+8 | Chinese company, Hangzhou | +| OpenRouter | UTC | UTC+0 | Timestamps are UTC | +| Anthropic | America/Los_Angeles | UTC-8/-7 (PST/PDT) | California | +| OpenAI | America/Los_Angeles | UTC-8/-7 (PST/PDT) | California | + +## Detection + +Use `scripts/quota_reset_eta.py` to parse error messages and compute the correct +reset time. It detects providers via keywords ("z.ai", "智谱", "使用上限", "deepseek", +"openrouter", "anthropic", "openai") and applies the correct timezone. + +## Common traps + +- **Chinese error messages lack timezone**: "您的限额将在 2026-06-18 15:43:27 重置" + means Beijing time (UTC+8), not UTC. +- **English messages may include timezone**: "reset on 2026-06-20T00:00:00Z" + is explicit (Z = UTC). Trust explicit timezone over provider guess. +- **Retry-After header**: in seconds from now, not an absolute timestamp. +- **CEST vs CET**: Slovenia uses CEST (UTC+2) in summer, CET (UTC+1) in winter. + Always compute from datetime objects, not hardcoded offsets. 
diff --git a/scripts/quota_reset_eta.py b/scripts/quota_reset_eta.py new file mode 100644 index 0000000..d0847f4 --- /dev/null +++ b/scripts/quota_reset_eta.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +"""Quota Reset ETA — calculate the estimated time of reset from provider error messages. + +Parses quota-exhaustion errors from common providers (Z.AI/GLM, DeepSeek, +OpenRouter, Anthropic, OpenAI) and computes the reset time in UTC and local +time. Outputs a cron-ready timestamp for scheduling retry. + +Usage: + python3 quota_reset_eta.py "error message or timestamp string" + python3 quota_reset_eta.py "2026-06-18 15:43:27" --tz Asia/Shanghai + +Output: + { + "status": "quota_exhausted", + "reset_utc": "2026-06-18T07:43:27Z", + "reset_local": "2026-06-18T09:43:27+02:00", + "cron_iso": "2026-06-18T09:44:00+02:00", + "provider": "z.ai", + "confidence": "high", + "retry_after_seconds": 12345, + "human": "Resets Jun 18 at 09:43 CEST (09:44 safe retry)" + } +""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from datetime import datetime, timedelta, timezone +from typing import Optional + +# ── Provider timezone map ────────────────────────────── +# Sources of truth per provider, verified against real error messages. 
PROVIDER_TZ = {
    "z.ai": "Asia/Shanghai",             # Zhipu AI — Beijing UTC+8
    "glm": "Asia/Shanghai",              # GLM API — same as z.ai
    "deepseek": "Asia/Shanghai",         # DeepSeek — Hangzhou UTC+8
    "openrouter": "UTC",                 # OpenRouter — UTC
    "anthropic": "America/Los_Angeles",  # Anthropic — PST/PDT
    "openai": "America/Los_Angeles",     # OpenAI — PST/PDT
}

# ── Error message patterns ─────────────────────────────

# Chinese-style: "您的限额将在 2026-06-18 15:43:27 重置"
CN_RESET_PATTERN = re.compile(
    r"(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s*重置"
)

# English-style: "quota will reset on 2026-06-18T15:43:27Z".
# The optional trailing "Z" / "±HH:MM" is captured so that an explicit zone
# in the message can override the provider's default zone.
EN_RESET_PATTERN = re.compile(
    r"reset\s+(?:on\s+)?"
    r"(\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)",
    re.IGNORECASE,
)

# ISO 8601 with explicit zone: "2026-06-18T15:43:27+08:00" or "...T15:43:27Z"
ISO_TZ_PATTERN = re.compile(
    r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2}))"
)

# Retry-After header style: seconds
RETRY_AFTER_PATTERN = re.compile(
    r"retry[_-]after[:\s]+(\d+)", re.IGNORECASE
)

# Plain timestamp: "2026-06-18 15:43:27"
PLAIN_TS_PATTERN = re.compile(
    r"(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})"
)

# Length of "YYYY-MM-DDTHH:MM:SS"; anything longer carries a zone suffix.
_NAIVE_TS_LEN = 19


# ── Parse functions ────────────────────────────────────

def parse_reset_time(
    error_text: str,
    provider_tz: Optional[str] = None,
) -> tuple[datetime, str, str]:
    """Parse a quota error message and return (reset_utc, provider, confidence).

    Patterns are tried from most to least explicit: ISO 8601 with a zone,
    the Chinese "... 重置" form, the English "reset [on] ..." form, a bare
    timestamp, a Retry-After value in seconds, and finally a one-hour guess
    for text that merely mentions "429" or "quota".

    Args:
        error_text: Raw provider error message or timestamp string.
        provider_tz: IANA zone name used for timestamps that carry no zone
            of their own. When omitted, the Chinese form defaults to
            Asia/Shanghai and the other forms to UTC.

    Returns:
        A tuple of (timezone-aware UTC datetime, provider label, confidence)
        where confidence is "high" (explicit timestamp), "medium" (bare
        timestamp or Retry-After) or "low" (generic fallback).

    Raises:
        ValueError: If nothing parseable is found, the timestamp is not a
            valid date, or the timezone name is unknown.
    """
    # Gold standard: the timestamp states its own zone (offset or "Z").
    m = ISO_TZ_PATTERN.search(error_text)
    if m:
        return _parse_aware(m.group(1)), "iso8601", "high"

    # Chinese reset pattern — no zone in the message, so use the provider's.
    m = CN_RESET_PATTERN.search(error_text)
    if m:
        naive = datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S")
        return _to_utc(naive, provider_tz or "Asia/Shanghai"), "z.ai", "high"

    # English reset pattern.
    m = EN_RESET_PATTERN.search(error_text)
    if m:
        raw = m.group(1)
        # Normalise the date/time separator (may be any single whitespace).
        ts = f"{raw[:10]}T{raw[11:]}"
        if len(ts) > _NAIVE_TS_LEN:
            # An explicit zone in the message beats the provider guess.
            return _parse_aware(ts), "generic", "high"
        naive = datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S")
        return _to_utc(naive, provider_tz or "UTC"), "generic", "high"

    # Bare timestamp with no surrounding keywords.
    m = PLAIN_TS_PATTERN.search(error_text)
    if m:
        naive = datetime.strptime(m.group(1), "%Y-%m-%d %H:%M:%S")
        return _to_utc(naive, provider_tz or "UTC"), "generic", "medium"

    # Retry-After: a delay in seconds counted from now.
    m = RETRY_AFTER_PATTERN.search(error_text)
    if m:
        try:
            delay = timedelta(seconds=int(m.group(1)))
            return datetime.now(timezone.utc) + delay, "generic", "medium"
        except OverflowError as exc:
            raise ValueError(
                f"Retry-After value out of range: {m.group(1)}"
            ) from exc

    # Generic 429 fallback: guess 1 hour
    if "429" in error_text or "quota" in error_text.lower():
        reset = datetime.now(timezone.utc) + timedelta(hours=1)
        return reset, "generic", "low"

    raise ValueError(f"Cannot parse reset time from: {error_text[:200]}")


def _parse_aware(ts: str) -> datetime:
    """Parse "YYYY-MM-DDTHH:MM:SS" followed by "Z" or "±HH:MM" into UTC."""
    if ts[-1] in "Zz":
        # Spell "Z" as an offset; older fromisoformat() versions reject "Z".
        ts = ts[:-1] + "+00:00"
    return datetime.fromisoformat(ts).astimezone(timezone.utc)


def _to_utc(dt: datetime, tz_name: str) -> datetime:
    """Convert a naive datetime from a named timezone to UTC.

    Raises:
        ValueError: If ``tz_name`` cannot be resolved to a timezone, so
            callers that already handle parse failures handle this too.
    """
    import zoneinfo

    try:
        tz = zoneinfo.ZoneInfo(tz_name)
    except (zoneinfo.ZoneInfoNotFoundError, ValueError, OSError) as exc:
        raise ValueError(f"Unknown timezone: {tz_name!r}") from exc
    return dt.replace(tzinfo=tz).astimezone(timezone.utc)


def detect_provider(error_text: str) -> Optional[str]:
    """Guess the provider from keywords in the error text.

    Returns a key of ``PROVIDER_TZ``, or None when no keyword matches.
    Latin keywords are matched case-insensitively.
    """
    text = error_text.lower()
    # Chinese providers
    if "z.ai" in text or "智谱" in text or "zhipu" in text:
        return "z.ai"
    if "glm" in text or "使用上限" in error_text or "重置" in error_text:
        return "glm"  # GLM API — Chinese quota messages
    if "deepseek" in text:
        return "deepseek"
    # Western providers
    if "openrouter" in text:
        return "openrouter"
    if "anthropic" in text:
        return "anthropic"
    if "openai" in text:
        return "openai"
    return None


# ── Main ───────────────────────────────────────────────

def main():
    """CLI entry point: print the quota reset estimate as JSON.

    Exit codes: 0 on success, 1 on missing/empty/unparseable input,
    2 when only the low-confidence fallback guess was possible.
    """
    parser = argparse.ArgumentParser(
        description="Calculate quota reset ETR from provider error messages"
    )
    parser.add_argument(
        "input",
        nargs="?",
        help="Error message or timestamp string (reads stdin if omitted)",
    )
    parser.add_argument(
        "--tz",
        help="Timezone override (e.g., Asia/Shanghai, UTC, America/Los_Angeles)",
    )
    parser.add_argument(
        "--cron-offset",
        type=int,
        default=60,
        help="Seconds to add for safe cron retry window (default: 60)",
    )
    args = parser.parse_args()

    # Read input
    if args.input:
        text = args.input
    elif not sys.stdin.isatty():
        text = sys.stdin.read().strip()
    else:
        parser.print_help()
        sys.exit(1)

    if not text:
        print(json.dumps({"status": "error", "message": "empty input"}))
        sys.exit(1)

    # Detect provider; an explicit --tz always wins over the provider map.
    provider = detect_provider(text)
    provider_tz = args.tz or PROVIDER_TZ.get(provider or "", "UTC")

    try:
        reset_utc, detected_provider, confidence = parse_reset_time(
            text, provider_tz
        )
    except ValueError as e:
        print(json.dumps({"status": "error", "message": str(e)}))
        sys.exit(1)

    provider = provider or detected_provider
    now_utc = datetime.now(timezone.utc)
    retry_after = max(0, int((reset_utc - now_utc).total_seconds()))

    # Convert each instant to system local time individually, so the UTC
    # offset is the one in force at that instant (matters across DST).
    reset_local = reset_utc.astimezone()
    # Safe cron time: reset + buffer, shown in the same zone as reset_local.
    cron_local = (reset_utc + timedelta(seconds=args.cron_offset)).astimezone()

    result = {
        "status": "quota_exhausted",
        "provider": provider,
        "confidence": confidence,
        "reset_utc": reset_utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
        # isoformat() gives the "+02:00" offset style of the documented output.
        "reset_local": reset_local.isoformat(timespec="seconds"),
        "cron_iso": cron_local.isoformat(timespec="seconds"),
        "retry_after_seconds": retry_after,
        "human": (
            f"Resets {reset_local.strftime('%b %d at %H:%M')} "
            f"{reset_local.tzname()} "
            f"({cron_local.strftime('%H:%M')} safe retry)"
        ),
    }

    print(json.dumps(result, indent=2))

    if confidence == "low":
        print("\n# WARNING: low confidence — verify manually", file=sys.stderr)
        sys.exit(2)


if __name__ == "__main__":
    main()
def check_colibri_task(task_id: str, db_path: str = "colibri.db") -> dict:
    """Look up a task in the Colibri SQLite store and report whether it is resolved.

    Reads ``status`` and ``title`` from the ``tasks`` table for the row whose
    ``id`` equals ``task_id``.

    Args:
        task_id: Task identifier to look up.
        db_path: Path to the SQLite database file.

    Returns:
        A dict with ``found``. When the task exists it also carries
        ``status``, ``title``, ``resolved`` and ``reason``. When the lookup
        could not be performed it carries ``error`` instead.
    """
    import sqlite3
    from contextlib import closing

    # sqlite3.connect() would silently create an empty database file at a
    # missing path; a read-only check must not leave files behind.
    if not Path(db_path).is_file():
        return {"found": False, "error": f"Colibri DB not found: {db_path}"}

    try:
        # closing() guarantees the connection is released even if the
        # query raises (e.g. the tasks table does not exist).
        with closing(sqlite3.connect(db_path)) as conn:
            row = conn.execute(
                "SELECT status, title FROM tasks WHERE id = ?", (task_id,)
            ).fetchone()
    except sqlite3.Error as e:
        return {"found": False, "error": str(e)}

    if row is None:
        return {"found": False, "reason": "task not found in store"}

    status, title = row
    # NOTE(review): "failed" and "cancelled" count as resolved, so a task
    # that failed is not retried — confirm this is the intended policy.
    resolved = status in ("done", "completed", "cancelled", "failed")
    return {
        "found": True,
        "status": status,
        "title": title,
        "resolved": resolved,
        "reason": f"task already {status}" if resolved else f"task still {status}",
    }


def check_git_activity(repo_path: str, since: Optional[str] = None) -> dict:
    """List up to ten recent commits in a repo, optionally only those after ``since``.

    Args:
        repo_path: Directory expected to contain a ``.git`` entry.
        since: Value passed to ``git log --since``; omitted when falsy.

    Returns:
        ``{"checked": True, "since", "commit_count", "recent"}`` on success
        (``recent`` holds at most five one-line commit summaries), or
        ``{"checked": False, ...}`` with ``reason`` (not a repo) or ``error``
        (git could not be run, timed out, or exited non-zero).
    """
    repo = Path(repo_path)
    if not (repo / ".git").exists():
        return {"checked": False, "reason": "not a git repo"}

    cmd = ["git", "-C", str(repo), "log", "--oneline", "-10"]
    if since:
        # Single-token form so the value can never be read as another option.
        cmd.append(f"--since={since}")

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.SubprocessError) as e:
        return {"checked": False, "error": str(e)}

    # A failing git must not be mistaken for "no commits landed".
    if result.returncode != 0:
        message = result.stderr.strip() or f"git log exited with {result.returncode}"
        return {"checked": False, "error": message}

    commits = [line for line in result.stdout.splitlines() if line]
    return {
        "checked": True,
        "since": since,
        "commit_count": len(commits),
        "recent": commits[:5],
    }


def main():
    """CLI entry point: gather evidence and print the dedup verdict as JSON.

    Two checks are performed: the Colibri task lookup (``--task-id``) and
    recent git activity (``--repo``). Only the task lookup can mark the task
    resolved; git activity is reported as evidence for the operator.

    Exit codes: 0 = resolved, 1 = still needed, 2 = uncertain.
    """
    parser = argparse.ArgumentParser(
        description="Check if a quota-blocked task was already resolved"
    )
    parser.add_argument("--task-id", help="Colibri task ID to check")
    parser.add_argument(
        "--description", help="Task description (for human review)"
    )
    parser.add_argument("--repo", help="Git repo to check for activity")
    parser.add_argument(
        "--since",
        help="ISO timestamp — only check activity after this time",
    )
    parser.add_argument(
        "--db", default="colibri.db", help="Colibri SQLite DB path"
    )
    args = parser.parse_args()

    evidence = []
    resolved = False

    # Check 1: Colibri task board
    if args.task_id:
        result = check_colibri_task(args.task_id, args.db)
        evidence.append({"source": "colibri_task", **result})
        if result.get("resolved"):
            resolved = True

    # Check 2: Git activity
    if args.repo:
        result = check_git_activity(args.repo, args.since)
        evidence.append({"source": "git_activity", **result})

    # Determine overall status. With no evidence at all nothing was actually
    # verified, so the verdict is "uncertain" rather than "still_needed".
    if resolved:
        status = "resolved"
    elif not evidence or any(e.get("error") for e in evidence):
        status = "uncertain"
    else:
        status = "still_needed"

    output = {
        "status": status,
        "checked_at": datetime.now(timezone.utc).isoformat(),
        "evidence": evidence,
        "action": {
            "resolved": "skip retry — task already completed",
            "still_needed": "proceed with scheduled retry",
            "uncertain": "ask operator before retrying",
        }.get(status, "unknown"),
    }

    print(json.dumps(output, indent=2))

    exit_codes = {"resolved": 0, "uncertain": 2, "still_needed": 1}
    sys.exit(exit_codes[status])


if __name__ == "__main__":
    main()