feat: verify-facts probe — never assume, always check

- scripts/verify_facts_probe.py: comprehensive environment probe
  (OS, timezone, locale, hardware, storage, jails, network, agents,
   layered-soul, git state, build tools, secrets infrastructure)
- SOUL.md: 'Verify facts, then act' replaces 'Plan first, implement second'
  as core operating principle. Probe, don't guess.
This commit is contained in:
Hermes & Sam 2026-06-17 12:08:56 +02:00
parent 1291fc3f2d
commit 97b582578f
2 changed files with 363 additions and 1 deletions

View file

@ -10,7 +10,7 @@ We are a small, self-hosted, privacy-first AI agent collective operated by Sam (
- **Self-hosted over SaaS.** Forgejo, Vaultwarden, Colibri, Tailscale — we own our infrastructure.
- **Pull before work.** Always `git pull` before analyzing, coding, or reviewing. Stale context is waste. Other agents may have landed changes since your last session.
- **Plan first, implement second.** We think through systemic risks before deploying. E2E testing with reproducible artifacts is non-negotiable.
- **Verify facts, then act.** Never assume hardware, OS, timezone, locale, disk names, ZFS pools, jails, agent versions, or git state. Use `scripts/verify_facts_probe.py` to gather exact environment facts before making decisions. What you guess will be wrong; what you probe will be right. This applies to: OS/kernel, timezone/locale, disk/partition names, ZFS pool topology, jail/container inventory, Tailscale IPs, agent harness versions, layered-soul commit, git repo SHAs and dirty state, build tool versions, and secrets infrastructure reachability.
- **Tokenomics is the golden line.** Cost-per-intelligence > cost-per-token. Cache-hit arbitrage. Measure everything.
- **Local-first.** Media processing, inference, builds — run locally when possible. Cloud is a fallback, not a default.
- **Zero public exposure.** No open ports, no public IPs beyond what Tailscale negotiates. Each agent gets its own SSH key — never copy private keys between hosts.

View file

@ -0,0 +1,362 @@
#!/usr/bin/env python3
"""Verify-facts probe — gather exact environment facts before acting.
Never assume what hardware, OS, timezone, storage, agents, or config you're
running on. This probe collects verified facts and emits them as structured
JSON. Use it at the start of any session where the environment matters.
Usage:
python3 verify_facts_probe.py # all facts
python3 verify_facts_probe.py --hardware # hardware only
python3 verify_facts_probe.py --agents # agent/harness only
python3 verify_facts_probe.py --json # JSON output (default)
python3 verify_facts_probe.py --text # human-readable output
"""
from __future__ import annotations
import argparse
import json
import os
import platform
import re
import shutil
import socket
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
def run(cmd: list[str], timeout: int = 10) -> str:
    """Execute *cmd* and hand back its trimmed textual output.

    Standard output is preferred; if it is empty once stripped, the stripped
    standard error is returned instead. Any exception raised while starting
    or waiting for the process (missing binary, timeout, ...) is reported as
    an ``"ERROR: ..."`` string instead of being propagated.
    """
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
        )
        primary = completed.stdout.strip()
        if primary:
            return primary
        return completed.stderr.strip()
    except Exception as exc:
        return f"ERROR: {exc}"
def probe_os() -> dict:
    """Collect operating-system identity facts.

    The result holds the ``platform`` module's system / release / version /
    machine values, the socket hostname, the output of ``uname -a`` and, when
    a ``freebsd-version`` binary is on PATH, its output (``None`` otherwise).
    """
    facts: dict = {}
    facts["system"] = platform.system()
    facts["release"] = platform.release()
    facts["version"] = platform.version()
    facts["machine"] = platform.machine()
    facts["hostname"] = socket.gethostname()
    facts["kernel"] = run(["uname", "-a"])
    if shutil.which("freebsd-version"):
        facts["freebsd_version"] = run(["freebsd-version"])
    else:
        facts["freebsd_version"] = None
    return facts
def probe_timezone_locale() -> dict:
    """Report timezone and locale settings from files, environment and tools.

    Timezone: the content of ``/etc/timezone`` if that file exists, else the
    resolved path of ``/etc/localtime`` if it exists, else ``None``; plus the
    ``TZ`` environment variable, ``timedatectl`` output when the tool is
    installed, and the current time in UTC and in local time.

    Locale: the non-empty ``LANG`` / ``LC_*`` environment variables that are
    inspected here, and ``localectl status`` output when the tool is installed.
    """
    tz_file = "/etc/timezone"
    localtime_file = "/etc/localtime"
    if os.path.exists(tz_file):
        system_tz = Path(tz_file).read_text().strip()
    elif os.path.exists(localtime_file):
        system_tz = os.path.realpath(localtime_file)
    else:
        system_tz = None

    env_tz = os.environ.get("TZ", "")

    timedatectl_out = None
    if shutil.which("timedatectl"):
        timedatectl_out = run(["timedatectl", "show", "--property=Timezone"])

    locale_names = ("LANG", "LC_ALL", "LC_TIME", "LC_CTYPE", "LC_MESSAGES")
    locale_pairs = ((name, os.environ.get(name, "")) for name in locale_names)
    locale_env = {name: value for name, value in locale_pairs if value}

    localectl_out = None
    if shutil.which("localectl"):
        localectl_out = run(["localectl", "status"])

    timezone_facts = {
        "system": system_tz,
        "env_TZ": env_tz or None,
        "timedatectl": timedatectl_out,
        "now_utc": datetime.now(timezone.utc).isoformat(),
        "now_local": datetime.now().astimezone().isoformat(),
    }
    locale_facts = {
        "env": locale_env or None,
        "localectl": localectl_out,
    }
    return {"timezone": timezone_facts, "locale": locale_facts}
def _parse_lscpu(text: str) -> dict:
    """Pull the CPU model and logical CPU count out of ``lscpu`` output.

    Labels are compared exactly (after stripping indentation) so that related
    rows such as ``On-line CPU(s) list`` or ``BIOS Model name`` cannot
    overwrite the wanted values.
    """
    info = {}
    for line in text.splitlines():
        label, sep, value = line.partition(":")
        if not sep:
            continue
        label = label.strip()
        if label == "Model name":
            info["model"] = value.strip()
        elif label == "CPU(s)":
            info["cores"] = value.strip()
    return info


def _vga_lines(text: str) -> str:
    """Return the lines of *text* that mention VGA, case-insensitively."""
    return "\n".join(
        line for line in text.splitlines() if "vga" in line.lower()
    )


def probe_hardware() -> dict:
    """Collect disk, memory, CPU and GPU facts.

    Returns:
        A dict with four keys:

        * ``disks`` - list of ``{"name", "path"}`` dicts from ``lsblk`` when
          it is installed, otherwise ``{"device"}`` dicts from
          ``camcontrol devlist``; empty when neither tool exists.
        * ``memory`` - raw ``free -h`` output under ``"free"``, or the
          ``hw.physmem`` sysctl rendered in GiB under ``"physmem"``.
        * ``cpu`` - ``"model"`` and ``"cores"`` from ``lscpu`` or from the
          ``hw.model`` / ``hw.ncpu`` sysctls.
        * ``gpu`` - the VGA lines of ``lspci`` output, or ``None`` when
          ``lspci`` is not installed.
    """
    disks = []
    if shutil.which("lsblk"):
        # Linux. List mode keeps tree-drawing glyphs out of the device names,
        # and only the two columns that are actually reported are requested.
        out = run(["lsblk", "-l", "-n", "-o", "NAME,PATH"])
        for line in out.splitlines():
            parts = line.split()
            if parts:
                disks.append(
                    {"name": parts[0], "path": parts[1] if len(parts) > 1 else ""}
                )
    elif shutil.which("camcontrol"):
        # FreeBSD
        out = run(["camcontrol", "devlist"])
        disks.extend(
            {"device": line.strip()} for line in out.splitlines() if line.strip()
        )

    memory = {}
    if shutil.which("free"):
        memory["free"] = run(["free", "-h"])
    elif shutil.which("sysctl"):
        physmem = run(["sysctl", "-n", "hw.physmem"])
        # Non-numeric output (e.g. an error message) is passed through as-is.
        memory["physmem"] = (
            f"{int(physmem) // (1024**3)} GiB" if physmem.isdigit() else physmem
        )

    cpu = {}
    if shutil.which("lscpu"):
        cpu = _parse_lscpu(run(["lscpu"]))
    elif shutil.which("sysctl"):
        cpu["model"] = run(["sysctl", "-n", "hw.model"])
        cpu["cores"] = run(["sysctl", "-n", "hw.ncpu"])

    gpu = None
    if shutil.which("lspci"):
        # No shell is involved, so a "|" argument would be handed to lspci
        # verbatim; the filtering is done here instead.
        out = run(["lspci"])
        gpu = out if out.startswith("ERROR:") else _vga_lines(out)

    return {
        "disks": disks,
        "memory": memory,
        "cpu": cpu,
        "gpu": gpu,
    }
def probe_storage() -> dict:
    """Report ZFS pools/datasets and the mount table.

    ZFS facts are gathered only when ``zpool`` is on PATH; otherwise the
    ``zfs`` entry is ``None``. The mount listing comes from ``mount`` when
    available, else from ``df -h``, and is cut to its first 500 characters
    (``None`` when empty).
    """
    zfs_facts = {}
    if shutil.which("zpool"):
        zfs_facts["pools"] = run(["zpool", "list", "-H", "-o", "name,size,health"])
        zfs_facts["datasets"] = run(
            ["zfs", "list", "-H", "-o", "name,mountpoint,used,available"]
        )

    if shutil.which("mount"):
        mount_listing = run(["mount"])
    else:
        mount_listing = run(["df", "-h"])

    report = {"zfs": zfs_facts or None}
    report["mounts"] = mount_listing[:500] if mount_listing else None
    return report
def probe_jails_containers() -> dict:
    """List FreeBSD jails and Docker containers.

    Each entry is the raw output of ``jls -n`` / ``docker ps`` when the
    respective binary is on PATH, and ``None`` when it is not.
    """
    inventory = {"freebsd_jails": None, "docker_containers": None}
    if shutil.which("jls"):
        inventory["freebsd_jails"] = run(["jls", "-n"])
    if shutil.which("docker"):
        inventory["docker_containers"] = run(
            ["docker", "ps", "--format", "table {{.Names}}\t{{.Status}}"]
        )
    return inventory
def probe_network() -> dict:
    """Report Tailscale status, network interfaces and resolver config.

    ``tailscale`` is ``None`` unless the ``tailscale`` binary is on PATH.
    ``interfaces`` is the first 500 characters of ``ip addr`` (or of
    ``ifconfig`` when ``ip`` is missing). ``dns`` is the stripped content of
    ``/etc/resolv.conf`` when that file exists, else ``None``.
    """
    ts_facts = {}
    if shutil.which("tailscale"):
        ts_facts["status"] = run(["tailscale", "status"])

    iface_cmd = ["ip", "addr"] if shutil.which("ip") else ["ifconfig"]
    iface_listing = run(iface_cmd)

    resolv_conf = "/etc/resolv.conf"
    if os.path.exists(resolv_conf):
        resolver = Path(resolv_conf).read_text().strip()
    else:
        resolver = None

    return {
        "tailscale": ts_facts or None,
        "interfaces": iface_listing[:500],
        "dns": resolver,
    }
def probe_agents() -> dict:
    """Detect which agent harness binaries are installed.

    Only agents whose binary is found on PATH appear in the result. Hermes
    and Codex additionally report ``--version`` output, Hermes its home
    directory (``HERMES_HOME`` or ``~/.hermes``), and Colibri the first 500
    characters of ``colibri status`` (``"no response"`` when empty).
    """
    found: dict = {}

    # Hermes
    hermes_path = shutil.which("hermes")
    if hermes_path:
        hermes_version = run(["hermes", "--version"], timeout=5)
        hermes_home = os.environ.get("HERMES_HOME", str(Path.home() / ".hermes"))
        found["hermes"] = {
            "binary": hermes_path,
            "version": hermes_version,
            "home": hermes_home,
        }

    # Colibri daemon
    colibri_path = shutil.which("colibri")
    if colibri_path:
        colibri_status = run(["colibri", "status"], timeout=5)
        if colibri_status:
            colibri_status = colibri_status[:500]
        else:
            colibri_status = "no response"
        found["colibri"] = {"binary": colibri_path, "status": colibri_status}

    # Zot
    zot_path = shutil.which("zot")
    if zot_path:
        found["zot"] = {"binary": zot_path}

    # Codex
    codex_path = shutil.which("codex")
    if codex_path:
        found["codex"] = {
            "binary": codex_path,
            "version": run(["codex", "--version"], timeout=5),
        }

    # Claude Code
    claude_path = shutil.which("claude")
    if claude_path:
        found["claude-code"] = {"binary": claude_path}

    return found
def probe_layered_soul() -> dict:
    """Find the layered-soul checkout and report its identity version.

    Candidate directories, in order: ``~/ai/layered-soul`` and then the
    directory named by ``LAYERED_SOUL_PATH`` - but only when that variable is
    set, so the current working directory is never probed by accident.

    Returns:
        For the first candidate containing a readable ``manifest.json``: its
        path, the manifest's ``schema`` value, the short git commit (``None``
        if git could not be run), and the number of ``SKILL.md`` files under
        ``skills`` and ``*.md`` files under ``memories/curated``.
        Otherwise ``{"found": False}``, with an ``"errors"`` list added when
        a candidate was present but could not be read.
    """
    candidates = [Path.home() / "ai" / "layered-soul"]
    override = os.environ.get("LAYERED_SOUL_PATH", "")
    if override:
        candidates.append(Path(override))

    errors = []
    for path in candidates:
        manifest_file = path / "manifest.json"
        if not manifest_file.exists():
            continue
        try:
            manifest = json.loads(manifest_file.read_text())
            schema = manifest.get("schema")
            skills = len(list((path / "skills").rglob("SKILL.md")))
            memories = len(list((path / "memories" / "curated").rglob("*.md")))
        except (OSError, ValueError, AttributeError) as exc:
            # Unreadable or malformed manifest: remember why, try the next one.
            errors.append(f"{manifest_file}: {exc}")
            continue
        try:
            commit = subprocess.run(
                ["git", "-C", str(path), "rev-parse", "--short", "HEAD"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
        except (OSError, subprocess.SubprocessError):
            # git missing or hung: still report the manifest facts.
            commit = None
        return {
            "path": str(path),
            "schema": schema,
            "commit": commit,
            "skills": skills,
            "memories": memories,
        }

    result: dict = {"found": False}
    if errors:
        result["errors"] = errors
    return result
def _git_stdout(repo: Path, *args: str):
    """Run ``git -C repo <args>`` and return stripped stdout, or ``None``.

    ``None`` means git exited with a non-zero status, so callers can tell
    "command failed" apart from "command succeeded with empty output".
    """
    proc = subprocess.run(
        ["git", "-C", str(repo), *args],
        capture_output=True, text=True, timeout=5,
    )
    if proc.returncode != 0:
        return None
    return proc.stdout.strip()


def probe_git() -> dict:
    """Check the well-known repos under ``~/ai`` for their current state.

    Returns:
        A dict keyed by repo name for every listed repo that has a ``.git``
        entry. Each value holds ``branch``, ``sha`` and ``dirty``; a field is
        ``None`` when the corresponding git command failed, so a failing
        ``git status`` is never reported as a clean tree. If git cannot be
        started or times out, the value is ``{"error": "<reason>"}`` instead.
    """
    repos = {}
    names = [
        "colibri", "layered-soul", "clawdie-iso",
        "clawdie-ai", "hermes-bsd", "hermes-soul",
    ]
    for name in names:
        path = Path.home() / "ai" / name
        if not (path / ".git").exists():
            continue
        try:
            branch = _git_stdout(path, "branch", "--show-current")
            sha = _git_stdout(path, "rev-parse", "--short", "HEAD")
            status = _git_stdout(path, "status", "--porcelain")
        except (OSError, subprocess.SubprocessError) as exc:
            repos[name] = {"error": str(exc)}
            continue
        repos[name] = {
            "branch": branch,
            "sha": sha,
            "dirty": None if status is None else bool(status),
        }
    return repos
def probe_build_tools() -> dict:
    """Report versions of the build toolchains found on PATH.

    Every tool maps to the output of its version command, or ``None`` when
    its binary is not installed. ``python`` is the version of the running
    interpreter, taken from ``sys.version_info``.
    """
    # (result key, version command); None marks the running interpreter.
    tool_commands = [
        ("rust", ["rustc", "--version"]),
        ("go", ["go", "version"]),
        ("zig", ["zig", "version"]),
        ("python", None),
        ("node", ["node", "--version"]),
        ("npm", ["npm", "--version"]),
        ("gmake", ["gmake", "--version"]),
        ("uv", ["uv", "--version"]),
    ]
    versions: dict = {}
    for label, argv in tool_commands:
        if argv is None:
            info = sys.version_info
            versions[label] = f"{info.major}.{info.minor}.{info.micro}"
        elif shutil.which(argv[0]):
            versions[label] = run(argv)
        else:
            versions[label] = None
    return versions
def probe_secrets() -> dict:
    """Check secrets infrastructure reachability.

    Returns:
        ``{"vaultwarden": {...}}`` where the inner dict always has
        ``bw_installed``. When the ``bw`` CLI is on PATH it also has
        ``server`` (the ``BW_SERVER`` environment variable, or ``"not set"``)
        and ``status``: the first 200 characters of ``bw status`` output, or
        ``"unreachable"`` - with the reason under ``error`` - when the
        command could not be run or timed out.
    """
    vault: dict = {}
    if shutil.which("bw"):
        vault["bw_installed"] = True
        vault["server"] = os.environ.get("BW_SERVER", "not set")
        status = run(["bw", "status"], timeout=5)
        # run() never raises; it reports failures as an "ERROR: ..." string,
        # so that prefix is the signal for an unreachable CLI.
        if status.startswith("ERROR:"):
            vault["status"] = "unreachable"
            vault["error"] = status[:200]
        else:
            vault["status"] = status[:200]
    else:
        vault["bw_installed"] = False
    return {"vaultwarden": vault}
# ── Main ───────────────────────────────────────────────
# Registry of probe callables. Each key is the section name used in the
# output and, with "_" replaced by "-", the matching command-line flag.
FACTS = dict(
    os=probe_os,
    timezone_locale=probe_timezone_locale,
    hardware=probe_hardware,
    storage=probe_storage,
    jails_containers=probe_jails_containers,
    network=probe_network,
    agents=probe_agents,
    layered_soul=probe_layered_soul,
    git=probe_git,
    build_tools=probe_build_tools,
    secrets=probe_secrets,
)
def main():
    """Parse the command line, run the selected probes and print the facts.

    Every key of ``FACTS`` gets a ``--<key-with-dashes>`` flag; ``--hw`` is a
    short alias for ``--hardware``. With no category flag all probes run.
    A probe that raises is reported as ``{"error": "<message>"}`` under its
    key instead of aborting the run. Output is indented JSON unless
    ``--text`` is given.
    """
    parser = argparse.ArgumentParser(
        description="Verify facts — gather exact environment facts before acting"
    )
    parser.add_argument(
        "--json", action="store_true", default=True,
        help="emit JSON (the default output format)",
    )
    parser.add_argument(
        "--text", action="store_true",
        help="emit human-readable text instead of JSON",
    )

    # Per-category flags, plus short aliases for selected categories.
    short_aliases = {"hardware": ["--hw"]}
    for key in FACTS:
        flags = [f"--{key.replace('_', '-')}"] + short_aliases.get(key, [])
        parser.add_argument(
            *flags, dest=key, action="store_true",
            help=f"run only the selected probes, including {key}",
        )
    args = parser.parse_args()

    # Determine which probes to run: the requested ones, or all of them.
    specific = [k for k in FACTS if getattr(args, k, False)]
    probes = specific if specific else list(FACTS)

    output: dict = {"probed_at": datetime.now(timezone.utc).isoformat()}
    for key in probes:
        try:
            output[key] = FACTS[key]()
        except Exception as e:
            # Best effort: one failing probe must not hide the other facts.
            output[key] = {"error": str(e)}

    if args.text:
        _print_text(output)
    else:
        print(json.dumps(output, indent=2, default=str))
def _print_text(data: dict) -> None:
    """Print the probe results as plain text, one block per section.

    The ``probed_at`` entry becomes a banner line. Every other section gets
    a header, one line per non-``None`` item (only when the section content
    is a dict), and a trailing blank line.
    """
    for section, content in data.items():
        if section != "probed_at":
            print(f"--- {section.upper()} ---")
            if isinstance(content, dict):
                visible = (
                    (label, value)
                    for label, value in content.items()
                    if value is not None
                )
                for label, value in visible:
                    print(f" {label}: {_fmt(value)}")
            print()
        else:
            print(f"=== PROBED {content} ===\n")
def _fmt(val) -> str:
    """Render one value for the text report.

    Lists collapse to an item count; dicts are serialised as JSON and every
    other value goes through ``str``, both cut to 200 characters.
    """
    limit = 200
    if isinstance(val, list):
        return f"[{len(val)} items]"
    rendered = json.dumps(val, default=str) if isinstance(val, dict) else str(val)
    return rendered[:limit]
# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()