feat(verify-facts): OS-first probe with synthesized command map

- detect_os() runs first, builds Linux/FreeBSD/Darwin command map
- All subsequent probes can use the map for safe cross-platform ops
- SOUL.md: explicit callout — Linux vs FreeBSD command differences
  (grep, sed, dd, sha256sum, bash, make, pkg vs apt, device names)
  are the primary reason to verify facts before acting
This commit is contained in:
Hermes & Sam 2026-06-17 12:12:51 +02:00
parent 97b582578f
commit 6f32a3210f
2 changed files with 99 additions and 10 deletions

View file

@ -10,7 +10,7 @@ We are a small, self-hosted, privacy-first AI agent collective operated by Sam (
- **Self-hosted over SaaS.** Forgejo, Vaultwarden, Colibri, Tailscale — we own our infrastructure. - **Self-hosted over SaaS.** Forgejo, Vaultwarden, Colibri, Tailscale — we own our infrastructure.
- **Pull before work.** Always `git pull` before analyzing, coding, or reviewing. Stale context is waste. Other agents may have landed changes since your last session. - **Pull before work.** Always `git pull` before analyzing, coding, or reviewing. Stale context is waste. Other agents may have landed changes since your last session.
- **Verify facts, then act.** Never assume hardware, OS, timezone, locale, disk names, ZFS pools, jails, agent versions, or git state. Use `scripts/verify_facts_probe.py` to gather exact environment facts before making decisions. What you guess will be wrong; what you probe will be right. This applies to: OS/kernel, timezone/locale, disk/partition names, ZFS pool topology, jail/container inventory, Tailscale IPs, agent harness versions, layered-soul commit, git repo SHAs and dirty state, build tool versions, and secrets infrastructure reachability. - **Verify facts, then act.** Never assume hardware, OS, timezone, locale, disk names, ZFS pools, jails, agent versions, or git state. Use `scripts/verify_facts_probe.py` to gather exact environment facts before making decisions. OS is the first and most critical check — Linux and FreeBSD differ in grep, sed, dd, sha256sum, bash location, make, package managers, device names, and service management. A command that works on debby may silently fail or corrupt on OSA. The probe synthesizes an OS-specific command map so all subsequent operations are safe. What you guess will be wrong; what you probe will be right.
- **Tokenomics is the golden line.** Cost-per-intelligence > cost-per-token. Cache-hit arbitrage. Measure everything. - **Tokenomics is the golden line.** Cost-per-intelligence > cost-per-token. Cache-hit arbitrage. Measure everything.
- **Local-first.** Media processing, inference, builds — run locally when possible. Cloud is a fallback, not a default. - **Local-first.** Media processing, inference, builds — run locally when possible. Cloud is a fallback, not a default.
- **Zero public exposure.** No open ports, no public IPs beyond what Tailscale negotiates. Each agent gets its own SSH key — never copy private keys between hosts. - **Zero public exposure.** No open ports, no public IPs beyond what Tailscale negotiates. Each agent gets its own SSH key — never copy private keys between hosts.

View file

@ -27,6 +27,103 @@ import sys
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
# ── OS detection (run FIRST — everything else depends on it) ──
def detect_os() -> dict:
"""Detect OS and return synthesized command map for cross-platform safety.
This is the first probe. All other probes use the command map instead of
hardcoded commands. Prevents Linux/FreeBSD command drift bugs.
"""
system = platform.system()
release = platform.release()
machine = platform.machine()
hostname = socket.gethostname()
kernel = run_raw(["uname", "-a"])
# OS-specific command equivalents
if system == "FreeBSD":
freebsd_version = run_raw(["freebsd-version"])
cmds = {
"disk_list": ["camcontrol", "devlist"],
"disk_info": ["gpart", "show"],
"memory": ["sysctl", "-n", "hw.physmem"],
"cpu_model": ["sysctl", "-n", "hw.model"],
"cpu_cores": ["sysctl", "-n", "hw.ncpu"],
"mounts": ["mount"],
"network": ["ifconfig"],
"shell": "/usr/local/bin/bash",
"make": "gmake",
"package_manager": "pkg install -y",
"service_manager": "rc.d",
"device_prefix": "/dev/da",
"checksum_cmd": "sha256",
"sed_in_place": "sed -i ''",
"grep_extended": "grep -E",
}
elif system == "Linux":
freebsd_version = None
cmds = {
"disk_list": ["lsblk", "-o", "NAME,PATH,SIZE,MODEL,SERIAL,TRAN,RM,HOTPLUG,MOUNTPOINTS", "-b", "-n"],
"disk_info": ["lsblk"],
"memory": ["free", "-h"],
"cpu_model": ["lscpu"],
"cpu_cores": ["lscpu"],
"mounts": ["mount"],
"network": ["ip", "addr"],
"shell": "/bin/bash",
"make": "make",
"package_manager": "apt install -y",
"service_manager": "systemd",
"device_prefix": "/dev/sd",
"checksum_cmd": "sha256sum",
"sed_in_place": "sed -i",
"grep_extended": "grep -E",
}
elif system == "Darwin":
freebsd_version = None
cmds = {
"disk_list": ["diskutil", "list"],
"disk_info": ["diskutil", "list"],
"memory": ["sysctl", "-n", "hw.memsize"],
"cpu_model": ["sysctl", "-n", "machdep.cpu.brand_string"],
"cpu_cores": ["sysctl", "-n", "hw.ncpu"],
"mounts": ["mount"],
"network": ["ifconfig"],
"shell": "/bin/bash",
"make": "make",
"package_manager": "brew install",
"service_manager": "launchd",
"device_prefix": "/dev/disk",
"checksum_cmd": "shasum -a 256",
"sed_in_place": "sed -i ''",
"grep_extended": "grep -E",
}
else:
freebsd_version = None
cmds = {}
return {
"system": system,
"release": release,
"machine": machine,
"hostname": hostname,
"kernel": kernel,
"freebsd_version": freebsd_version,
"_commands": cmds, # synthesized — use this for all subsequent probes
}
def run_raw(cmd: list[str], timeout: int = 10) -> str:
"""Run a command and return stripped stdout, or error string."""
try:
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=timeout
)
return result.stdout.strip() or result.stderr.strip()
except Exception as e:
return f"ERROR: {e}"
def run(cmd: list[str], timeout: int = 10) -> str: def run(cmd: list[str], timeout: int = 10) -> str:
"""Run a command and return stripped stdout, or error string.""" """Run a command and return stripped stdout, or error string."""
@ -40,15 +137,7 @@ def run(cmd: list[str], timeout: int = 10) -> str:
def probe_os() -> dict: def probe_os() -> dict:
return { return detect_os() # always first — synthesizes command map for all other probes
"system": platform.system(),
"release": platform.release(),
"version": platform.version(),
"machine": platform.machine(),
"hostname": socket.gethostname(),
"kernel": run(["uname", "-a"]),
"freebsd_version": run(["freebsd-version"]) if shutil.which("freebsd-version") else None,
}
def probe_timezone_locale() -> dict: def probe_timezone_locale() -> dict: