#!/bin/sh
# wiki-lint — validate the docs/wiki/ knowledge base against the codebase.
#
# Three deterministic checks (no LLM, CI-friendly):
#   1. Dangling references: every path/line cited in wiki pages must exist.
#   2. Resurrected old names: "Shipped" renames from naming-decisions.md
#      must not reappear in code (outside the wiki).
#   3. Orphan pages: every docs/wiki/*.md must be linked from index.md.
#
# Output: PASS count or FAIL report. Non-zero exit on failure in --strict.
#
# Usage:
#   ./scripts/wiki-lint            # advisory (report only, exit 0)
#   ./scripts/wiki-lint --strict   # gate mode (non-zero exit on failure)

set -eu

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
WIKI_DIR="$REPO_ROOT/docs/wiki"
FAIL=0
PASS=0
STRICT=0
if [ "${1:-}" = "--strict" ]; then STRICT=1; fi

# All scratch files are created up front and removed by a single EXIT trap,
# so an early exit (e.g. a set -e abort mid-check) does not leave them behind.
_tmp_paths=$(mktemp)
OLD_NAMES_FILE=$(mktemp)
FILE_LIST=$(mktemp)
FILTER_FILE=$(mktemp)
trap 'rm -f "$_tmp_paths" "$OLD_NAMES_FILE" "$FILE_LIST" "$FILTER_FILE"' EXIT

# ── helpers ───────────────────────────────────────────────────────────

# fail MESSAGE... — print a FAIL line to stdout and bump the failure counter.
fail() {
  printf " FAIL %s\n" "$*"
  FAIL=$((FAIL + 1))
}

# pass — bump the success counter (prints nothing).
pass() {
  PASS=$((PASS + 1))
}

# _wiki_path PATH — print PATH with a leading "$REPO_ROOT/" removed.
# The prefix is stripped as a literal string (not a regex), so a checkout
# path containing '|', '.', '[' etc. is handled correctly. PATH is printed
# unchanged when it does not start with that prefix.
_wiki_path() {
  printf '%s' "${1#"$REPO_ROOT"/}"
}

# _ref_exists PATH — succeed when PATH is a regular file or a directory.
_ref_exists() {
  [ -f "$1" ] || [ -d "$1" ]
}

# extract_paths FILE — print one candidate reference per line.
# Matches inline code references like `path/to/file.rs` or `path/to/file:123`
# but NOT URLs, wiki links, or shell commands with flags. Anything from the
# first ':' onward (e.g. a line number) is dropped.
extract_paths() {
  grep -o '`[^`]*`' "$1" | tr -d '`' \
    | grep -E '[a-zA-Z0-9_/.-]' \
    | grep -v -E '^https?:|^\{|^\./|^#|^-[a-z]|--| ' \
    | sed 's/:.*//'
}

# ── 1. dangling references ────────────────────────────────────────────
echo "=== 1. dangling references ==="

for wiki_file in "$WIKI_DIR"/*.md; do
  display="$(_wiki_path "$wiki_file")"
  # Written to a file (not piped) so the loop below runs in the main shell
  # and its pass/fail counter updates survive.
  extract_paths "$wiki_file" > "$_tmp_paths"
  while IFS= read -r path_ref; do
    case "$path_ref" in
      ""|index*|agent-harness*|naming-decisions*|quality-gates*) continue ;;
      path/to/*|clawdie-iso/*) continue ;;
      fake-pi-agent.py) continue ;;                   # old name in the Shipped table
      stage-colibri-iso.sh) continue ;;               # cross-repo (clawdie-iso)
      ADR-agent-harness-consolidation.md) continue ;; # wiki-documented as absent
      *.md|*.rs|*.sh|*.py|*.sql|*.json|*.toml|*.yml|*.cfg|*.env|*.txt) ;;
      *) continue ;;
    esac

    # Accept the reference relative to the repo root or to the wiki directory.
    if _ref_exists "$REPO_ROOT/$path_ref" || _ref_exists "$WIKI_DIR/$path_ref"; then
      pass
      continue
    fi

    case "$path_ref" in
      */*)
        fail "$display → '$path_ref' (not found)"
        ;;
      *)
        # Bare filename: accept it if it exists anywhere in the tree
        # (bounded depth, skipping vcs and build output).
        found=$(find "$REPO_ROOT" -maxdepth 7 -name "$path_ref" \
          -not -path '*/.git/*' -not -path '*/target/*' 2>/dev/null | head -1)
        if [ -n "$found" ]; then
          pass
        else
          fail "$display → '$path_ref' (not found)"
        fi
        ;;
    esac
  done < "$_tmp_paths"
done

# AGENTS.md lives at the repo root, not under docs/.
for wiki_file in "$WIKI_DIR"/*.md; do
  # -F: match the filename literally rather than treating '.' as a wildcard.
  if grep -qF 'AGENTS.md' "$wiki_file"; then
    if [ -f "$REPO_ROOT/AGENTS.md" ]; then
      pass
    else
      fail "$(_wiki_path "$wiki_file") → 'AGENTS.md' (not found at repo root)"
    fi
  fi
done

# The wiki claims ADR-agent-harness-consolidation.md does NOT exist; verify.
if [ -f "$REPO_ROOT/docs/ADR-agent-harness-consolidation.md" ]; then
  fail "docs/ADR-agent-harness-consolidation.md exists — conflicts with wiki claim"
else
  echo " PASS ADR-agent-harness-consolidation.md correctly absent (wiki-documented)"
  pass
fi
echo ""

# ── 2. resurrected old names ──────────────────────────────────────────
echo "=== 2. resurrected old names ==="

# Old names are parsed from the "Shipped" section of naming-decisions.md, so the
# check self-updates when a rename is recorded there — no hardcoded list to rot.
# Each Shipped row is `| `old` → `new` | … |`; take the backticked token(s)
# before the arrow.
awk '/^## Shipped/{f=1; next} /^## /{f=0} f' "$WIKI_DIR/naming-decisions.md" \
  | grep '→' \
  | sed 's/→.*//' \
  | grep -o '`[^`]*`' | tr -d '`' \
  | sort -u > "$OLD_NAMES_FILE"

# Source files to scan (exclude the wiki itself, vcs, build output).
# Skipped entirely when there are no old names to look for: the tree walk is
# the most expensive step and its result would go unused.
# `|| true`: find exits non-zero when any directory is unreadable; under
# set -e that would abort the whole lint with no message (stderr is muted),
# so the scan is best-effort over whatever could be listed.
if [ -s "$OLD_NAMES_FILE" ]; then
  find "$REPO_ROOT" -type f \
    \( -name '*.rs' -o -name '*.sh' -o -name '*.py' -o -name '*.md' \
    -o -name '*.toml' -o -name '*.json' -o -name '*.sql' \
    -o -name '*.cfg' -o -name '*.env' -o -name '*.txt' \) \
    -not -path '*/.git/*' \
    -not -path '*/target/*' \
    -not -path '*/node_modules/*' \
    -not -path '*/docs/wiki/*' \
    > "$FILE_LIST" 2>/dev/null || true
fi

# Lines that legitimately mention an old name (migration code, back-compat
# aliases, changelog history) are not drift. Matched case-insensitively as
# substrings against each hit line.
cat > "$FILTER_FILE" <<'FILTER'
rename
formerly
old name
back-compat
backward
deprecated
legacy
migrat
alias
to_regclass
_id_seq
provider_env
changelog
FILTER

# Loop in the MAIN shell (read from a file, not a pipe) so fail/pass counters
# and the --strict exit propagate.
while IFS= read -r old_name; do
  [ -z "$old_name" ] && continue
  # `|| true`: "no hits" makes the final grep exit 1, which is the good case.
  hits=$(xargs grep -nHF -e "$old_name" < "$FILE_LIST" 2>/dev/null \
    | grep -ivF -f "$FILTER_FILE" || true)
  if [ -n "$hits" ]; then
    fail "resurrected old name '$old_name':"
    # Indent each hit line by one space under the FAIL header.
    printf '%s\n' "$hits" | sed 's/^/ /'
  else
    pass
  fi
done < "$OLD_NAMES_FILE"

# The renamed sample agent must not reappear under its old filename.
if [ -f "$REPO_ROOT/scripts/fake-pi-agent.py" ]; then
  fail "scripts/fake-pi-agent.py still exists (renamed to sample-pi-agent.py)"
else
  pass
fi
echo ""

# ── 3. orphan pages ───────────────────────────────────────────────────
echo "=== 3. orphan pages ==="

for wiki_file in "$WIKI_DIR"/*.md; do
  base="$(basename "$wiki_file")"
  [ "$base" = "index.md" ] && continue
  if grep -qF "$base" "$WIKI_DIR/index.md" 2>/dev/null; then
    pass
  else
    fail "docs/wiki/$base not linked from index.md (orphan)"
  fi
done
echo ""

# ── report ────────────────────────────────────────────────────────────
printf "=== PASS: %d FAIL: %d ===\n" "$PASS" "$FAIL"
if [ "$FAIL" -gt 0 ]; then
  echo "wiki-lint: drift detected — review failures above"
  # A full `if` rather than `[ … ] && exit 1`: in advisory mode the failed
  # test would otherwise be the script's last command and leak exit status 1,
  # contradicting the documented "report only, exit 0".
  if [ "$STRICT" -eq 1 ]; then
    exit 1
  fi
else
  echo "wiki-lint: clean"
fi