colibri/scripts/wiki-lint
Sam & Claude 8ffbf09f12
Some checks are pending
CI / rust (pull_request) Waiting to run
CI / markdown (pull_request) Waiting to run
CI / port (pull_request) Waiting to run
CI / agent-jail-pkgs (pull_request) Waiting to run
feat(wiki-lint): check #4 — top-level docs dangling links
PR #224 fixed two stale references to removed docs by hand. The root
cause: wiki-lint only validated docs/wiki/, never the top-level
docs/*.md — so a doc could link to a removed sibling forever with
nothing to catch it.

Add check #4: scan docs/*.md for two doc-reference patterns and
verify they resolve (relative to docs/ or repo root):

  a) markdown links [label](local.md) — the exact #224 bug class
  b) backtick SHOUTING-CASE .md refs (e.g. `FOO-BAR.md`)

Scoped to doc-to-doc references deliberately. External URLs, anchors,
and cross-repo paths are skipped, and bare lowercase source filenames
(env.sh, build.sh — often runtime/contextual) are out of scope, so
the check has zero false positives on current main (171 pass) and
fail-closes under --strict (which CI already runs).

Calibrated by injecting fake removed-doc links: both the markdown
link and the backtick doc-name ref are detected and exit non-zero.

(Sam & Claude)
2026-06-26 21:50:07 +02:00

235 lines
8.6 KiB
Bash
Executable file

#!/bin/sh
# wiki-lint — validate the docs/wiki/ knowledge base against the codebase.
#
# Four deterministic checks (no LLM, CI-friendly):
# 1. Dangling references: every path cited in wiki pages must exist
# (a ":line" suffix is stripped before lookup; line numbers are not checked).
# 2. Resurrected old names: "Shipped" renames from naming-decisions.md
# must not reappear in code (outside the wiki).
# 3. Orphan pages: every docs/wiki/*.md must be linked from index.md.
# 4. Top-level docs dangling links: markdown links + doc-name refs in
# docs/*.md must resolve (catches links to removed docs, e.g. PR #224).
#
# Output: PASS count or FAIL report. Non-zero exit on failure in --strict.
#
# Usage:
# ./scripts/wiki-lint # advisory (report only, exit 0)
# ./scripts/wiki-lint --strict # gate mode (non-zero exit on failure)
set -eu

# Resolve the script's own directory, then the repo root one level above it.
# CDPATH is cleared for each cd: an inherited CDPATH can make `cd <relative>`
# jump to a different directory and/or print the directory name on stdout,
# which would corrupt the captured value. `--` guards against a leading '-'.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
REPO_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd)"
WIKI_DIR="$REPO_ROOT/docs/wiki"
TOP_DOCS_DIR="$REPO_ROOT/docs"

# Global counters bumped by fail()/pass(); STRICT=1 turns failures into a
# non-zero exit status.
FAIL=0
PASS=0
STRICT=0
case "${1:-}" in
  --strict) STRICT=1 ;;
esac
# ── helpers ───────────────────────────────────────────────────────────
# Print one failure line (all arguments joined by spaces) on stdout and
# increment the global FAIL counter.
fail() {
  printf " FAIL %s\n" "$*"
  : "$((FAIL += 1))"
}
# Record one successful check by incrementing the global PASS counter.
pass() {
  : "$((PASS += 1))"
}
# Print "$1" with a leading "$REPO_ROOT/" removed (no trailing newline);
# paths outside the repo root are printed unchanged.
# The prefix is stripped with a quoted parameter-expansion pattern so that
# REPO_ROOT is always treated literally — characters such as '.', '[', '*'
# or '|' in the checkout path cannot act as pattern/regex syntax.
_wiki_path() {
  printf '%s' "${1#"$REPO_ROOT"/}"
}
# List candidate file references found in the markdown file "$1".
# Every inline code span (`...`) is considered; one result per line on stdout.
# A span is dropped when it looks like a URL, starts with '{', './', '#' or a
# short flag (-x), or contains '--' or a space. For the survivors, everything
# from the first ':' onward (e.g. a ":123" line suffix) is cut off.
extract_paths() {
  grep -o '`[^`]*`' "$1" \
    | sed 's/`//g' \
    | grep -E '[a-zA-Z0-9_/.-]' \
    | grep -v -E -e '^https?:' -e '^\{' -e '^\./' -e '^#' \
        -e '^-[a-z]' -e '--' -e ' ' \
    | cut -d: -f1
}
# ── 1. dangling references ────────────────────────────────────────────
echo "=== 1. dangling references ==="

# Succeeds when reference "$1" resolves to a file or directory under the repo
# root or under the wiki directory. A bare filename (no '/') additionally
# succeeds if `find` locates that name anywhere within 7 levels of the repo
# root, ignoring .git/ and target/.
_ref_resolves() {
  if [ -f "$REPO_ROOT/$1" ] || [ -d "$REPO_ROOT/$1" ]; then return 0; fi
  if [ -f "$WIKI_DIR/$1" ] || [ -d "$WIKI_DIR/$1" ]; then return 0; fi
  case "$1" in
    */*) return 1 ;;
  esac
  _ref_hit=$(find "$REPO_ROOT" -maxdepth 7 -name "$1" \
    -not -path '*/.git/*' -not -path '*/target/*' 2>/dev/null | head -1)
  [ -n "$_ref_hit" ]
}

for wiki_file in "$WIKI_DIR"/*.md; do
  display="$(_wiki_path "$wiki_file")"
  # Stage the references in a file so the loop below runs in the main shell
  # and the pass/fail counters survive.
  refs_file=$(mktemp)
  extract_paths "$wiki_file" > "$refs_file"
  while IFS= read -r path_ref; do
    case "$path_ref" in
      "" | index* | agent-harness* | naming-decisions* | quality-gates*)
        continue ;;
      path/to/* | clawdie-iso/*)
        continue ;;
      stage-colibri-iso.sh)
        continue ;; # cross-repo (clawdie-iso)
      ADR-agent-harness-consolidation.md)
        continue ;; # wiki-documented as absent
      *.md | *.rs | *.sh | *.py | *.sql | *.json | *.toml | *.yml | *.cfg | *.env | *.txt)
        ;; # recognised file extension: validate it below
      *)
        continue ;;
    esac
    if _ref_resolves "$path_ref"; then
      pass
    else
      fail "$display → '$path_ref' (not found)"
    fi
  done < "$refs_file"
  rm -f "$refs_file"
done

# AGENTS.md lives at the repo root, not under docs/.
for wiki_file in "$WIKI_DIR"/*.md; do
  if ! grep -q 'AGENTS.md' "$wiki_file"; then
    continue
  fi
  if [ -f "$REPO_ROOT/AGENTS.md" ]; then
    pass
  else
    fail "$(_wiki_path "$wiki_file") → 'AGENTS.md' (not found at repo root)"
  fi
done

# The wiki claims ADR-agent-harness-consolidation.md does NOT exist; verify.
if [ ! -f "$REPO_ROOT/docs/ADR-agent-harness-consolidation.md" ]; then
  echo " PASS ADR-agent-harness-consolidation.md correctly absent (wiki-documented)"
  pass
else
  fail "docs/ADR-agent-harness-consolidation.md exists — conflicts with wiki claim"
fi
echo ""
# ── 2. resurrected old names ──────────────────────────────────────────
echo "=== 2. resurrected old names ==="
# Old names are parsed from the "Shipped" section of naming-decisions.md, so the
# check self-updates when a rename is recorded there — no hardcoded list to rot.
# Each Shipped row is `| `old` → `new` | … |`; take the backticked token(s)
# before the arrow.
OLD_NAMES_FILE=$(mktemp)
FILE_LIST=$(mktemp)
FILTER_FILE=$(mktemp)
trap 'rm -f "$OLD_NAMES_FILE" "$FILE_LIST" "$FILTER_FILE"' EXIT

# Without the source page the list would be empty and every rename would
# "pass" vacuously, so a missing page is reported instead of ignored.
if [ -f "$WIKI_DIR/naming-decisions.md" ]; then
  awk '/^## Shipped/{f=1; next} /^## /{f=0} f' "$WIKI_DIR/naming-decisions.md" \
    | grep '→' \
    | sed 's/→.*//' \
    | grep -o '`[^`]*`' | tr -d '`' \
    | sort -u > "$OLD_NAMES_FILE"
else
  fail "docs/wiki/naming-decisions.md missing — cannot derive old names"
fi

# Source files to scan (exclude the wiki itself, vcs, build output).
# Paths are collected RELATIVE to the repo root: the -path excludes and the
# ignore filter further down both match against the path text, so an absolute
# prefix (e.g. a checkout under ".../legacy/..." or ".../target/...") must not
# be part of it. The list is NUL-delimited so names containing spaces or
# quotes reach grep intact.
(
  cd "$REPO_ROOT" && find . -type f \
    \( -name '*.rs' -o -name '*.sh' -o -name '*.py' -o -name '*.md' \
    -o -name '*.toml' -o -name '*.json' -o -name '*.sql' \
    -o -name '*.cfg' -o -name '*.env' -o -name '*.txt' \) \
    -not -path '*/.git/*' \
    -not -path '*/target/*' \
    -not -path '*/node_modules/*' \
    -not -path '*/docs/wiki/*' \
    -print0
) > "$FILE_LIST" 2>/dev/null

# Lines that legitimately mention an old name (migration code, back-compat
# aliases, changelog history) are not drift. Matched case-insensitively as
# substrings against each hit line (which is "relative/path:line:text").
cat > "$FILTER_FILE" <<'FILTER'
rename
formerly
old name
back-compat
backward
deprecated
legacy
migrat
alias
to_regclass
_id_seq
provider_env
changelog
FILTER

# Loop in the MAIN shell (read from a file, not a pipe) so fail/pass counters
# and the --strict exit propagate.
while IFS= read -r old_name; do
  [ -z "$old_name" ] && continue
  # The file list is opened before the cd, and the cd only affects the first
  # pipeline stage, so the temp-file paths stay valid. `|| true`: "no hits"
  # makes grep exit non-zero, which must not abort the script under set -e.
  hits=$(
    {
      cd "$REPO_ROOT" && xargs -0 grep -nHF -e "$old_name" 2>/dev/null
    } < "$FILE_LIST" | grep -ivF -f "$FILTER_FILE" || true
  )
  if [ -n "$hits" ]; then
    fail "resurrected old name '$old_name':"
    printf '%s\n' "$hits" | while IFS= read -r line; do
      printf ' %s\n' "$line"
    done
  else
    pass
  fi
done < "$OLD_NAMES_FILE"
echo ""
# ── 3. orphan pages ───────────────────────────────────────────────────
echo "=== 3. orphan pages ==="
# A wiki page counts as linked when its file name occurs anywhere in index.md
# (fixed-string match); index.md itself is exempt.
for page in "$WIKI_DIR"/*.md; do
  page_name=${page##*/}
  case "$page_name" in
    index.md) continue ;;
  esac
  if ! grep -qF "$page_name" "$WIKI_DIR/index.md" 2>/dev/null; then
    fail "docs/wiki/$page_name not linked from index.md (orphan)"
  else
    pass
  fi
done
echo ""
# ── 4. top-level docs dangling links ──────────────────────────────────
#
# Catches links to removed docs (the PR #224 bug class). Two patterns:
# a) markdown links [label](local.md) — resolve relative to docs/ or root
# b) backtick SHOUTING-CASE .md refs — doc-name references (e.g. FOO-BAR.md)
# Only link targets that end in ".md" are extracted, so a target followed by a
# "#fragment" or a title is not examined. External URLs (http://, https://,
# mailto:) and targets starting with '#' are skipped. Bare lowercase source
# filenames (env.sh, build.sh) are intentionally out of scope — those are
# often runtime/contextual, not committed doc references.
echo "=== 4. top-level docs dangling links ==="
for doc_file in "$TOP_DOCS_DIR"/*.md; do
  # When docs/ holds no *.md the glob stays literal; skip it rather than
  # running grep on a file that does not exist.
  [ -f "$doc_file" ] || continue
  base="$(basename "$doc_file")"
  # index.md (README) and the wiki-split plan doc are meta; still check them.

  # a) markdown links to local .md files
  # Staged in a temp file so the loop runs in the main shell and the
  # pass/fail counters survive.
  _tmp_links=$(mktemp)
  grep -oE '\]\([^)]+\.md\)' "$doc_file" \
    | sed 's/^\](//; s/)$//' > "$_tmp_links"
  while IFS= read -r link; do
    case "$link" in
      # Match full URL schemes: a bare "http*" would also skip local docs
      # whose file name merely starts with "http" (e.g. http-api.md).
      http://* | https://* | mailto:* | \#*) continue ;;
    esac
    if [ -e "${TOP_DOCS_DIR}/$link" ] || [ -e "$REPO_ROOT/$link" ]; then
      pass
    else
      fail "docs/$base → markdown link '$link' (target not found)"
    fi
  done < "$_tmp_links"
  rm -f "$_tmp_links"

  # b) backtick SHOUTING-CASE doc-name references (e.g. `COLIBRI-SKILLS.md`)
  _tmp_docrefs=$(mktemp)
  grep -o '`[A-Z][A-Z0-9_-]*\.md`' "$doc_file" | tr -d '`' > "$_tmp_docrefs"
  while IFS= read -r docref; do
    if [ -e "${TOP_DOCS_DIR}/$docref" ] || [ -e "$REPO_ROOT/$docref" ]; then
      pass
    else
      fail "docs/$base → doc reference '$docref' (not found)"
    fi
  done < "$_tmp_docrefs"
  rm -f "$_tmp_docrefs"
done
echo ""
# ── report ────────────────────────────────────────────────────────────
# Print the PASS/FAIL totals followed by a one-line verdict.
# Globals:  PASS, FAIL, STRICT (read)
# Outputs:  summary and verdict on stdout
# Returns:  1 only when failures were recorded AND strict mode is on; 0 otherwise
#
# The strict test is a full `if`, not `[ … ] && exit 1`: as the last command
# executed, a false `[ … ]` would become the script's own exit status and make
# advisory mode exit 1 whenever failures were found.
report_summary() {
  printf "=== PASS: %d FAIL: %d ===\n" "$PASS" "$FAIL"
  if [ "$FAIL" -gt 0 ]; then
    echo "wiki-lint: drift detected — review failures above"
    if [ "$STRICT" -eq 1 ]; then
      return 1
    fi
  else
    echo "wiki-lint: clean"
  fi
  return 0
}
report_summary || exit 1