#!/bin/sh
# wiki-lint — validate the docs/wiki/ knowledge base against the codebase.
#
# Three deterministic checks (no LLM, CI-friendly):
#   1. Dangling references: every file path cited in wiki pages must exist
#      (a `:line` suffix is stripped, not verified).
#   2. Resurrected old names: "Shipped" renames from naming-decisions.md
#      must not reappear in code (outside the wiki).
#   3. Orphan pages: every docs/wiki/*.md must be linked from index.md.
#
# Output: PASS count or FAIL report. Non-zero exit on failure in --strict.
#
# Usage:
#   ./scripts/wiki-lint           # advisory (report only, exit 0)
#   ./scripts/wiki-lint --strict  # gate mode (non-zero exit on failure)
set -eu

# Resolve the script's own directory and the repo root (its parent) as
# absolute paths. CDPATH is cleared for each cd so that a CDPATH hit cannot
# make cd print the directory and corrupt the captured value; `--` keeps a
# script path that starts with '-' from being parsed as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
REPO_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd)"
WIKI_DIR="$REPO_ROOT/docs/wiki"

# Result counters, incremented by the fail/pass helpers.
FAIL=0
PASS=0

# Gate mode is enabled only when the first argument is exactly --strict.
STRICT=0
if [ "${1:-}" = "--strict" ]; then STRICT=1; fi

# ── helpers ───────────────────────────────────────────────────────────

# Report one failed check: print the message (all arguments joined by spaces)
# on stdout behind a FAIL tag, then bump the FAIL counter.
fail() {
    printf "  FAIL  %s\n" "$*"
    FAIL=$((FAIL + 1))
}
# Record one successful check; prints nothing.
pass() {
    PASS=$((PASS + 1))
}
# Print $1 with a leading "$REPO_ROOT/" removed ($1 is printed unchanged when
# it does not start with that prefix). The prefix is quoted inside the
# parameter expansion so the repo path is matched literally: characters such
# as '.', '[' or '|' in the checkout path are not treated as pattern syntax.
_wiki_path() { printf '%s' "${1#"$REPO_ROOT"/}"; }

# Print, one per line, the inline-code spans of markdown file $1 that look
# like file references (`path/to/file.rs`, `path/to/file:123`), with
# everything from the first ':' onward removed. Spans that start like a URL,
# a `{…}` placeholder, a `./` command, a `#` anchor or a short flag, or that
# contain `--` or a space, are dropped.
extract_paths() {
    grep -o '`[^`]*`' "$1" \
        | sed 's/`//g' \
        | grep -E '[a-zA-Z0-9_/.-]' \
        | grep -v -E -e '^https?:' -e '^\{' -e '^\./' -e '^#' \
                     -e '^-[a-z]' -e '--' -e ' ' \
        | sed 's/:.*//'
}

# ── 1. dangling references ────────────────────────────────────────────

echo "=== 1. dangling references ==="

# For every wiki page, resolve each file-like inline-code reference against
# (a) the repo root, (b) the wiki directory and (c) — for bare file names with
# no '/' — anywhere in the tree (depth <= 7, skipping .git/ and target/).
# The inner loop reads from a here-document rather than a pipe or a temp file:
# it runs in the main shell, so the pass/fail counters survive, and nothing is
# left behind in the temp directory if the script is interrupted mid-loop.
for wiki_file in "$WIKI_DIR"/*.md; do
    # An unmatched glob stays literal in POSIX sh; skip it rather than
    # feeding a file literally named "*.md" to grep.
    [ -f "$wiki_file" ] || continue
    display="$(_wiki_path "$wiki_file")"
    while IFS= read -r path_ref; do
        # Skip references that are known not to be repo paths, and anything
        # without one of the recognised file extensions.
        case "$path_ref" in
            ""|index*|agent-harness*|naming-decisions*|quality-gates*) continue ;;
            path/to/*|clawdie-iso/*) continue ;;
            fake-pi-agent.py) continue ;;            # old name in the Shipped table
            stage-colibri-iso.sh) continue ;;        # cross-repo (clawdie-iso)
            ADR-agent-harness-consolidation.md) continue ;;  # wiki-documented as absent
            *.md|*.rs|*.sh|*.py|*.sql|*.json|*.toml|*.yml|*.cfg|*.env|*.txt) ;;
            *) continue ;;
        esac
        if [ -f "$REPO_ROOT/$path_ref" ] || [ -d "$REPO_ROOT/$path_ref" ]; then
            pass
        elif [ -f "$WIKI_DIR/$path_ref" ] || [ -d "$WIKI_DIR/$path_ref" ]; then
            pass
        else
            case "$path_ref" in
                */*)
                    # A reference with a directory part must resolve exactly.
                    fail "$display → '$path_ref' (not found)"
                    ;;
                *)
                    # A bare file name may live anywhere in the tree.
                    found=$(find "$REPO_ROOT" -maxdepth 7 -name "$path_ref" \
                        -not -path '*/.git/*' -not -path '*/target/*' 2>/dev/null | head -n 1)
                    if [ -n "$found" ]; then
                        pass
                    else
                        fail "$display → '$path_ref' (not found)"
                    fi
                    ;;
            esac
        fi
    done <<WIKI_PATHS
$(extract_paths "$wiki_file")
WIKI_PATHS
done

# AGENTS.md lives at the repo root, not under docs/. Each wiki page that
# mentions it contributes one pass (file present at the root) or one fail.
for wiki_file in "$WIKI_DIR"/*.md; do
    # Skip the literal pattern left behind when the glob matches nothing.
    [ -f "$wiki_file" ] || continue
    # -F: match the literal name; as a regex the '.' would match any character.
    if grep -qF 'AGENTS.md' "$wiki_file"; then
        if [ -f "$REPO_ROOT/AGENTS.md" ]; then
            pass
        else
            fail "$(_wiki_path "$wiki_file") → 'AGENTS.md' (not found at repo root)"
        fi
    fi
done

# The wiki documents docs/ADR-agent-harness-consolidation.md as absent; if the
# file ever appears, that wiki claim has gone stale.
if [ ! -f "$REPO_ROOT/docs/ADR-agent-harness-consolidation.md" ]; then
    printf '%s\n' "  PASS  ADR-agent-harness-consolidation.md correctly absent (wiki-documented)"
    pass
else
    fail "docs/ADR-agent-harness-consolidation.md exists — conflicts with wiki claim"
fi

printf '\n'

# ── 2. resurrected old names ──────────────────────────────────────────

echo "=== 2. resurrected old names ==="

# The list of retired names is not hardcoded: it is read from the "Shipped"
# section of naming-decisions.md, so recording a rename there is enough to
# have it checked. A Shipped row looks like `| `old` → `new` | … |`; every
# backticked token left of the arrow is taken as an old name.
OLD_NAMES_FILE=$(mktemp)
FILE_LIST=$(mktemp)
FILTER_FILE=$(mktemp)
trap 'rm -f "$OLD_NAMES_FILE" "$FILE_LIST" "$FILTER_FILE"' EXIT

# awk  : keep only the body lines between "## Shipped" and the next "## ".
# sed  : keep rows that contain an arrow, cut from the arrow to end of line.
# grep : pull out each backticked token; tr drops the backticks.
# sort : de-duplicate.
awk '
    /^## / { in_shipped = ($0 ~ /^## Shipped/); next }
    in_shipped
' "$WIKI_DIR/naming-decisions.md" \
    | sed -n '/→/s/→.*//p' \
    | grep -o '`[^`]*`' \
    | tr -d '`' \
    | sort -u > "$OLD_NAMES_FILE"

# Source files to scan (exclude the wiki itself, vcs, build output).
# find exits non-zero when it hits e.g. an unreadable directory; with stderr
# silenced and `set -e` active that would end the script without any report.
# The scan is best-effort, so warn and carry on with whatever was listed.
find "$REPO_ROOT" -type f \
    \( -name '*.rs' -o -name '*.sh' -o -name '*.py' -o -name '*.md' \
       -o -name '*.toml' -o -name '*.json' -o -name '*.sql' \
       -o -name '*.cfg' -o -name '*.env' -o -name '*.txt' \) \
    -not -path '*/.git/*' \
    -not -path '*/target/*' \
    -not -path '*/node_modules/*' \
    -not -path '*/docs/wiki/*' \
    > "$FILE_LIST" 2>/dev/null \
    || printf '  WARN  %s\n' \
        'file scan reported errors (unreadable paths?); old-name check may be incomplete' >&2

# Ignore-list for hit lines: a line that mentions an old name alongside one of
# these markers (migration code, back-compat aliases, changelog history) is
# not drift. Each entry is matched case-insensitively as a fixed substring of
# the hit line. One entry per line in $FILTER_FILE.
printf '%s\n' \
    'rename' \
    'formerly' \
    'old name' \
    'back-compat' \
    'backward' \
    'deprecated' \
    'legacy' \
    'migrat' \
    'alias' \
    'to_regclass' \
    '_id_seq' \
    'provider_env' \
    'changelog' \
    > "$FILTER_FILE"

# Scan every listed source file for each retired name. The loop reads from a
# file (not a pipe) so it runs in the MAIN shell and the fail/pass counters
# and the --strict exit propagate.
while IFS= read -r old_name; do
    [ -z "$old_name" ] && continue
    # - tr | xargs -0: the list holds one path per line; handing it over
    #   NUL-delimited keeps paths with spaces or quotes intact (plain xargs
    #   would split or reject them, and 2>/dev/null would hide that).
    # - awk: make each hit repo-relative before filtering, so the ignore-list
    #   is matched against the in-repo path and the line text only — never
    #   against the directory the repo happens to be checked out in.
    # - || true: "no hits left" makes the final grep exit non-zero; that is
    #   the normal clean case, not an error.
    hits=$(tr '\n' '\0' < "$FILE_LIST" \
        | xargs -0 grep -nHF -e "$old_name" 2>/dev/null \
        | WIKI_LINT_ROOT="$REPO_ROOT/" awk '
            BEGIN { root = ENVIRON["WIKI_LINT_ROOT"] }
            index($0, root) == 1 { $0 = substr($0, length(root) + 1) }
            { print }' \
        | grep -ivF -f "$FILTER_FILE" || true)
    if [ -n "$hits" ]; then
        fail "resurrected old name '$old_name':"
        # Indent the hit lines under the FAIL header.
        printf '%s\n' "$hits" | sed 's/^/         /'
    else
        pass
    fi
done < "$OLD_NAMES_FILE"

# Guard against the renamed sample agent being re-added under its old
# file name.
if [ ! -f "$REPO_ROOT/scripts/fake-pi-agent.py" ]; then
    pass
else
    fail "scripts/fake-pi-agent.py still exists (renamed to sample-pi-agent.py)"
fi

printf '\n'

# ── 3. orphan pages ───────────────────────────────────────────────────

echo "=== 3. orphan pages ==="

# A page counts as linked when its file name occurs anywhere in index.md
# (fixed-string substring match). index.md itself is exempt.
for wiki_file in "$WIKI_DIR"/*.md; do
    base="${wiki_file##*/}"
    case "$base" in
        index.md) continue ;;
    esac
    if ! grep -qF "$base" "$WIKI_DIR/index.md" 2>/dev/null; then
        fail "docs/wiki/$base not linked from index.md (orphan)"
    else
        pass
    fi
done

printf '\n'

# ── report ────────────────────────────────────────────────────────────

# Print the totals, then the verdict. Only --strict turns failures into a
# non-zero exit status; advisory mode always falls through with status 0.
printf "=== PASS: %d  FAIL: %d ===\n" "$PASS" "$FAIL"

if [ "$FAIL" -gt 0 ]; then
    echo "wiki-lint: drift detected — review failures above"
    # Use a full `if`, not `[ … ] && exit 1`: in advisory mode the failed test
    # would become the status of this final statement, and the script would
    # exit 1 even though only --strict is meant to gate.
    if [ "$STRICT" -eq 1 ]; then
        exit 1
    fi
else
    echo "wiki-lint: clean"
fi
