clawdie-ai/scripts/hooks/pre-commit

#!/usr/local/bin/bash
# Pre-commit hook: catch sensitive data before it hits the repo
# Checks staged files for patterns: usernames, IPs, domains, credentials
#
# Environment:
#   GIT_DIR                 Used as the git directory when set and non-empty;
#                           otherwise `git rev-parse --git-dir` is asked.
#   SKIP_SECURITY_CHECK=1   Bypass the scan and exit 0 (a warning is printed).
#
# Files (both live in <git-dir>/hooks and are created on first run):
#   sensitive-patterns.txt  One "pattern|description" rule per line.
#   sensitive-allowlist.txt "FILE:PATTERN" entries that suppress a finding.

set -e

# Colors for output. The values are backslash escapes, not raw ESC bytes, so
# they only take effect when printed through `echo -e` (or printf %b).
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Configuration. The hooks directory is resolved once and shared by both paths.
hook_data_dir="${GIT_DIR:-$(git rev-parse --git-dir)}/hooks"
SENSITIVE_PATTERNS_FILE="$hook_data_dir/sensitive-patterns.txt"
ALLOWLIST_FILE="$hook_data_dir/sensitive-allowlist.txt"

# Create default pattern file if it doesn't exist
if [[ ! -f "$SENSITIVE_PATTERNS_FILE" ]]; then
    cat > "$SENSITIVE_PATTERNS_FILE" << 'PATTERNS'
# Sensitive patterns to block
# Format: pattern|description

samob|Specific username (samob)
smilepowered|Specific domain (smilepowered)
192\.168\.|Private IP range (192.168.x.x)
10\.[0-9]{1,3}\.[0-9]{1,3}\.|Private IP range (10.x.x.x)
172\.(1[6-9]|2[0-9]|3[01])\.|Private IP range (172.16-31.x.x)
password\s*=|Likely credential (password=)
secret\s*=|Likely credential (secret=)
api.?key\s*=|Likely credential (api_key=)
token\s*=|Likely credential (token=)
authorization:\s*bearer|Bearer token exposure
aws_access_key|AWS credentials
private.?key|Private key exposure
PATTERNS
fi

# Create default allowlist if it doesn't exist
if [[ ! -f "$ALLOWLIST_FILE" ]]; then
    touch "$ALLOWLIST_FILE"
fi

# Check if hook is disabled. This runs after the default files are created,
# so a skipped run still leaves both files in place.
if [[ "$SKIP_SECURITY_CHECK" == "1" ]]; then
    echo -e "${YELLOW}⚠ Security check skipped (SKIP_SECURITY_CHECK=1)${NC}"
    exit 0
fi

VIOLATIONS=0

# List staged paths that are Added, Copied or Modified. --no-renames matters:
# with rename detection on, a file that was moved AND edited is reported with
# status R, which the ACM filter drops, so its new content would never be
# scanned. With detection off the new path is reported as an addition.
STAGED_FILES=$(git diff --cached --name-only --no-renames --diff-filter=ACM)

# Nothing relevant is staged: allow the commit without scanning.
if [[ -z "$STAGED_FILES" ]]; then
    exit 0
fi

# ---------------------------------------------------------------------------
# Scan what the commit ADDS to each staged text file against every rule in
# $SENSITIVE_PATTERNS_FILE, and count the findings in VIOLATIONS.
#
# Reads:  SENSITIVE_PATTERNS_FILE, ALLOWLIST_FILE, RED, NC
# Writes: VIOLATIONS (incremented once per file/pattern finding)
# ---------------------------------------------------------------------------

# split_pattern_line LINE
#   Split one "pattern|description" rule into the globals `pattern` and
#   `description`. The split happens at the LAST '|' so that a regex may use
#   alternation (e.g. "172\.(1[6-9]|2[0-9])\.|Private IP"); the trade-off is
#   that a description cannot contain '|'. A line with no '|' is taken as a
#   pattern with an empty description.
split_pattern_line() {
    local rule=$1
    if [[ "$rule" == *'|'* ]]; then
        pattern=${rule%'|'*}
        description=${rule##*'|'}
    else
        pattern=$rule
        description=""
    fi
}

# extract_added_lines
#   Filter a single-file unified diff on stdin down to the text of its added
#   lines (leading '+' removed). Everything before the first "@@" hunk header
#   is ignored, so the "+++ b/path" header is not mistaken for content, while
#   an added line whose own text starts with "++" is still kept.
extract_added_lines() {
    awk '/^@@/ { in_hunk = 1; next } in_hunk && /^[+]/ { print substr($0, 2) }'
}

# is_staged_binary PATH
#   Succeed when git considers the staged change to PATH binary. `--numstat`
#   prints "-<TAB>-<TAB>path" for binary files instead of line counts.
is_staged_binary() {
    local numstat
    numstat=$(git diff --cached --numstat -- "$1") || return 1
    [[ "$numstat" == -$'\t'-$'\t'* ]]
}

echo "🔒 Scanning staged files for sensitive data..."

# Load the rules once, up front, instead of re-reading them per file.
scan_patterns=()
scan_descriptions=()
skip_line_re='^[[:space:]]*(#|$)'   # blank, whitespace-only, or comment line
while IFS= read -r rule_line || [[ -n "$rule_line" ]]; do
    # `|| [[ -n ... ]]` keeps a final rule that has no trailing newline.
    if [[ "$rule_line" =~ $skip_line_re ]]; then
        continue
    fi
    split_pattern_line "$rule_line"
    if [[ -z "$pattern" ]]; then
        continue
    fi
    scan_patterns+=("$pattern")
    scan_descriptions+=("$description")
done < "$SENSITIVE_PATTERNS_FILE"

# NOTE(review): matching below is case-sensitive, so a lowercase rule such as
# "authorization:\s*bearer" will not match "Authorization: Bearer". Confirm
# whether the rules (or a `grep -i`) should cover mixed case.

# Walk the staged paths NUL-delimited so names containing spaces, glob
# characters or bytes git would otherwise quote are handled verbatim. The list
# is read on fd 3 to keep stdin free for the commands inside the loop.
# --no-renames makes a moved-and-edited file show up as an added path rather
# than status R, which the ACM filter would drop.
while IFS= read -r -d '' -u 3 file; do
    # Skip binary files
    if is_staged_binary "$file"; then
        continue
    fi

    # Text to inspect: the path itself (the diff header used to expose it to
    # the patterns) plus only the lines this commit adds. Removed and context
    # lines are left out so deleting a secret is never blocked. The diff is
    # taken once per file, not once per pattern.
    added_text=$(
        printf '%s\n' "$file"
        git diff --cached --no-color --no-ext-diff -U0 -- "$file" | extract_added_lines
    )

    for rule_index in "${!scan_patterns[@]}"; do
        pattern=${scan_patterns[rule_index]}

        # -e keeps a pattern that starts with '-' from being read as an option.
        if ! grep -qE -e "$pattern" <<<"$added_text"; then
            continue
        fi

        # Check if it's in the allowlist (fixed-string "FILE:PATTERN" entry)
        if grep -qF -- "$file:$pattern" "$ALLOWLIST_FILE" 2>/dev/null; then
            continue
        fi

        # Colors go through %b (escape expansion); data goes through %s so
        # backslashes in descriptions, paths or patterns print literally.
        printf '%b✗ Found: %s%b\n' "$RED" "${scan_descriptions[rule_index]}" "$NC"
        printf '  File: %s\n' "$file"
        printf '  Pattern: %s\n' "$pattern"
        printf '\n'
        VIOLATIONS=$((VIOLATIONS + 1))
    done
done 3< <(git diff --cached --name-only --no-renames --diff-filter=ACM -z)

# Final verdict. With no recorded violations the hook reports success and
# exits 0 so git proceeds; otherwise it prints the remediation options and
# exits 1, which makes git abort the commit.
if ! (( VIOLATIONS > 0 )); then
    printf '%b\n' "${GREEN}✓ No sensitive data detected${NC}"
    exit 0
fi

# Horizontal rule drawn above and below the failure report.
report_rule="${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

printf '%b\n' "$report_rule"
printf '%b\n' "${RED}COMMIT BLOCKED: $VIOLATIONS sensitive pattern(s) detected${NC}"

# Unquoted delimiter: $ALLOWLIST_FILE is expanded so the suggested command
# shows the real allowlist path.
cat <<EOF

Options:
  1. Remove the sensitive data and re-stage
  2. Add to allowlist if it's a false positive:
     echo 'FILE:PATTERN' >> $ALLOWLIST_FILE
  3. Skip check (not recommended):
     SKIP_SECURITY_CHECK=1 git commit
EOF

printf '%b\n' "$report_rule"
exit 1