Implement Privacy-First Documentation strategy: - Pre-commit hook scans staged files for credentials, IPs, usernames, domains - Blocks commits containing: samob, smilepowered, 192.168.x.x, 10.x.x.x, password=, secret=, api_key=, token=, etc. - Auto-generates .git/hooks/sensitive-patterns.txt and sensitive-allowlist.txt on first run - Users can add false positives to allowlist without removing legitimate content - Documented in scripts/hooks/SECURITY.md with customization and audit instructions - Updated CONTRIBUTING.md with setup and troubleshooting steps Enforcement: All contributors now have automatic sensitive data detection before commit. Benefits: Docs become safe to share publicly, prevents credential leaks, enables Privacy-First approach. Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com> --- Build: pass | Tests: pass — Tests 489 passed | 10 skipped (499)
100 lines
3.3 KiB
Bash
Executable file
100 lines
3.3 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Pre-commit hook: catch sensitive data before it hits the repo.
#
# Checks the lines ADDED by each staged file against a list of extended
# regular expressions (usernames, IPs, domains, credentials) and blocks the
# commit when any of them match.
#
# Files (created with defaults on first run, inside the git directory):
#   hooks/sensitive-patterns.txt   one "REGEX|description" rule per line;
#                                  the LAST '|' on the line is the separator,
#                                  so the regex itself may contain '|'.
#   hooks/sensitive-allowlist.txt  "FILE:PATTERN" entries that suppress a
#                                  specific match (false positives).
#
# Environment:
#   SKIP_SECURITY_CHECK=1   bypass the scan entirely
#   GIT_DIR                 used when set, else `git rev-parse --git-dir`
#
# Exit status:
#   0  nothing staged, scan skipped, or no sensitive data found
#   1  at least one sensitive pattern matched (commit is blocked)

set -euo pipefail

# Colors for output (real escape bytes, so plain printf '%s' can print them)
readonly RED=$'\033[0;31m'
readonly YELLOW=$'\033[1;33m'
readonly GREEN=$'\033[0;32m'
readonly NC=$'\033[0m' # No Color

# Configuration
git_dir=${GIT_DIR:-$(git rev-parse --git-dir)}
readonly SENSITIVE_PATTERNS_FILE="${git_dir}/hooks/sensitive-patterns.txt"
readonly ALLOWLIST_FILE="${git_dir}/hooks/sensitive-allowlist.txt"

# The default files live in <git-dir>/hooks; make sure that directory exists
# before writing to it.
mkdir -p -- "${git_dir}/hooks"

# Create default pattern file if it doesn't exist
if [[ ! -f "$SENSITIVE_PATTERNS_FILE" ]]; then
  cat > "$SENSITIVE_PATTERNS_FILE" << 'PATTERNS'
# Sensitive patterns to block
# Format: pattern|description

samob|Specific username (samob)
smilepowered|Specific domain (smilepowered)
192\.168\.|Private IP range (192.168.x.x)
10\.[0-9]{1,3}\.[0-9]{1,3}\.|Private IP range (10.x.x.x)
172\.(1[6-9]|2[0-9]|3[01])\.|Private IP range (172.16-31.x.x)
password\s*=|Likely credential (password=)
secret\s*=|Likely credential (secret=)
api.?key\s*=|Likely credential (api_key=)
token\s*=|Likely credential (token=)
authorization:\s*bearer|Bearer token exposure
aws_access_key|AWS credentials
private.?key|Private key exposure
PATTERNS
fi

# Create default allowlist if it doesn't exist
if [[ ! -f "$ALLOWLIST_FILE" ]]; then
  touch -- "$ALLOWLIST_FILE"
fi

# Check if hook is disabled
if [[ "${SKIP_SECURITY_CHECK:-}" == "1" ]]; then
  printf '%s\n' "${YELLOW}⚠ Security check skipped (SKIP_SECURITY_CHECK=1)${NC}"
  exit 0
fi

# Collect staged paths NUL-delimited so names containing spaces, globs or
# newlines survive intact. The list goes through a temp file (rather than a
# process substitution) so that a failing `git diff` aborts the hook under
# `set -e` instead of silently yielding an empty list.
# 'R' is included so renamed files that also carry edits are scanned.
staged_list=$(mktemp "${TMPDIR:-/tmp}/sensitive-scan.XXXXXX")
trap 'rm -f -- "$staged_list"' EXIT
git diff --cached --name-only -z --diff-filter=ACMR > "$staged_list"

staged_files=()
while IFS= read -r -d '' staged_path; do
  staged_files+=("$staged_path")
done < "$staged_list"

if (( ${#staged_files[@]} == 0 )); then
  exit 0
fi

# Load the rules once. Each rule is split on its LAST '|' because the regex
# part may itself contain '|' (e.g. the default 172.16-31.x.x alternation).
patterns=()
descriptions=()
comment_re='^[[:space:]]*#'
while IFS= read -r rule || [[ -n "$rule" ]]; do
  if [[ "$rule" == *'|'* ]]; then
    pattern=${rule%|*}
    description=${rule##*|}
  else
    pattern=$rule
    description=''
  fi

  # Skip empty lines and comments
  if [[ -z "$pattern" || "$pattern" =~ $comment_re ]]; then
    continue
  fi

  patterns+=("$pattern")
  descriptions+=("$description")
done < "$SENSITIVE_PATTERNS_FILE"

echo "🔒 Scanning staged files for sensitive data..."

VIOLATIONS=0
binary_marker=$'-\t-\t' # `git diff --numstat` prints "-<TAB>-<TAB>path" for binary files

# NOTE(review): matching is case-sensitive, so the default lowercase rules
# (e.g. "authorization:\s*bearer") will not catch "Authorization: Bearer";
# consider `grep -i` if that is the intent.
for file in "${staged_files[@]}"; do
  # Skip binary files
  numstat=$(git diff --cached --numstat -- "$file")
  if [[ "$numstat" == "$binary_marker"* ]]; then
    continue
  fi

  # Extract only the added lines (inside hunks, leading '+' stripped), once
  # per file. Removed lines, context lines and the diff header (which holds
  # the file name) are deliberately excluded so that deleting a secret or
  # merely touching a file with a "sensitive" path does not block the commit.
  added=$(git diff --cached --no-color --no-ext-diff -U0 -- "$file" \
    | awk '/^@@/ { in_hunk = 1; next } in_hunk && /^\+/ { print substr($0, 2) }')
  if [[ -z "$added" ]]; then
    continue
  fi

  for (( i = 0; i < ${#patterns[@]}; i++ )); do
    pattern=${patterns[i]}
    description=${descriptions[i]}

    # Search for pattern in staged content. An invalid regex makes grep
    # print its own error and is treated as "no match".
    if ! grep -Eq -- "$pattern" <<< "$added"; then
      continue
    fi

    # Check if it's in the allowlist
    if grep -Fq -- "$file:$pattern" "$ALLOWLIST_FILE" 2>/dev/null; then
      continue
    fi

    printf '%s\n' "${RED}✗ Found: $description${NC}"
    printf '%s\n' "  File: $file"
    printf '%s\n' "  Pattern: $pattern"
    printf '\n'
    VIOLATIONS=$((VIOLATIONS + 1))
  done
done

if (( VIOLATIONS > 0 )); then
  separator='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
  printf '%s\n' "${RED}${separator}${NC}"
  printf '%s\n' "${RED}COMMIT BLOCKED: $VIOLATIONS sensitive pattern(s) detected${NC}"
  printf '\n'
  printf '%s\n' "Options:"
  printf '%s\n' "  1. Remove the sensitive data and re-stage"
  printf '%s\n' "  2. Add to allowlist if it's a false positive:"
  printf '%s\n' "     echo 'FILE:PATTERN' >> $ALLOWLIST_FILE"
  printf '%s\n' "  3. Skip check (not recommended):"
  printf '%s\n' "     SKIP_SECURITY_CHECK=1 git commit"
  printf '%s\n' "${RED}${separator}${NC}"
  exit 1
fi

printf '%s\n' "${GREEN}✓ No sensitive data detected${NC}"
exit 0