#!/bin/sh
# maintenance-mode.sh — Clawdie pool maintenance and recovery
#
# Provides:
#   1. Pool health check
#   2. Replace failed disk
#   3. Scrub pool
#   4. Disk space cleanup
#   5. Boot environment management
#   6. Pool migration
#   7. Import read-only (data recovery)
#
# Written for FreeBSD /bin/sh (uses `local`, no bashisms).
#
# NOTE(review): the copy this file was restored from had been collapsed onto
# a few lines and had lost several spans (each here-document and the text up
# to the following "N>" redirection). Those places are marked
# "NOTE(review): reconstructed" below — confirm them against a pristine copy.

set -e

POOL_NAME="clawdie"
LOG="/var/log/clawdie-maintenance.log"
SHARE="/usr/local/share/clawdie-iso"

# --- generic helpers ---------------------------------------------------------

# Thin wrapper so every dialog carries the same back title.
dialog() { bsddialog --backtitle "Clawdie Maintenance Mode" "$@"; }

# Print an error to stderr and abort the script.
die() { echo "ERROR: $1" >&2; exit 1; }

# Append a timestamped line to $LOG.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" >> "$LOG"; }

# Show a message box. Never fails: ESC on an informational box must not
# terminate the script under `set -e`.
msgbox() { dialog --msgbox "$@" || true; }

# Ask a yes/no question. bsddialog reports the answer through its exit status
# (0 = Yes) and prints nothing, so callers must test the status, not output.
confirm() { dialog --yesno "$@"; }

# Succeed when $1 is a non-empty string of decimal digits.
is_uint() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
    esac
    return 0
}

# Succeed when $1 looks like a disk device (or a partition of one):
# ada/da/nvd/nda immediately followed by a unit number.
is_disk_name() {
    case "$1" in
        ada[0-9]*|da[0-9]*|nvd[0-9]*|nda[0-9]*) return 0 ;;
    esac
    return 1
}

# Run "$@", echo its combined output to the terminal and $LOG, and return the
# command's own exit status. (`cmd | tee` would return tee's status and hide
# every failure.)
run_logged() {
    local output rc
    rc=0
    output=$("$@" 2>&1) || rc=$?
    if [ -n "$output" ]; then
        printf '%s\n' "$output" | tee -a "$LOG" || true
    fi
    return "$rc"
}

# Print the name of the boot environment flagged active on reboot ("R" or
# "NR" in the second, tab-separated column of `bectl list -H`).
get_active_be() {
    bectl list -H 2>/dev/null | awk -F '\t' '$2 ~ /R/ { print $1; exit }'
}

# Let the user pick a boot environment.
#   $1 - prompt text
#   $2 - BE name to leave out of the list (may be empty)
# Prints the chosen name; returns non-zero on cancel or when nothing is
# selectable.
select_be() {
    local prompt="$1" exclude="$2"
    local be_names be

    be_names=$(bectl list -H 2>/dev/null | awk -F '\t' '{ print $1 }')

    # Build the tag/description pairs as real positional parameters so that
    # each one reaches bsddialog as exactly one argument.
    set --
    while IFS= read -r be; do
        [ -n "$be" ] || continue
        [ "$be" != "$exclude" ] || continue
        set -- "$@" "$be" ""
    done <<EOF
$be_names
EOF

    [ $# -gt 0 ] || return 1
    dialog --menu "$prompt" 15 60 10 "$@" 3>&1 1>&2 2>&3
}

. "${SHARE}/build.cfg"

# --- disk / pool primitives --------------------------------------------------

# Print a one-line human-readable summary for disk $1: model, size and,
# when the tools report them, serial number, temperature, SMART health and
# enclosure slot. Missing tools or fields are simply omitted.
get_disk_info() {
    local disk="$1"
    local info model serial size_bytes size_gb temp health slot

    model=$(camcontrol inquiry "$disk" 2>/dev/null | head -1)
    serial=$(camcontrol identify "$disk" 2>/dev/null \
        | grep -i 'serial number' | cut -d: -f2 | tr -d ' ')
    size_bytes=$(diskinfo "$disk" 2>/dev/null | awk '{ print $3 }')
    is_uint "$size_bytes" || size_bytes=0
    size_gb=$((size_bytes / 1073741824))
    # Several SMART attributes mention "temperature"; report only the first.
    temp=$(smartctl -A "$disk" 2>/dev/null \
        | awk 'tolower($0) ~ /temperature/ { print $10; exit }')
    health=$(smartctl -H "$disk" 2>/dev/null \
        | awk '/^SMART overall-health/ { print $NF; exit }')
    slot=$(sesutil map 2>/dev/null | grep "^${disk}" | awk '{ print $3 }')

    info="${disk} — ${model} (${size_gb}GB)"
    if [ -n "$serial" ]; then info="${info} S/N:${serial}"; fi
    if [ -n "$temp" ]; then info="${info} ${temp}°C"; fi
    if [ -n "$health" ]; then info="${info} ${health}"; fi
    if [ -n "$slot" ]; then info="${info} Slot:${slot}"; fi
    echo "$info"
}

# Import $POOL_NAME read-write unless it is already imported.
# Returns 0 on success, 1 (after telling the user) on failure.
import_pool_rw() {
    log "Importing pool $POOL_NAME (read-write)"
    if zpool list "$POOL_NAME" >/dev/null 2>&1; then
        log "Pool already imported"
        return 0
    fi
    if ! run_logged zpool import "$POOL_NAME"; then
        msgbox "\nFailed to import pool '$POOL_NAME'.\n\nCheck logs: $LOG" 10 50
        return 1
    fi
    log "Pool imported successfully"
    return 0
}

# Import $POOL_NAME read-only.
# Returns 0 on success, 1 (after telling the user) on failure.
import_pool_ro() {
    log "Importing pool $POOL_NAME (read-only)"
    if ! run_logged zpool import -o readonly=on "$POOL_NAME"; then
        msgbox "\nFailed to import pool '$POOL_NAME' (read-only).\n\nCheck logs: $LOG" 10 50
        return 1
    fi
    log "Pool imported read-only"
    return 0
}

# Export $POOL_NAME; returns zpool's exit status.
export_pool() {
    log "Exporting pool $POOL_NAME"
    run_logged zpool export "$POOL_NAME"
}

# --- menus ---------------------------------------------------------------------

# Show pool state, per-disk error counters, scan and error summaries, and
# offer to jump to the scrub menu.
menu_pool_health() {
    log "Pool health check selected"
    import_pool_rw || return 1

    local status state scan errors vdev_status action
    local disk disk_state read_err write_err cksum_err _rest disk_info

    status=$(zpool status "$POOL_NAME" 2>/dev/null) || status=""
    state=$(printf '%s\n' "$status" | grep "state:" | head -1)
    scan=$(printf '%s\n' "$status" | grep -A2 "scan:") || scan=""
    errors=$(printf '%s\n' "$status" | grep -A5 "errors:") || errors=""

    vdev_status=""
    # `read` splits the vdev line into its columns; is_disk_name requires a
    # unit number so indented prose such as "data ..." is not mistaken for a
    # device.
    while read -r disk disk_state read_err write_err cksum_err _rest; do
        is_disk_name "$disk" || continue
        disk_info=$(get_disk_info "$disk")
        vdev_status="${vdev_status}${disk_info}\n  State: ${disk_state}  Errors: R:${read_err} W:${write_err} C:${cksum_err}\n\n"
    done <<EOF
$status
EOF

    # NOTE(review): reconstructed — the original dialog text and item list
    # were lost; only the "scrub" action is known from the surviving case.
    action=$(dialog --menu \
        "Pool Health: ${POOL_NAME}\n\n${state:-state: unknown}\n\n${vdev_status}${scan}\n\n${errors}" \
        25 76 2 \
        "scrub" "Scrub pool" \
        "back" "Back to main menu" \
        3>&1 1>&2 2>&3) || return 0

    case "$action" in
        "scrub") menu_pool_scrub ;;
    esac
}

# Guide the user through replacing a non-ONLINE disk with an unused one.
menu_replace_disk() {
    log "Replace failed disk selected"
    import_pool_rw || return 1

    local status disk dstate _rest disk_info
    local failed_disk replacement pool_disks all_disks candidate
    local failed_size replace_size

    status=$(zpool status "$POOL_NAME" 2>/dev/null) || status=""

    # Collect failed disks as tag/description pairs in "$@".
    set --
    while read -r disk dstate _rest; do
        is_disk_name "$disk" || continue
        [ "$dstate" != "ONLINE" ] || continue
        disk_info=$(get_disk_info "$disk")
        set -- "$@" "$disk" "${disk_info} (${dstate})"
    done <<EOF
$status
EOF

    # NOTE(review): reconstructed — the empty-list message and the prompt of
    # the failed-disk menu were lost from the source.
    if [ $# -eq 0 ]; then
        msgbox "\nNo failed disks found in pool '$POOL_NAME'." 8 50
        return 0
    fi

    failed_disk=$(dialog --menu \
        "Select the failed disk to replace:" \
        15 70 10 \
        "$@" \
        3>&1 1>&2 2>&3) || return 0

    # Reduce pool members to their base device (ada0p3 -> ada0) so a disk
    # whose partition is already in the pool is never offered as a spare.
    pool_disks=$(printf '%s\n' "$status" \
        | awk '{ if (match($1, /^(ada|da|nvd|nda)[0-9]+/)) print substr($1, 1, RLENGTH) }' \
        | sort -u)
    all_disks=$(camcontrol devlist 2>/dev/null \
        | grep -oE '(ada|da|nvd|nda)[0-9]+' | sort -u)

    set --
    # Device names contain no whitespace, so word splitting is intended here.
    for candidate in $all_disks; do
        if printf '%s\n' "$pool_disks" | grep -qxF -- "$candidate"; then
            continue
        fi
        set -- "$@" "$candidate" "$(get_disk_info "$candidate")"
    done

    if [ $# -eq 0 ]; then
        msgbox "\nNo replacement disks available.\n\nConnect a replacement disk and restart." 10 50
        return 1
    fi

    replacement=$(dialog --menu \
        "Select replacement disk for ${failed_disk}:" \
        15 70 10 \
        "$@" \
        3>&1 1>&2 2>&3) || return 0

    failed_size=$(diskinfo "$failed_disk" 2>/dev/null | awk '{ print $3 }')
    replace_size=$(diskinfo "$replacement" 2>/dev/null | awk '{ print $3 }')

    if ! is_uint "$replace_size"; then
        msgbox "\nCannot determine the size of ${replacement}.\n\nCheck that the disk is connected and readable." 10 50
        return 1
    fi
    # A dead disk may no longer report a size; in that case skip the
    # comparison and let `zpool replace` make the final call.
    if is_uint "$failed_size" && [ "$replace_size" -lt "$failed_size" ]; then
        msgbox "\nReplacement disk is smaller than failed disk.\n\nUse a disk of equal or larger size." 10 50
        return 1
    fi

    confirm \
        "\nConfirm disk replacement:\n\n  ${failed_disk} → ${replacement}\n\nThis will take 4-8 hours.\nPool remains usable during resilver." \
        12 60 || return 0

    log "Replacing $failed_disk with $replacement"
    if ! run_logged zpool replace "$POOL_NAME" "$failed_disk" "$replacement"; then
        msgbox "\nFailed to start disk replacement.\n\nCheck logs: $LOG" 10 50
        return 1
    fi

    msgbox \
        "\nDisk replacement started.\n\nMonitor progress:\n  zpool status $POOL_NAME\n\nPool remains usable.\nDo not power off until resilver completes." \
        12 60
}

# Start, inspect or stop a scrub of $POOL_NAME.
menu_pool_scrub() {
    log "Pool scrub selected"
    import_pool_rw || return 1

    local last_scrub action scrub_status

    last_scrub=$(zpool status "$POOL_NAME" 2>/dev/null \
        | grep -A1 "scan:" | grep "scrub" | head -1)

    action=$(dialog --menu \
        "Pool Scrub\n\nLast scrub:\n${last_scrub:-None}\n\nScrub reads all data and verifies checksums.\nRepairs silent corruption if redundancy exists." \
        15 60 4 \
        "start" "Start scrub now" \
        "status" "Check scrub status" \
        "stop" "Stop running scrub" \
        "back" "Back to main menu" \
        3>&1 1>&2 2>&3) || return 0

    case "$action" in
        "start")
            if run_logged zpool scrub "$POOL_NAME"; then
                msgbox "\nScrub started.\n\nPool remains usable during scrub.\nMonitor: zpool status $POOL_NAME" 10 50
            else
                msgbox "\nFailed to start scrub.\n\nCheck logs: $LOG" 10 50
            fi
            ;;
        "status")
            scrub_status=$(zpool status "$POOL_NAME" 2>/dev/null | grep -A5 "scan:") \
                || scrub_status=""
            msgbox "${scrub_status:-No scan information available.}" 15 60
            ;;
        "stop")
            if confirm "\nStop running scrub?\n\nThis is safe but not recommended." 10 50; then
                run_logged zpool scrub -s "$POOL_NAME" \
                    || msgbox "\nFailed to stop scrub.\n\nCheck logs: $LOG" 10 50
            fi
            ;;
    esac
}

# Remove caches, old logs, temporary files and crash dumps after confirmation.
# NOTE(review): these paths are on the running system, not under /$POOL_NAME —
# presumably intended; confirm.
cleanup_quick() {
    confirm \
        "\nQuick cleanup will remove:\n\n  - Package cache (/var/cache/pkg)\n  - npm cache (~/.npm)\n  - Old logs (>7 days)\n  - Temporary files\n  - Crash dumps\n\nContinue?" \
        12 60 || return 0

    log "Running quick cleanup"
    # Best effort: a path that is missing or busy must not abort the rest.
    rm -rf /var/cache/pkg/* 2>/dev/null || true
    rm -rf /home/*/.npm 2>/dev/null || true
    find /var/log -type f -mtime +7 -delete 2>/dev/null || true
    rm -rf /tmp/* 2>/dev/null || true
    rm -rf /var/tmp/* 2>/dev/null || true
    rm -rf /var/crash/* 2>/dev/null || true
    msgbox "\nQuick cleanup complete." 8 40
}

# Destroy snapshots of $POOL_NAME older than 30 days after confirmation.
cleanup_snapshots() {
    local snapshots snapshot_count snap creation age_days now deleted

    snapshots=$(zfs list -t snapshot -r -H -o name "$POOL_NAME" 2>/dev/null) \
        || snapshots=""
    if [ -z "$snapshots" ]; then
        snapshot_count=0
    else
        snapshot_count=$(printf '%s\n' "$snapshots" | grep -c .)
    fi

    confirm \
        "\nFound ${snapshot_count} snapshots.\n\nDelete snapshots older than 30 days?" \
        10 50 || return 0

    log "Deleting old snapshots"
    now=$(date +%s)
    deleted=0
    while IFS= read -r snap; do
        [ -n "$snap" ] || continue
        creation=$(zfs get -Hp -o value creation "$snap" 2>/dev/null) || creation=""
        # An unreadable creation time must never be treated as "very old".
        is_uint "$creation" || continue
        age_days=$(( (now - creation) / 86400 ))
        if [ "$age_days" -gt 30 ]; then
            if run_logged zfs destroy "$snap"; then
                log "Deleted snapshot: $snap (age: ${age_days} days)"
                deleted=$((deleted + 1))
            else
                log "Failed to delete snapshot: $snap"
            fi
        fi
    done <<EOF
$snapshots
EOF

    # NOTE(review): reconstructed — whatever followed the loop in the original
    # "snapshots" branch was lost from the source.
    msgbox "\nSnapshot cleanup complete.\n\nDeleted ${deleted} snapshot(s)." 9 50
}

# Show pool usage and offer the individual cleanup / analysis actions.
menu_disk_cleanup() {
    log "Disk space cleanup selected"
    import_pool_rw || return 1

    local used avail total used_gb total_gb percent action
    local bes active large_files datasets

    used=$(zfs list -Hp -o used "$POOL_NAME" 2>/dev/null) || used=""
    avail=$(zfs list -Hp -o avail "$POOL_NAME" 2>/dev/null) || avail=""
    is_uint "$used" || used=0
    is_uint "$avail" || avail=0
    total=$((used + avail))
    used_gb=$((used / 1073741824))
    total_gb=$((total / 1073741824))
    percent=0
    if [ "$total" -gt 0 ]; then
        percent=$((used * 100 / total))
    fi

    action=$(dialog --menu \
        "Disk Space Cleanup\n\nPool: $POOL_NAME\nUsed: ${used_gb} GB / ${total_gb} GB (${percent}%)\n\nSelect cleanup type:" \
        18 70 6 \
        "quick" "Quick cleanup (pkg cache, npm cache, old logs)" \
        "snapshots" "Snapshot cleanup (delete old snapshots)" \
        "be" "Boot environment cleanup (delete old BEs)" \
        "large" "Find large files (>100MB)" \
        "analyze" "Analyze disk usage by dataset" \
        "back" "Back to main menu" \
        3>&1 1>&2 2>&3) || return 0

    case "$action" in
        "quick")
            cleanup_quick
            ;;
        "snapshots")
            cleanup_snapshots
            ;;
        "be")
            # NOTE(review): the start of this branch was lost from the source
            # and is reconstructed; "<name>" in the hint below had likewise
            # been stripped.
            bes=$(bectl list -H 2>/dev/null | awk -F '\t' '{ print $1 }')
            active=$(get_active_be) || active=""
            msgbox \
                "\nBoot Environments\n\nActive: ${active:-unknown}\n\nAvailable:\n${bes}\n\nUse 'bectl destroy <name>' to remove old BEs." \
                15 60
            ;;
        "large")
            dialog --infobox "Scanning for large files..." 5 50 || true
            large_files=$(find / -type f -size +100M 2>/dev/null | head -20)
            msgbox \
                "\nLarge files (>100MB):\n\n${large_files:-None found}\n\nReview and delete manually if needed." \
                20 70
            ;;
        "analyze")
            # Let zfs sort by the real size; a numeric sort of human-readable
            # values ("1.5G", "900M") orders them incorrectly.
            datasets=$(zfs list -r -H -o name,used -S used "$POOL_NAME" 2>/dev/null \
                | head -15)
            msgbox "\nDatasets by size:\n\n${datasets}" 20 70
            ;;
    esac
}

# List, create, activate and destroy boot environments. Loops until the user
# picks "back" or cancels.
menu_be_manager() {
    log "Boot environment manager selected"
    import_pool_rw || return 1

    local active action name

    while :; do
        active=$(get_active_be) || active=""

        action=$(dialog --menu \
            "Boot Environments\n\nActive: ${active:-unknown}\n\nSelect action:" \
            15 60 5 \
            "list" "List all boot environments" \
            "create" "Create new boot environment" \
            "activate" "Activate a boot environment" \
            "destroy" "Destroy a boot environment" \
            "back" "Back to main menu" \
            3>&1 1>&2 2>&3) || return 0

        case "$action" in
            "list")
                msgbox "$(bectl list 2>/dev/null)" 15 70
                ;;
            "create")
                name=$(dialog --inputbox "New boot environment name:" 8 50 "" \
                    3>&1 1>&2 2>&3) || name=""
                if [ -n "$name" ]; then
                    run_logged bectl create "$name" \
                        || msgbox "\nFailed to create boot environment '$name'.\n\nCheck logs: $LOG" 10 50
                fi
                ;;
            "activate")
                name=$(select_be "Select BE to activate:" "") || name=""
                if [ -n "$name" ]; then
                    run_logged bectl activate "$name" \
                        || msgbox "\nFailed to activate boot environment '$name'.\n\nCheck logs: $LOG" 10 50
                fi
                ;;
            "destroy")
                # The active BE is left out of the list and re-checked below.
                name=$(select_be "Select BE to destroy:" "$active") || name=""
                if [ -n "$name" ] && [ "$name" != "$active" ]; then
                    if confirm "\nDestroy boot environment '$name'?\n\nThis cannot be undone." 10 50; then
                        run_logged bectl destroy "$name" \
                            || msgbox "\nFailed to destroy boot environment '$name'.\n\nCheck logs: $LOG" 10 50
                    fi
                fi
                ;;
            "back")
                return 0
                ;;
        esac
    done
}

# Hand control over to the pool migration script (replaces this process).
menu_pool_migrate() {
    log "Pool migration selected"
    local script="${SHARE}/firstboot/zfs-pool-migrate.sh"

    # A failed `exec` would terminate the whole shell, so check first.
    if [ ! -x "$script" ]; then
        msgbox "\nPool migration script not found:\n\n  ${script}" 10 70
        return 1
    fi
    exec "$script"
}

# Import the pool read-only and drop into a shell for data recovery.
menu_import_ro() {
    log "Import read-only selected"

    # This entry is offered while the pool is imported read-write; a second
    # import would fail, so the pool has to be exported first.
    if zpool list "$POOL_NAME" >/dev/null 2>&1; then
        confirm "\nPool '$POOL_NAME' is currently imported.\n\nExport it and re-import read-only?" \
            10 60 || return 0
        if ! export_pool; then
            msgbox "\nFailed to export pool '$POOL_NAME'.\n\nCheck logs: $LOG" 10 50
            return 1
        fi
    fi

    import_pool_ro || return 1

    msgbox \
        "\nPool '$POOL_NAME' imported read-only.\n\nData is accessible at:\n  /$POOL_NAME/\n\nUseful for data recovery.\n\nExport when done:\n  zpool export $POOL_NAME" \
        15 60
    # The exit status of the interactive shell is irrelevant here.
    /bin/sh || true
}

# Top-level menu. Redisplays after every action; Cancel/ESC simply redraws it
# (the only ways out are the Shell and Reboot entries).
main_menu() {
    local pool_detected pool_status pool_label choice

    while :; do
        pool_detected="no"
        pool_status=""
        pool_label="${POOL_NAME} (not imported)"
        if zpool list "$POOL_NAME" >/dev/null 2>&1; then
            pool_detected="yes"
            pool_label="$POOL_NAME"
            pool_status=$(zpool status "$POOL_NAME" 2>/dev/null \
                | awk -F: '/state:/ { gsub(/^[ \t]+/, "", $2); print $2; exit }')
        fi

        # Menu entries are built as positional parameters so each tag and
        # description is passed to bsddialog as a single argument.
        if [ "$pool_detected" = "yes" ]; then
            set -- \
                "1" "Pool Health Check — Status: ${pool_status}" \
                "2" "Replace Failed Disk" \
                "3" "Scrub Pool" \
                "4" "Disk Space Cleanup" \
                "5" "Boot Environments" \
                "6" "Pool Migration" \
                "7" "Import Read-Only" \
                "8" "Export Pool & Reboot"
        else
            set -- \
                "1" "Import Pool (read-write)" \
                "2" "Import Pool (read-only)" \
                "3" "Shell" \
                "4" "Reboot"
        fi

        choice=$(dialog --menu \
            "Clawdie Maintenance Mode\n\nPool: ${pool_label}\n" \
            18 70 8 \
            "$@" \
            3>&1 1>&2 2>&3) || continue

        # Handlers run with `|| true`: a failed or cancelled action returns to
        # the menu instead of killing the script via `set -e`. This also
        # suspends errexit inside the handlers, which check their own steps.
        case "${pool_detected}:${choice}" in
            yes:1) menu_pool_health || true ;;
            yes:2) menu_replace_disk || true ;;
            yes:3) menu_pool_scrub || true ;;
            yes:4) menu_disk_cleanup || true ;;
            yes:5) menu_be_manager || true ;;
            yes:6) menu_pool_migrate || true ;;
            yes:7) menu_import_ro || true ;;
            yes:8)
                if export_pool \
                    || confirm "\nFailed to export pool '$POOL_NAME'.\n\nReboot anyway?" 9 50; then
                    reboot
                fi
                ;;
            no:1) import_pool_rw || true ;;
            no:2) import_pool_ro || true ;;
            no:3) exec /bin/sh ;;
            no:4) reboot ;;
        esac
    done
}

# Entry point: load ZFS, show the welcome screen, then run the main menu.
main() {
    log "Maintenance mode started"
    # Without bsddialog every menu would fail immediately and loop forever.
    command -v bsddialog >/dev/null 2>&1 || die "bsddialog not found"
    # The module may already be loaded or compiled in; failure is not fatal.
    kldload zfs 2>/dev/null || true

    msgbox \
        "\nClawdie Maintenance Mode\n\nThis mode provides tools for:\n  - Pool health monitoring\n  - Disk replacement\n  - Data scrubbing\n  - Space cleanup\n  - Pool migration\n\nNo changes will be made without confirmation." \
        15 60

    main_menu
}

main "$@"