- shell-system.sh: exit → return in sourced module (would kill firstboot) - maintenance-mode.sh: replace bash += with POSIX concat, fix subshell scope in pipe-to-while-read (vdev_status/failed_disks were always empty), quote all $POOL_NAME and $disk expansions - build-vps.sh: portable _sed_i() wrapper for FreeBSD/Linux - firstboot.sh: set -eu; set USB_PKG_PATH to SHARE/packages so offline package repo works after HDD boot (was defaulting to /mnt/media) - firstboot-vps.sh: remove plaintext password log, check loader.efi exists before EFI copy Integration test: PASS (7/7 modules) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
486 lines
16 KiB
Bash
486 lines
16 KiB
Bash
#!/bin/sh
# maintenance-mode.sh — Clawdie pool maintenance and recovery
#
# Provides:
#   1. Pool health check
#   2. Replace failed disk
#   3. Scrub pool
#   4. Disk space cleanup
#   5. Boot environment management
#   6. Pool migration
#   7. Import read-only (data recovery)

set -e

# Defaults; build.cfg is sourced after these assignments, so it can override them.
POOL_NAME="clawdie"
LOG="/var/log/clawdie-maintenance.log"
SHARE="/usr/local/share/clawdie-iso"

# dialog ARGS...
# Run bsddialog with the common "Clawdie Maintenance Mode" backtitle.
dialog() { bsddialog --backtitle "Clawdie Maintenance Mode" "$@"; }

# die MESSAGE...
# Print "ERROR: MESSAGE" on stderr and terminate the script with status 1.
die() { echo "ERROR: $*" >&2; exit 1; }

# log MESSAGE...
# Append a timestamped line to $LOG.  Logging is best-effort: when $LOG cannot
# be written the message is dropped instead of letting `set -e` abort the tool.
log() {
    { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$LOG"; } 2>/dev/null || true
}

# A missing build.cfg must not stop the recovery tool from starting, so it is
# only sourced when readable; otherwise the defaults above stay in effect.
if [ -r "${SHARE}/build.cfg" ]; then
    . "${SHARE}/build.cfg"
else
    echo "WARNING: ${SHARE}/build.cfg not found; using built-in defaults" >&2
fi

# get_disk_info DISK
# Print a one-line summary of DISK (a device name such as "ada0") on stdout:
#   "<disk> — <model> (<size>GB)[ S/N:<serial>][ <temp>°C][ <health>][ Slot:<slot>]"
# Every probe (camcontrol, diskinfo, smartctl, sesutil) is best-effort: a probe
# that fails or prints nothing simply leaves its part of the summary out, and an
# unknown size is shown as 0GB.
get_disk_info() {
    local disk="$1"
    local info model serial size_bytes size_gb temp health slot

    # First line of the inquiry output is used verbatim as the model string.
    model=$(camcontrol inquiry "$disk" 2>/dev/null | head -1)

    # Accept both "serial number   VALUE" and "Serial Number: VALUE" layouts;
    # blanks inside the value are removed.
    serial=$(camcontrol identify "$disk" 2>/dev/null | awk '
        {
            pos = index(tolower($0), "serial number")
            if (pos > 0) {
                value = substr($0, pos + 13)
                sub(/^[ \t:]*/, "", value)
                gsub(/[ \t]/, "", value)
                print value
                exit
            }
        }')

    size_bytes=$(diskinfo "$disk" 2>/dev/null | awk '{ print $3; exit }')
    # Guard the arithmetic below against an empty or non-numeric size.
    case "$size_bytes" in
        ''|*[!0-9]*) size_bytes=0 ;;
    esac
    size_gb=$((size_bytes / 1073741824))

    # Several attributes can mention "temperature"; use the first one that has
    # a value in column 10 so the summary stays on a single line.
    temp=$(smartctl -A "$disk" 2>/dev/null |
        awk 'tolower($0) ~ /temperature/ && $10 != "" { print $10; exit }')

    health=$(smartctl -H "$disk" 2>/dev/null |
        awk '/^SMART overall-health/ { print $NF; exit }')

    # Match the device name at the start of the line, but do not let "ada1"
    # match "ada10".  NOTE(review): column 3 is assumed to hold the slot.
    slot=$(sesutil map 2>/dev/null | awk -v d="$disk" '
        index($1, d) == 1 && substr($1, length(d) + 1, 1) !~ /[0-9]/ {
            print $3
            exit
        }')

    info="${disk} — ${model} (${size_gb}GB)"
    if [ -n "$serial" ]; then info="${info} S/N:${serial}"; fi
    if [ -n "$temp" ]; then info="${info} ${temp}°C"; fi
    if [ -n "$health" ]; then info="${info} ${health}"; fi
    if [ -n "$slot" ]; then info="${info} Slot:${slot}"; fi

    echo "$info"
}

# import_pool_rw
# Import $POOL_NAME read-write unless it is already imported.
# The command output is echoed and appended to $LOG.
# Returns 0 when the pool is (now) imported; on failure shows an error box and
# returns 1.
import_pool_rw() {
    local output rc

    log "Importing pool $POOL_NAME (read-write)"

    if zpool list "$POOL_NAME" >/dev/null 2>&1; then
        log "Pool already imported"
        return 0
    fi

    # Capture zpool's own exit status.  Piping straight into tee would report
    # tee's status instead, so an import failure could never be detected.
    rc=0
    output=$(zpool import "$POOL_NAME" 2>&1) || rc=$?
    if [ -n "$output" ]; then
        printf '%s\n' "$output" | tee -a "$LOG" || true
    fi

    if [ "$rc" -ne 0 ]; then
        dialog --msgbox "\nFailed to import pool '$POOL_NAME'.\n\nCheck logs: $LOG" 10 50 || true
        return 1
    fi

    log "Pool imported successfully"
    return 0
}

# import_pool_ro
# Import $POOL_NAME with readonly=on.
# The command output is echoed and appended to $LOG.
# Returns 0 on success; on failure shows an error box and returns 1.
import_pool_ro() {
    local output rc

    log "Importing pool $POOL_NAME (read-only)"

    # Keep zpool's exit status: in a `zpool ... | tee` pipeline the status is
    # tee's, which made the failure branch unreachable.
    rc=0
    output=$(zpool import -o readonly=on "$POOL_NAME" 2>&1) || rc=$?
    if [ -n "$output" ]; then
        printf '%s\n' "$output" | tee -a "$LOG" || true
    fi

    if [ "$rc" -ne 0 ]; then
        dialog --msgbox "\nFailed to import pool '$POOL_NAME' (read-only).\n\nCheck logs: $LOG" 10 50 || true
        return 1
    fi

    log "Pool imported read-only"
    return 0
}

# export_pool
# Export $POOL_NAME.  The command output is echoed and appended to $LOG, and a
# failed export is recorded in the log.
# Always returns 0: the export is best-effort, so existing callers that go on
# to reboot afterwards keep working under `set -e`.
export_pool() {
    local output rc

    log "Exporting pool $POOL_NAME"

    rc=0
    output=$(zpool export "$POOL_NAME" 2>&1) || rc=$?
    if [ -n "$output" ]; then
        printf '%s\n' "$output" | tee -a "$LOG" || true
    fi

    if [ "$rc" -ne 0 ]; then
        log "Failed to export pool $POOL_NAME (exit status $rc)"
    fi
    return 0
}

# menu_pool_health
# Show the pool state, scan summary, per-disk state/error counters and the
# errors section of `zpool status`, then offer to open the scrub menu.
# Returns 1 when the pool cannot be imported; otherwise the status of the
# selected action (0 when the menu is left or cancelled).
menu_pool_health() {
    local status state scan errors devs vdev_status msg action
    local disk disk_state read_err write_err cksum_err disk_info

    log "Pool health check selected"

    import_pool_rw || return 1

    status=$(zpool status "$POOL_NAME" 2>/dev/null) || status=""
    state=$(printf '%s\n' "$status" | grep "state:" | head -1)
    scan=$(printf '%s\n' "$status" | grep -A2 "scan:") || scan=""
    errors=$(printf '%s\n' "$status" | grep -A5 "errors:") || errors=""

    # One awk pass extracts "name state read write cksum" for every indented
    # device line.  A digit is required after the driver prefix so names that
    # merely start with "da" (e.g. "data-0") are not mistaken for disks.
    devs=$(printf '%s\n' "$status" |
        awk '/^[ \t]+(ada|da|nvd|nda)[0-9]/ { print $1, $2, $3, $4, $5 }')

    vdev_status=""
    # The loop reads from a here-document, so it runs in the current shell and
    # vdev_status keeps its value after the loop.
    while read -r disk disk_state read_err write_err cksum_err; do
        [ -n "$disk" ] || continue
        disk_info=$(get_disk_info "$disk") || disk_info="$disk"
        vdev_status="${vdev_status}${disk_info}\n  State: ${disk_state}  Errors: R:${read_err} W:${write_err} C:${cksum_err}\n\n"
    done <<EOF
$devs
EOF

    msg="Pool: $POOL_NAME\n${state}\n\n${scan}\n\n${vdev_status}${errors}"

    # The fd swap routes the selected tag into the substitution; a cancelled
    # dialog leaves action empty.
    action=$(dialog --menu "$msg" 20 75 3 \
        "scrub" "Start scrub (verify data integrity)" \
        "back" "Back to main menu" \
        3>&1 1>&2 2>&3) || action=""

    case "$action" in
        scrub)
            menu_pool_scrub
            ;;
    esac
}

# menu_replace_disk
# Guided replacement of a pool disk that is not ONLINE:
#   1. pick the failed disk from `zpool status`,
#   2. pick a replacement among the CAM disks that are not part of the pool,
#   3. refuse a replacement known to be smaller than the failed disk,
#   4. confirm, then run `zpool replace` and report the outcome.
# Returns 1 when the pool cannot be imported, no replacement disk exists, the
# replacement is too small, or `zpool replace` fails; 0 otherwise (including
# when the user cancels).
menu_replace_disk() {
    local status devs disk dstate disk_info failed_disk
    local pool_disks all_disks d info replacement
    local failed_size replace_size output rc

    log "Replace failed disk selected"

    import_pool_rw || return 1

    status=$(zpool status "$POOL_NAME" 2>/dev/null) || status=""
    # "name state" for every indented device line of the pool config.
    devs=$(printf '%s\n' "$status" |
        awk '/^[ \t]+(ada|da|nvd|nda)[0-9]/ { print $1, $2 }')

    # Menu entries are collected in the positional parameters (tag/description
    # pairs) so descriptions containing blanks reach the dialog as one word.
    set --
    while read -r disk dstate; do
        [ -n "$disk" ] || continue
        if [ "$dstate" != "ONLINE" ]; then
            disk_info=$(get_disk_info "$disk") || disk_info="$disk"
            set -- "$@" "$disk" "${disk_info} (${dstate})"
        fi
    done <<EOF
$devs
EOF

    if [ "$#" -eq 0 ]; then
        dialog --msgbox "\nNo failed disks detected.\n\nAll disks are ONLINE." 10 50 || true
        return 0
    fi

    # Assigning separately from `local` keeps the dialog's exit status, so
    # Cancel/ESC is actually detected.
    failed_disk=$(dialog --menu "Select failed disk to replace:" \
        15 70 5 "$@" 3>&1 1>&2 2>&3) || return 0
    [ -n "$failed_disk" ] || return 0

    # Reduce pool members to their whole-disk name (ada0p3 -> ada0) so a disk
    # that is in the pool through a partition is not offered as a replacement.
    pool_disks=$(printf '%s\n' "$devs" | awk '{ print $1 }' |
        sed -E 's/^((ada|da|nvd|nda)[0-9]+).*$/\1/' | sort -u)
    all_disks=$(camcontrol devlist 2>/dev/null |
        grep -oE '(ada|da|nvd|nda)[0-9]+' | sort -u)

    set --
    for d in $all_disks; do
        if ! printf '%s\n' "$pool_disks" | grep -qxF -- "$d"; then
            info=$(get_disk_info "$d") || info="$d"
            set -- "$@" "$d" "$info"
        fi
    done

    if [ "$#" -eq 0 ]; then
        dialog --msgbox "\nNo replacement disks available.\n\nConnect a replacement disk and restart." 10 50 || true
        return 1
    fi

    replacement=$(dialog --menu "Select replacement disk for ${failed_disk}:" \
        15 70 10 "$@" 3>&1 1>&2 2>&3) || return 0
    [ -n "$replacement" ] || return 0

    failed_size=$(diskinfo "$failed_disk" 2>/dev/null | awk '{ print $3; exit }')
    replace_size=$(diskinfo "$replacement" 2>/dev/null | awk '{ print $3; exit }')

    # A failed disk frequently cannot report its size; compare only when both
    # sizes are plain numbers and otherwise leave the check to zpool.
    case "${failed_size}:${replace_size}" in
        *[!0-9:]*|:*|*:)
            ;;
        *)
            if [ "$replace_size" -lt "$failed_size" ]; then
                dialog --msgbox "\nReplacement disk is smaller than failed disk.\n\nUse a disk of equal or larger size." 10 50 || true
                return 1
            fi
            ;;
    esac

    # --yesno answers through its exit status (0 = Yes), not on stdout.
    if ! dialog --yesno \
        "\nConfirm disk replacement:\n\n  ${failed_disk} → ${replacement}\n\nThis will take 4-8 hours.\nPool remains usable during resilver." \
        12 60; then
        return 0
    fi

    log "Replacing $failed_disk with $replacement"

    rc=0
    output=$(zpool replace "$POOL_NAME" "$failed_disk" "$replacement" 2>&1) || rc=$?
    if [ -n "$output" ]; then
        log "$output"
    fi

    if [ "$rc" -ne 0 ]; then
        log "zpool replace failed (exit status $rc)"
        dialog --msgbox "\nDisk replacement failed:\n\n${output}\n\nCheck logs: $LOG" 14 60 || true
        return 1
    fi

    dialog --msgbox \
        "\nDisk replacement started.\n\nMonitor progress:\n  zpool status $POOL_NAME\n\nPool remains usable.\nDo not power off until resilver completes." \
        12 60 || true
    return 0
}

# menu_pool_scrub
# Scrub sub-menu: start a scrub, show the current scan status, or stop a
# running scrub after confirmation.
# Returns 1 when the pool cannot be imported, 0 otherwise.
menu_pool_scrub() {
    local status last_scrub action scrub_status output rc

    log "Pool scrub selected"

    import_pool_rw || return 1

    status=$(zpool status "$POOL_NAME" 2>/dev/null) || status=""
    last_scrub=$(printf '%s\n' "$status" | grep -A1 "scan:" | grep "scrub" | head -1)

    action=$(dialog --menu \
        "Pool Scrub\n\nLast scrub:\n${last_scrub:-None}\n\nScrub reads all data and verifies checksums.\nRepairs silent corruption if redundancy exists." \
        15 60 3 \
        "start" "Start scrub now" \
        "status" "Check scrub status" \
        "stop" "Stop running scrub" \
        "back" "Back to main menu" \
        3>&1 1>&2 2>&3) || action=""

    case "$action" in
        start)
            rc=0
            output=$(zpool scrub "$POOL_NAME" 2>&1) || rc=$?
            if [ -n "$output" ]; then
                log "$output"
            fi
            if [ "$rc" -eq 0 ]; then
                dialog --msgbox "\nScrub started.\n\nPool remains usable during scrub.\nMonitor: zpool status $POOL_NAME" 10 50 || true
            else
                dialog --msgbox "\nFailed to start scrub:\n\n${output}" 12 60 || true
            fi
            ;;
        status)
            scrub_status=$(zpool status "$POOL_NAME" 2>/dev/null | grep -A5 "scan:") || scrub_status=""
            dialog --msgbox "${scrub_status}" 15 60 || true
            ;;
        stop)
            # --yesno answers through its exit status (0 = Yes); it prints
            # nothing that could be compared against "yes".
            if dialog --yesno "\nStop running scrub?\n\nThis is safe but not recommended." 10 50; then
                rc=0
                output=$(zpool scrub -s "$POOL_NAME" 2>&1) || rc=$?
                if [ -n "$output" ]; then
                    log "$output"
                fi
                if [ "$rc" -ne 0 ]; then
                    dialog --msgbox "\nFailed to stop scrub:\n\n${output}" 12 60 || true
                fi
            fi
            ;;
    esac

    return 0
}

# menu_disk_cleanup
# Show pool usage and offer space-reclaiming actions:
#   quick     - delete package/npm caches, logs older than 7 days, temp files
#               and crash dumps (after confirmation)
#   snapshots - destroy snapshots older than 30 days (after confirmation)
#   be        - list boot environments
#   large     - list up to 20 files larger than 100MB
#   analyze   - list the 15 largest datasets
# Returns 1 when the pool cannot be imported, 0 otherwise.
menu_disk_cleanup() {
    local usage used avail total used_gb total_gb percent action
    local snapshots snapshot_count snap creation now age_days output
    local bes active large_files datasets

    log "Disk space cleanup selected"

    import_pool_rw || return 1

    # One query returns "<used><TAB><avail>" in bytes.  Anything missing or
    # non-numeric counts as 0 so the arithmetic below cannot fail.
    usage=$(zfs list -Hp -o used,avail "$POOL_NAME" 2>/dev/null) || usage=""
    used=${usage%%[!0-9]*}
    avail=${usage##*[!0-9]}
    used=${used:-0}
    avail=${avail:-0}

    total=$((used + avail))
    used_gb=$((used / 1073741824))
    total_gb=$((total / 1073741824))
    if [ "$total" -gt 0 ]; then
        percent=$((used * 100 / total))
    else
        percent=0
    fi

    action=$(dialog --menu \
        "Disk Space Cleanup\n\nPool: $POOL_NAME\nUsed: ${used_gb} GB / ${total_gb} GB (${percent}%)\n\nSelect cleanup type:" \
        18 70 6 \
        "quick" "Quick cleanup (pkg cache, npm cache, old logs)" \
        "snapshots" "Snapshot cleanup (delete old snapshots)" \
        "be" "Boot environment cleanup (delete old BEs)" \
        "large" "Find large files (>100MB)" \
        "analyze" "Analyze disk usage by dataset" \
        "back" "Back to main menu" \
        3>&1 1>&2 2>&3) || action=""

    case "$action" in
        quick)
            # --yesno answers through its exit status (0 = Yes).
            if dialog --yesno \
                "\nQuick cleanup will remove:\n\n  - Package cache (/var/cache/pkg)\n  - npm cache (~/.npm)\n  - Old logs (>7 days)\n  - Temporary files\n  - Crash dumps\n\nContinue?" \
                12 60; then
                log "Running quick cleanup"

                # Each step is best-effort; one failure must not stop the rest.
                rm -rf /var/cache/pkg/* 2>/dev/null || true
                rm -rf /home/*/.npm 2>/dev/null || true
                find /var/log -type f -mtime +7 -delete 2>/dev/null || true
                rm -rf /tmp/* 2>/dev/null || true
                rm -rf /var/tmp/* 2>/dev/null || true
                rm -rf /var/crash/* 2>/dev/null || true

                dialog --msgbox "\nQuick cleanup complete." 8 40 || true
            fi
            ;;

        snapshots)
            snapshots=$(zfs list -t snapshot -r -H -o name "$POOL_NAME" 2>/dev/null) || snapshots=""
            # Count non-empty lines so an empty list is reported as 0, not 1.
            snapshot_count=$(printf '%s\n' "$snapshots" | grep -c .) || true

            if [ "${snapshot_count:-0}" -eq 0 ]; then
                dialog --msgbox "\nNo snapshots found." 8 40 || true
            elif dialog --yesno \
                "\nFound ${snapshot_count} snapshots.\n\nDelete snapshots older than 30 days?" \
                10 50; then
                log "Deleting old snapshots"
                now=$(date +%s)

                while IFS= read -r snap; do
                    [ -n "$snap" ] || continue
                    creation=$(zfs get -Hp -o value creation "$snap" 2>/dev/null) || continue
                    # Skip snapshots whose creation time is not a plain number.
                    case "$creation" in
                        ''|*[!0-9]*) continue ;;
                    esac
                    age_days=$(( (now - creation) / 86400 ))

                    if [ "$age_days" -gt 30 ]; then
                        if output=$(zfs destroy "$snap" 2>&1); then
                            log "Deleted snapshot: $snap (age: ${age_days} days)"
                        else
                            log "Failed to delete snapshot: $snap: $output"
                        fi
                    fi
                done <<EOF
$snapshots
EOF

                dialog --msgbox "\nSnapshot cleanup complete." 8 40 || true
            fi
            ;;

        be)
            bes=$(bectl list -H 2>/dev/null | awk '{ print $1 }')
            # With -H the fields are tab-separated; the second field carries
            # the activation flags and "R" marks the BE active on reboot.
            active=$(bectl list -H 2>/dev/null |
                awk -F '\t' '$2 ~ /R/ { print $1; exit }')

            dialog --msgbox \
                "\nBoot Environments\n\nActive: ${active:-unknown}\n\nAvailable:\n${bes}\n\nUse 'bectl destroy <name>' to remove old BEs." \
                15 60 || true
            ;;

        large)
            dialog --infobox "Scanning for large files..." 5 50 || true

            large_files=$(find / -type f -size +100M 2>/dev/null | head -20)

            dialog --msgbox \
                "\nLarge files (>100MB):\n\n${large_files:-None found}\n\nReview and delete manually if needed." \
                20 70 || true
            ;;

        analyze)
            # Let zfs sort by the numeric value of "used" (descending); sorting
            # the human-readable column with sort -n would rank 900K above 3G.
            datasets=$(zfs list -r -H -o name,used -S used "$POOL_NAME" 2>/dev/null | head -15)

            dialog --msgbox "\nDatasets by size:\n\n${datasets}" 20 70 || true
            ;;
    esac

    return 0
}

# _be_run WHAT BECTL-ARGS...
# Run `bectl BECTL-ARGS`, log its output and show an error box naming WHAT
# when it fails.  Always returns 0 so the caller's menu keeps running.
_be_run() {
    local what="$1" output rc
    shift

    rc=0
    output=$(bectl "$@" 2>&1) || rc=$?
    if [ -n "$output" ]; then
        log "$output"
    fi
    if [ "$rc" -ne 0 ]; then
        log "bectl $* failed (exit status $rc)"
        dialog --msgbox "\nFailed to ${what}:\n\n${output}" 12 60 || true
    fi
    return 0
}

# _be_pick PROMPT SKIP
# Offer every boot environment except SKIP in a menu and print the chosen name
# on stdout.  Returns non-zero when nothing can be offered or the user cancels.
_be_pick() {
    local prompt="$1" skip="$2" be rest

    # Tag/description pairs are collected in the positional parameters; the
    # description is a genuinely empty argument.
    set --
    while read -r be rest; do
        [ -n "$be" ] || continue
        [ "$be" != "$skip" ] || continue
        set -- "$@" "$be" ""
    done <<EOF
$(bectl list -H 2>/dev/null)
EOF

    [ "$#" -gt 0 ] || return 1
    dialog --menu "$prompt" 15 60 10 "$@" 3>&1 1>&2 2>&3
}

# menu_be_manager
# Boot environment sub-menu (list / create / activate / destroy).  The menu is
# shown again after every action until "back" is chosen or the dialog is
# cancelled.
# Returns 1 when the pool cannot be imported, 0 otherwise.
menu_be_manager() {
    local active action name

    log "Boot environment manager selected"

    import_pool_rw || return 1

    # A loop replaces the former self-recursion, so a long session does not
    # nest one function call per menu round-trip.
    while :; do
        # With -H the fields are tab-separated; the second field carries the
        # activation flags and "R" marks the BE active on reboot.
        active=$(bectl list -H 2>/dev/null |
            awk -F '\t' '$2 ~ /R/ { print $1; exit }')

        action=$(dialog --menu \
            "Boot Environments\n\nActive: ${active:-unknown}\n\nSelect action:" \
            15 60 4 \
            "list" "List all boot environments" \
            "create" "Create new boot environment" \
            "activate" "Activate a boot environment" \
            "destroy" "Destroy a boot environment" \
            "back" "Back to main menu" \
            3>&1 1>&2 2>&3) || return 0

        case "$action" in
            list)
                dialog --msgbox "$(bectl list 2>/dev/null)" 15 70 || true
                ;;
            create)
                name=$(dialog --inputbox "New boot environment name:" 8 50 "" 3>&1 1>&2 2>&3) || name=""
                if [ -n "$name" ]; then
                    _be_run "create boot environment '$name'" create "$name"
                fi
                ;;
            activate)
                name=$(_be_pick "Select BE to activate:" "") || name=""
                if [ -n "$name" ]; then
                    _be_run "activate boot environment '$name'" activate "$name"
                fi
                ;;
            destroy)
                # The active BE is never offered for destruction.
                name=$(_be_pick "Select BE to destroy:" "$active") || name=""
                if [ -n "$name" ] && [ "$name" != "$active" ]; then
                    # --yesno answers through its exit status (0 = Yes).
                    if dialog --yesno "\nDestroy boot environment '$name'?\n\nThis cannot be undone." 10 50; then
                        _be_run "destroy boot environment '$name'" destroy "$name"
                    fi
                fi
                ;;
            *)
                return 0
                ;;
        esac
    done
}

# menu_pool_migrate
# Hand control to the pool migration tool shipped under $SHARE/firstboot.
# On success this does not return: exec replaces the maintenance shell with
# the migration script.  When the script is missing or not executable an error
# box is shown and 1 is returned, instead of letting the failed exec terminate
# the whole tool.
menu_pool_migrate() {
    local script="${SHARE}/firstboot/zfs-pool-migrate.sh"

    log "Pool migration selected"

    if [ ! -x "$script" ]; then
        log "Pool migration tool not found or not executable: $script"
        dialog --msgbox "\nPool migration tool not found:\n\n  $script" 10 70 || true
        return 1
    fi

    exec "$script"
}

# menu_import_ro
# Import the pool read-only for data recovery and then open a shell.
# A pool that is already imported has to be exported first, so the user is
# asked before that happens.  Success is only announced when the import
# succeeded and the pool is visible afterwards.
# Returns 1 when the export or the import fails, 0 otherwise (including when
# the user declines the export).
menu_import_ro() {
    local output rc

    log "Import read-only selected"

    if zpool list "$POOL_NAME" >/dev/null 2>&1; then
        # --yesno answers through its exit status (0 = Yes).
        if ! dialog --yesno \
            "\nPool '$POOL_NAME' is currently imported.\n\nExport it and import it again read-only?" \
            10 60; then
            return 0
        fi

        rc=0
        output=$(zpool export "$POOL_NAME" 2>&1) || rc=$?
        if [ -n "$output" ]; then
            log "$output"
        fi
        if [ "$rc" -ne 0 ]; then
            log "Failed to export pool $POOL_NAME (exit status $rc)"
            dialog --msgbox "\nFailed to export pool '$POOL_NAME':\n\n${output}" 12 60 || true
            return 1
        fi
    fi

    import_pool_ro || return 1

    # Double-check the result instead of trusting the helper's status alone.
    if ! zpool list "$POOL_NAME" >/dev/null 2>&1; then
        dialog --msgbox "\nRead-only import of pool '$POOL_NAME' failed.\n\nCheck logs: $LOG" 10 60 || true
        return 1
    fi

    dialog --msgbox \
        "\nPool '$POOL_NAME' imported read-only.\n\nData is accessible at:\n  /$POOL_NAME/\n\nUseful for data recovery.\n\nExport when done:\n  zpool export $POOL_NAME" \
        15 60 || true

    # The status of the interactive shell is whatever its last command
    # returned; it must not abort the tool under `set -e`.
    /bin/sh || true
    return 0
}

# main_menu
# Top-level menu loop.  Every round re-detects whether $POOL_NAME is imported
# and offers either the full maintenance menu (pool imported) or the
# import/shell/reboot menu (pool not imported).  The loop only ends through
# reboot or `exec /bin/sh`; a cancelled dialog simply shows the menu again.
main_menu() {
    local pool_detected pool_status pool_label choice

    # A loop replaces the former self-recursion, so a long session does not
    # nest one function call per menu round-trip.
    while :; do
        pool_detected="no"
        pool_status=""

        if zpool list "$POOL_NAME" >/dev/null 2>&1; then
            pool_detected="yes"
            pool_status=$(zpool status "$POOL_NAME" 2>/dev/null |
                awk -F: '/state:/ { sub(/^[ \t]+/, "", $2); print $2; exit }')
        fi

        # Menu entries live in the positional parameters as tag/description
        # pairs, so descriptions containing blanks stay single arguments.
        if [ "$pool_detected" = "yes" ]; then
            pool_label="$POOL_NAME"
            set -- \
                "1" "Pool Health Check — Status: ${pool_status}" \
                "2" "Replace Failed Disk" \
                "3" "Scrub Pool" \
                "4" "Disk Space Cleanup" \
                "5" "Boot Environments" \
                "6" "Pool Migration" \
                "7" "Import Read-Only" \
                "8" "Export Pool & Reboot"
        else
            pool_label="$POOL_NAME (Not imported)"
            set -- \
                "1" "Import Pool (read-write)" \
                "2" "Import Pool (read-only)" \
                "3" "Shell" \
                "4" "Reboot"
        fi

        choice=$(dialog --menu \
            "Clawdie Maintenance Mode\n\nPool: ${pool_label}\n" \
            18 70 8 \
            "$@" \
            3>&1 1>&2 2>&3) || choice=""

        # A failing action must bring the user back to this menu rather than
        # terminate the tool under `set -e`, hence the `|| true` guards.
        case "${pool_detected}:${choice}" in
            yes:1) menu_pool_health || true ;;
            yes:2) menu_replace_disk || true ;;
            yes:3) menu_pool_scrub || true ;;
            yes:4) menu_disk_cleanup || true ;;
            yes:5) menu_be_manager || true ;;
            yes:6) menu_pool_migrate || true ;;
            yes:7) menu_import_ro || true ;;
            yes:8)
                export_pool || true
                reboot || true
                ;;
            no:1) import_pool_rw || true ;;
            no:2) import_pool_ro || true ;;
            no:3) exec /bin/sh ;;
            no:4) reboot || true ;;
        esac
    done
}

# main
# Entry point: log the start, try to load the ZFS kernel module, show the
# welcome box and enter the main menu.
main() {
    log "Maintenance mode started"

    # Loading is best-effort; a failure here is deliberately ignored.
    kldload zfs 2>/dev/null || true

    # Dismissing the welcome box with ESC yields a non-zero status, which must
    # not terminate the tool under `set -e`.
    dialog --msgbox \
        "\nClawdie Maintenance Mode\n\nThis mode provides tools for:\n  - Pool health monitoring\n  - Disk replacement\n  - Data scrubbing\n  - Space cleanup\n  - Pool migration\n\nNo changes will be made without confirmation." \
        15 60 || true

    main_menu
}

# Script entry point: pass all command-line arguments on to main().
main "$@"