From 1df13e06af27478690eba457aa2e2dcf954dfd52 Mon Sep 17 00:00:00 2001 From: Sam & Claude Date: Thu, 4 Jun 2026 10:49:19 +0200 Subject: [PATCH] fix(rc.d): supervisor-aware stop + bring clawdie.in to parity (Sam & Claude) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #17. Two issues with the daemon(8) `-r` + child-pidfile pattern: 1. Stop semantics (both services): with `-r`, rc.subr's default stop sends SIGTERM to the *child* pid — and the still-running daemon(8) supervisor respawns it ~1s later, so `service … stop` never actually stops it. Fix: add a `-P` supervisor pidfile and a custom stop_cmd that SIGTERMs the supervisor (which forwards to the child and exits without restarting), waits up to 30s, SIGKILL fallback, then cleans pidfiles. Child pidfile + unique procname are kept for accurate start/status. 2. clawdie.in parity: it still carried the pre-#17 pattern (`-P ${pidfile}` as the only pidfile + procname="/usr/sbin/daemon"), so `service clawdie status` could match tailscaled/colibri_daemon on a stale pidfile. Brought it to the same shape as colibri_daemon.in: child pidfile, procname="clawdie", supervisor pidfile, stop_cmd, socket-ready poststart, socket cleanup poststop, and a `health` command. Packaging-only — no Rust touched, no rebuild needed. `sh -n` clean on both; stop algorithm exercised standalone (kills supervisor, idempotent). FreeBSD start/stop/status/restart validation still owed on OSA. Co-Authored-By: Claude Opus 4.8 --- packaging/freebsd/clawdie.in | 103 +++++++++++++++++++++++++--- packaging/freebsd/colibri_daemon.in | 47 +++++++++++-- 2 files changed, 137 insertions(+), 13 deletions(-) diff --git a/packaging/freebsd/clawdie.in b/packaging/freebsd/clawdie.in index f40abb5..0b314af 100644 --- a/packaging/freebsd/clawdie.in +++ b/packaging/freebsd/clawdie.in @@ -9,9 +9,8 @@ # /usr/local/etc/clawdie/clawdie.env) or set them in rc.conf below. 
 #
 # clawdie runs in the FOREGROUND (no self-daemonize, no pidfile), so rc.d runs
-# it under daemon(8), which backgrounds it, writes the supervisor pidfile,
-# restarts on crash, drops privileges to the clawdie user, and redirects
-# stdout/stderr (tracing) to a logfile.
+# it under daemon(8), which backgrounds it, writes supervisor and child pidfiles,
+# restarts it on exit, drops to the clawdie user, and logs stdout/stderr (tracing).
 #
 # Install:
 # pw groupadd clawdie
@@ -47,18 +46,29 @@ load_rc_config $name
 : ${clawdie_env_file:="/usr/local/etc/clawdie/clawdie.env"}
 
 pidfile="${clawdie_run_dir}/clawdie.pid"
+# Supervisor pidfile (the daemon(8) parent). Kept distinct from the child
+# pidfile so `stop` can target the supervisor — see clawdie_stop.
+supervisor_pidfile="${clawdie_run_dir}/clawdie-supervisor.pid"
 
-# Supervise via daemon(8): -P supervisor pidfile, -r restart on exit, -t title,
-# -u drop privileges, -o append stdout/stderr to the logfile.
+# Run clawdie under daemon(8):
+# -P supervisor pidfile (the daemon(8) parent — used by stop)
+# -p child pidfile (writes the clawdie PID — used by start/status)
+# -r restart on exit, -t process title, -u drop to the clawdie user,
+# -o append stdout/stderr to log.
 command="/usr/sbin/daemon"
-command_args="-P ${pidfile} -r -t ${name} -u ${clawdie_user} \
+command_args="-P ${supervisor_pidfile} -p ${pidfile} -r -t ${name} -u ${clawdie_user} \
 	-o ${clawdie_logfile} ${clawdie_program}"
 
-# rc.subr matches the pidfile process against ${procname}; that is daemon(8),
-# the supervised parent — not clawdie itself.
-procname="/usr/sbin/daemon"
+# Match the child binary so `service clawdie status` finds OUR process via the
+# child pidfile, not the generic /usr/sbin/daemon supervisor (which would
+# collide with tailscaled, colibri_daemon, and other daemon(8) services).
+procname="${clawdie_program}"	# child's argv[0], which rc.subr compares procname against
 
 start_precmd="clawdie_prestart"
+start_postcmd="clawdie_poststart"
+stop_cmd="clawdie_stop"
+stop_postcmd="clawdie_poststop"
+extra_commands="health"
 
 clawdie_prestart()
 {
@@ -89,4 +99,79 @@ clawdie_prestart()
 	fi
 }
 
+clawdie_poststart()
+{
+	# Wait for the Herdr socket to appear (daemon forks, child binds socket).
+	local timeout=10
+	local waited=0
+	while [ ! -S "${clawdie_socket}" ] && [ $waited -lt $timeout ]; do
+		sleep 1
+		waited=$((waited + 1))
+	done
+
+	if [ -S "${clawdie_socket}" ]; then
+		echo "clawdie socket ready after ${waited}s"
+	else
+		echo "WARNING: clawdie socket not ready after ${timeout}s"
+	fi
+}
+
+clawdie_stop()
+{
+	# Custom stop: SIGTERM the daemon(8) supervisor (not the child), wait up to
+	# 30s for it to exit, SIGKILL it if it is still alive, then remove both
+	# pidfiles. A child killed directly would just be restarted by daemon(8) -r;
+	# the supervisor is expected to forward SIGTERM to the child and then exit.
+	local _sup=""
+	[ -f "${supervisor_pidfile}" ] && _sup=$(cat "${supervisor_pidfile}" 2>/dev/null)
+	if [ -n "${_sup}" ] && kill -0 "${_sup}" 2>/dev/null; then
+		echo "Stopping ${name} (daemon(8) supervisor pid ${_sup})."
+		kill -TERM "${_sup}" 2>/dev/null
+		local _n=0
+		while kill -0 "${_sup}" 2>/dev/null && [ ${_n} -lt 30 ]; do
+			sleep 1
+			_n=$((_n + 1))
+		done
+		if kill -0 "${_sup}" 2>/dev/null; then
+			echo "Supervisor did not exit in time; sending SIGKILL."
+			kill -KILL "${_sup}" 2>/dev/null
+		fi
+	else
+		echo "${name} is not running."
+	fi
+	# Belt-and-suspenders: terminate the child if it somehow outlived the
+	# supervisor (e.g. supervisor SIGKILLed before it could clean up).
+	local _ch=""
+	[ -f "${pidfile}" ] && _ch=$(cat "${pidfile}" 2>/dev/null)
+	if [ -n "${_ch}" ] && kill -0 "${_ch}" 2>/dev/null; then
+		kill -TERM "${_ch}" 2>/dev/null
+	fi
+	rm -f "${supervisor_pidfile}" "${pidfile}"
+}
+
+clawdie_poststop()
+{
+	# Clean up tmpfs artifacts on graceful shutdown.
+	if [ -S "${clawdie_socket}" ]; then
+		rm -f "${clawdie_socket}"
+	fi
+}
+
+health_cmd="clawdie_health"
+clawdie_health()
+{
+	# Succeeds only if the socket exists and nc exits 0 after sending a status request.
+	if [ ! -S "${clawdie_socket}" ]; then
+		echo "clawdie socket not found"
+		return 1
+	fi
+	# Options go before the socket operand so -w 2 never depends on nc permuting argv.
+	if ! printf '{"cmd":"status"}\n' | nc -w 2 -U "${clawdie_socket}" >/dev/null 2>&1; then
+		echo "clawdie socket exists but not responding"
+		return 1
+	fi
+	echo "clawdie is healthy (socket responding)"
+	return 0
+}
+
 run_rc_command "$1"
diff --git a/packaging/freebsd/colibri_daemon.in b/packaging/freebsd/colibri_daemon.in
index 57c526b..e78f65c 100644
--- a/packaging/freebsd/colibri_daemon.in
+++ b/packaging/freebsd/colibri_daemon.in
@@ -47,12 +47,17 @@ load_rc_config $name
 : ${colibri_cost_mode:="smart"}
 
 pidfile="${colibri_daemon_run_dir}/colibri-daemon.pid"
+# Supervisor pidfile (the daemon(8) parent). Kept distinct from the child
+# pidfile so `stop` can target the supervisor — see colibri_daemon_stop.
+supervisor_pidfile="${colibri_daemon_run_dir}/colibri-daemon-supervisor.pid"
 
-# Run colibri-daemon under daemon(8): -p child pidfile (writes colibri-daemon PID),
-# -r restart on exit, -t process title, -u drop to the colibri user, -o append
-# stdout/stderr to log.
+# Run colibri-daemon under daemon(8):
+# -P supervisor pidfile (the daemon(8) parent — used by stop)
+# -p child pidfile (writes colibri-daemon PID — used by start/status)
+# -r restart on exit, -t process title, -u drop to the colibri user,
+# -o append stdout/stderr to log.
 command="/usr/sbin/daemon"
-command_args="-p ${pidfile} -r -t ${name} -u ${colibri_daemon_user} \
+command_args="-P ${supervisor_pidfile} -p ${pidfile} -r -t ${name} -u ${colibri_daemon_user} \
 	-o ${colibri_daemon_logfile} ${colibri_daemon_program}"
 
 # Use the child's process name so rc.subr can find the right process via the
@@ -62,6 +67,7 @@ procname="colibri-daemon"
 
 start_precmd="colibri_daemon_prestart"
 start_postcmd="colibri_daemon_poststart"
+stop_cmd="colibri_daemon_stop"
 stop_postcmd="colibri_daemon_poststop"
 extra_commands="health"
 
@@ -100,6 +106,39 @@ colibri_daemon_poststart()
 	fi
 }
 
+colibri_daemon_stop()
+{
+	# Custom stop: SIGTERM the daemon(8) supervisor (not the child), wait up to
+	# 30s for it to exit, SIGKILL it if it is still alive, then remove both
+	# pidfiles. A child killed directly would just be restarted by daemon(8) -r;
+	# the supervisor is expected to forward SIGTERM to the child and then exit.
+	local _sup=""
+	[ -f "${supervisor_pidfile}" ] && _sup=$(cat "${supervisor_pidfile}" 2>/dev/null)
+	if [ -n "${_sup}" ] && kill -0 "${_sup}" 2>/dev/null; then
+		echo "Stopping ${name} (daemon(8) supervisor pid ${_sup})."
+		kill -TERM "${_sup}" 2>/dev/null
+		local _n=0
+		while kill -0 "${_sup}" 2>/dev/null && [ ${_n} -lt 30 ]; do
+			sleep 1
+			_n=$((_n + 1))
+		done
+		if kill -0 "${_sup}" 2>/dev/null; then
+			echo "Supervisor did not exit in time; sending SIGKILL."
+			kill -KILL "${_sup}" 2>/dev/null
+		fi
+	else
+		echo "${name} is not running."
+	fi
+	# Belt-and-suspenders: terminate the child if it somehow outlived the
+	# supervisor (e.g. supervisor SIGKILLed before it could clean up).
+	local _ch=""
+	[ -f "${pidfile}" ] && _ch=$(cat "${pidfile}" 2>/dev/null)
+	if [ -n "${_ch}" ] && kill -0 "${_ch}" 2>/dev/null; then
+		kill -TERM "${_ch}" 2>/dev/null
+	fi
+	rm -f "${supervisor_pidfile}" "${pidfile}"
+}
+
 colibri_daemon_poststop()
 {
 	# Clean up tmpfs artifacts on graceful shutdown.