colibri/packaging/freebsd/clawdie.in
Sam & Claude 1df13e06af fix(rc.d): supervisor-aware stop + bring clawdie.in to parity (Sam & Claude)
Follow-up to #17. Two issues with the daemon(8) `-r` + child-pidfile pattern:

1. Stop semantics (both services): with `-r`, rc.subr's default stop sends
   SIGTERM to the *child* pid — and the still-running daemon(8) supervisor
   respawns it ~1s later, so `service … stop` never actually stops it. Fix:
   add a `-P` supervisor pidfile and a custom stop_cmd that SIGTERMs the
   supervisor (which forwards to the child and exits without restarting),
   waits up to 30s, SIGKILL fallback, then cleans pidfiles. Child pidfile +
   unique procname are kept for accurate start/status.

2. clawdie.in parity: it still carried the pre-#17 pattern (`-P ${pidfile}`
   as the only pidfile + procname="/usr/sbin/daemon"), so `service clawdie
   status` could match tailscaled/colibri_daemon on a stale pidfile. Brought
   it to the same shape as colibri_daemon.in: child pidfile, procname="clawdie",
   supervisor pidfile, stop_cmd, socket-ready poststart, socket cleanup
   poststop, and a `health` command.

Packaging-only — no Rust touched, no rebuild needed. `sh -n` clean on both;
stop algorithm exercised standalone (kills supervisor, idempotent). FreeBSD
start/stop/status/restart validation still owed on OSA.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 10:49:19 +02:00

177 lines
6.2 KiB
Bash

#!/bin/sh
#
# clawdie — FreeBSD rc.d service for the simplified Colibri agent.
#
# Operator-friendly by design: enable it and start it. The two credentials
# (Telegram bot token + DeepSeek key) are normally baked into the binary at
# build time (see crates/clawdie/build.rs), so a baked ISO needs no config.
# To override per-host, drop them in the env file (default
# /usr/local/etc/clawdie/clawdie.env) or set them in rc.conf below.
#
# clawdie runs in the FOREGROUND (no self-daemonize, no pidfile), so rc.d runs
# it under daemon(8), which backgrounds it, restarts on crash, drops privileges
# to the clawdie user, and redirects stdout/stderr (tracing) to a logfile.
#
# Install:
# pw groupadd clawdie
# pw useradd clawdie -g clawdie -d /var/db/clawdie -s /usr/sbin/nologin
# cp packaging/freebsd/clawdie.in /usr/local/etc/rc.d/clawdie
# chmod 555 /usr/local/etc/rc.d/clawdie
# sysrc clawdie_enable=YES
# service clawdie start
#
# Requires the clawdie binary at /usr/local/bin/clawdie.
# PROVIDE: clawdie
# REQUIRE: LOGIN NETWORKING cleanvar
# KEYWORD: shutdown
. /etc/rc.subr

name="clawdie"
rcvar="clawdie_enable"

load_rc_config $name

# Defaults. Each one is only applied when rc.conf left the variable unset or
# empty, so operators can override any of them per host.
: ${clawdie_enable:="NO"}
: ${clawdie_user:="clawdie"}
: ${clawdie_group:="clawdie"}
: ${clawdie_program:="/usr/local/bin/clawdie"}
: ${clawdie_data_dir:="/var/db/clawdie"}
: ${clawdie_run_dir:="/var/run/clawdie"}
: ${clawdie_socket:="${clawdie_run_dir}/clawdie.sock"}
: ${clawdie_db_path:="${clawdie_data_dir}/clawdie.sqlite"}
: ${clawdie_logfile:="/var/log/clawdie/clawdie.log"}
: ${clawdie_host:="$(/bin/hostname)"}
: ${clawdie_env_file:="/usr/local/etc/clawdie/clawdie.env"}

# Child pidfile: holds the PID of the clawdie process itself (daemon -p).
pidfile="${clawdie_run_dir}/clawdie.pid"
# Supervisor pidfile: holds the PID of the daemon(8) parent (daemon -P). Kept
# distinct from the child pidfile so `stop` can target the supervisor — see
# clawdie_stop.
supervisor_pidfile="${clawdie_run_dir}/clawdie-supervisor.pid"

# Run clawdie under daemon(8):
#   -P  supervisor pidfile (the daemon(8) parent — used by stop)
#   -p  child pidfile (the clawdie PID — used by start/status)
#   -r  restart on crash
#   -t  process title
#   -u  drop to the clawdie user
#   -o  append stdout/stderr to the log
# NOTE(review): rc.subr(8) also acts on ${name}_user by launching ${command}
# through su(1) as that user, so daemon(8) may already be unprivileged when
# it processes -u. Confirm on a FreeBSD host that this combination starts
# cleanly, or keep only one of the two privilege-drop mechanisms.
command="/usr/sbin/daemon"
command_args="-P ${supervisor_pidfile} -p ${pidfile} -r -t ${name} -u ${clawdie_user} \
-o ${clawdie_logfile} ${clawdie_program}"

# rc.subr validates the PID read from ${pidfile} by comparing the process's
# argv[0] against procname (or procname's basename). daemon(8) executes the
# child by the path given on its command line, so argv[0] is the full
# ${clawdie_program} path — a bare "clawdie" would never match and
# `service clawdie status` would always report "not running". Matching the
# child binary (rather than /usr/sbin/daemon) also keeps status from colliding
# with tailscaled, colibri_daemon, and other daemon(8)-supervised services.
procname="${clawdie_program}"

# Hooks implemented by the functions defined later in this script.
start_precmd="clawdie_prestart"
start_postcmd="clawdie_poststart"
stop_cmd="clawdie_stop"
stop_postcmd="clawdie_poststop"
extra_commands="health"
# start_precmd hook: create the runtime directories and export the child's
# environment.
# Globals read: clawdie_user, clawdie_group, clawdie_run_dir, clawdie_data_dir,
#               clawdie_logfile, clawdie_socket, clawdie_db_path, clawdie_host,
#               clawdie_env_file
# Returns: 0 on success; 1 if a required directory cannot be created, so the
#          start is aborted instead of launching a child that cannot write
#          its pidfile, database or log.
clawdie_prestart()
{
	local _dir

	# The run dir under /var/run does not survive a reboot, so (re)create
	# every directory the service needs on each start.
	for _dir in "${clawdie_run_dir}" "${clawdie_data_dir}" \
	    "$(/usr/bin/dirname "${clawdie_logfile}")"; do
		if ! install -d -o "${clawdie_user}" -g "${clawdie_group}" -m 0750 "${_dir}"; then
			echo "clawdie: cannot create directory ${_dir}" >&2
			return 1
		fi
	done

	# Control-plane config passed to the child via the environment.
	# COLIBRI_DB_PATH is REQUIRED: without it the daemon falls back to
	# /var/db/colibri/colibri.sqlite (the full Colibri daemon's path, owned by
	# the colibri user), which the clawdie user cannot open — Store::open then
	# panics and daemon(8) -r restart-loops. Keep clawdie's DB in its own dir.
	export COLIBRI_DAEMON_DATA_DIR="${clawdie_data_dir}"
	export COLIBRI_DAEMON_SOCKET="${clawdie_socket}"
	export COLIBRI_DB_PATH="${clawdie_db_path}"
	export COLIBRI_HOST="${clawdie_host}"

	# Optional per-host credential overrides (binary already has baked
	# defaults). File format: simple KEY=VALUE lines, e.g.
	#   CLAWDIE_TG_TOKEN=123456:abc
	#   CLAWDIE_DEEPSEEK_KEY=sk-...
	# The file is sourced by this script, i.e. executed as shell code, with
	# auto-export (set -a) enabled only for the duration of the include.
	if [ -r "${clawdie_env_file}" ]; then
		set -a
		. "${clawdie_env_file}"
		set +a
	fi
}
# start_postcmd hook: poll once per second, for at most 10 seconds, until the
# control socket at ${clawdie_socket} exists, then report the outcome.
# A missing socket only produces a warning; the hook itself never fails.
clawdie_poststart()
{
	local limit=10
	local elapsed=0

	# daemon(8) forks and the child binds the socket a moment later.
	until [ -S "${clawdie_socket}" ] || [ "${elapsed}" -ge "${limit}" ]; do
		sleep 1
		elapsed=$((elapsed + 1))
	done

	if [ ! -S "${clawdie_socket}" ]; then
		echo "WARNING: clawdie socket not ready after ${limit}s"
		return 0
	fi
	echo "clawdie socket ready after ${elapsed}s"
}
# Helper: ask a process to exit and make sure it really does.
#   $1 - pid to stop
#   $2 - seconds to wait after SIGTERM before escalating
#   $3 - label used in the escalation message
# Sends SIGTERM, polls once per second, and falls back to SIGKILL if the
# process is still alive when the grace period ends. Always returns 0.
_clawdie_terminate()
{
	local _pid="$1"
	local _limit="$2"
	local _label="$3"
	local _n=0

	kill -TERM "${_pid}" 2>/dev/null
	while kill -0 "${_pid}" 2>/dev/null && [ "${_n}" -lt "${_limit}" ]; do
		sleep 1
		_n=$((_n + 1))
	done
	if kill -0 "${_pid}" 2>/dev/null; then
		echo "${_label} did not exit in time; sending SIGKILL."
		kill -KILL "${_pid}" 2>/dev/null
	fi
	return 0
}

# stop_cmd: stop the daemon(8) supervisor, then make sure the child is gone.
# Globals read: name, supervisor_pidfile, pidfile
# Side effects: signals the supervisor and/or child, removes both pidfiles.
clawdie_stop()
{
	local _sup=""
	local _ch=""

	# daemon(8) -r restarts the child if it is killed directly, so a plain
	# SIGTERM to the child pidfile would just be undone. Stop the supervisor
	# instead: on SIGTERM it forwards the signal to the child and exits
	# without restarting it.
	[ -f "${supervisor_pidfile}" ] && _sup=$(cat "${supervisor_pidfile}" 2>/dev/null)
	if [ -n "${_sup}" ] && kill -0 "${_sup}" 2>/dev/null; then
		echo "Stopping ${name} (daemon(8) supervisor pid ${_sup})."
		_clawdie_terminate "${_sup}" 30 "Supervisor"
	else
		echo "${name} is not running."
	fi

	# Belt-and-suspenders: the child can outlive the supervisor (e.g. the
	# supervisor was SIGKILLed before it could clean up, or was already
	# gone). Escalate to SIGKILL if it ignores SIGTERM; otherwise the pidfile
	# removal below would leave an untracked clawdie running and the next
	# `start` would launch a second instance next to it.
	[ -f "${pidfile}" ] && _ch=$(cat "${pidfile}" 2>/dev/null)
	if [ -n "${_ch}" ] && kill -0 "${_ch}" 2>/dev/null; then
		_clawdie_terminate "${_ch}" 5 "Child"
	fi

	rm -f "${supervisor_pidfile}" "${pidfile}"
}
# stop_postcmd hook: remove the control socket left behind in the run dir.
# Does nothing when ${clawdie_socket} is absent or is not a socket.
clawdie_poststop()
{
	[ ! -S "${clawdie_socket}" ] || rm -f "${clawdie_socket}"
}
health_cmd="clawdie_health"

# `service clawdie health`: probe the control socket.
# Sends a {"cmd":"status"} line to ${clawdie_socket} with nc(1); the reply is
# discarded and only nc's exit status is used.
# Returns: 0 when nc exits successfully, 1 when the socket is missing or nc
#          fails.
clawdie_health()
{
	if [ ! -S "${clawdie_socket}" ]; then
		echo "clawdie socket not found"
		return 1
	fi

	# Options must come BEFORE the socket path: FreeBSD's getopt(3) stops at
	# the first non-option argument, so a trailing "-w 2" is not parsed as
	# the timeout and the probe could block indefinitely on a daemon that
	# keeps the connection open.
	if printf '{"cmd":"status"}\n' | nc -U -w 2 "${clawdie_socket}" >/dev/null 2>&1; then
		echo "clawdie is healthy (socket responding)"
		return 0
	fi

	echo "clawdie socket exists but not responding"
	return 1
}
# Hand the action named by the script's first argument to rc.subr's
# run_rc_command, which uses the variables and *_cmd/*_precmd/*_postcmd hooks
# configured earlier in this script.
run_rc_command "$1"