#!/bin/sh
# Clawdie operator USB live seed importer.
# Reads operator-provided files from the FAT32 CLAWDIESEED partition
# (mountable on Linux/macOS/Windows pre- or post-flash) and installs them into
# the live system at every boot. Idempotent by design: editing the seed and
# rebooting re-applies it. Failure is never fatal — a missing/empty/unmountable
# seed logs and continues so the operator can still log in via SDDM.
#
# Two layers of contract (see clawdie-live-seed.README.txt for the operator view):
#   1. Legacy top-level allowlist: /authorized_keys, /ssh/authorized_keys.
#   2. Per-agent directories: /<agent-name>/ holding env, soul/, harness.toml,
#      ssh/authorized_keys. On the live USB (single agent) the first agent dir
#      is activated for the clawdie user; on a deployed host the importer loops
#      every dir, staging the rest (multi-agent provisioning is a follow-up seam).
#
# SECURITY: this partition is plaintext FAT32. By operator decision the env
# files here may carry secrets (provider API keys, vault-bootstrap creds).
# Treat seeded sticks as secret-bearing media. The importer lands secrets 0600
# owned by the agent user and supports optional consume-and-shred (a /shred
# marker file on the seed) to delete env files from the seed after import.
# Note that the deletion is a plain unlink (rm), not an overwrite, so the
# contents may still be recoverable from the stick.
#
# Runs before LOGIN so sshd sees imported authorized_keys on first attach.

# PROVIDE: clawdie_live_seed
# REQUIRE: FILESYSTEMS devfs
# BEFORE: LOGIN
# KEYWORD: nojail

# Pull in the rc framework when it is present. rc.subr ships only on the
# FreeBSD target; skipping it elsewhere lets a non-FreeBSD host source this
# file and unit-test the import functions below (CLAWDIE_SEED_TEST=1).
[ ! -r /etc/rc.subr ] || . /etc/rc.subr

# Service description consumed by rc.subr.
name="clawdie_live_seed"
rcvar="${name}_enable"
start_cmd="${name}_start"
stop_cmd=":"
status_cmd="${name}_status"
extra_commands="status"

# Label of the seed partition (fixed, not overridable).
SEED_LABEL="CLAWDIESEED"

# Everything below marked ":=" keeps a value already present in the
# environment and only falls back to the default when unset or empty.

# Mount point, log file, and the operator account that receives the seed.
: "${SEED_MOUNT:=/mnt/clawdie-seed}"
: "${SEED_LOG:=/var/log/clawdie-live-seed.log}"
: "${SEED_USER:=clawdie}"
: "${SEED_USER_HOME:=/home/clawdie}"

# Daemon account: colibri_daemon runs as colibri and needs the outbound SSH
# material (mother-mcp key + config + known_hosts) in its own home so external
# MCP SSH connections to mother work.
: "${SEED_DAEMON_USER:=colibri}"
: "${SEED_DAEMON_HOME:=/var/db/colibri}"

# Staging area for imported agent payloads. Runtime consumers (loading a soul
# into the agent workspace cwd, launching the chosen harness) read from here.
: "${SEED_IMPORT_ROOT:=/var/db/clawdie/seed}"

# Names at the seed root that are never treated as agent directories.
SEED_RESERVED_DIRS="ssh"

# Accepted harness values; mirrors Colibri's AgentRuntime enum
# (colibri-glasspane).
SEED_VALID_HARNESSES="pi zot local"

# Path, relative to the agent home, that receives the Vaultwarden bootstrap
# creds routed out of .env so clawdie-vault-fetch can consume them.
SEED_VAULT_BOOTSTRAP_REL=".config/vault-bootstrap.env"

# colibri_daemon reads provider keys from this file (rc.conf
# colibri_daemon_provider_env), NOT from the operator's ~/.env. The active
# agent's direct provider keys are merged here as well, so the daemon can
# auto-spawn at boot from a seeded stick without operator action (zero-touch
# provisioning). The importer runs as root before LOGIN and can therefore
# write this root-owned file.
: "${SEED_PROVIDER_ENV:=/usr/local/etc/colibri/provider.env}"

# Append one timestamped line to ${SEED_LOG}.
#   $1 - message text
# Logging is best-effort: if the log cannot be written the message is dropped
# silently. Always returns 0 so a logging problem never aborts the import.
_seed_log() {
    # Redirections are applied left to right, so a "2>/dev/null" placed after
    # ">>file" is not yet in effect when opening the file fails and the shell's
    # error message would reach the console. Wrapping the write in a group puts
    # the stderr redirect in place before the log file is opened.
    {
        printf '%s %s\n' "$(date '+%Y-%m-%dT%H:%M:%S')" "$1" >>"${SEED_LOG}"
    } 2>/dev/null || true
}

# Locate the seed partition by its ${SEED_LABEL} label.
# Prints the device path on stdout, or prints nothing when no partition is
# found. Always returns 0; callers test for empty output.
_seed_find_partition() {
    # Fast path: label device nodes (FAT volume label, GPT label, generic label).
    for _candidate in \
        "/dev/msdosfs/${SEED_LABEL}" \
        "/dev/gpt/${SEED_LABEL}" \
        "/dev/label/${SEED_LABEL}"
    do
        if [ -e "${_candidate}" ]; then
            echo "${_candidate}"
            return 0
        fi
    done

    # Fallback: ask gpart. With -l -p each partition row reads
    #   <start> <size> <provider> <label> ...
    # where <provider> is a bare name such as "da0p3" (no /dev/ prefix), so the
    # path has to be assembled here. The label is compared exactly, so a label
    # that merely contains ${SEED_LABEL} does not match. Rows starting with
    # "=>" describe the whole disk and are skipped.
    if command -v gpart >/dev/null 2>&1; then
        gpart show -lp 2>/dev/null \
            | awk -v label="${SEED_LABEL}" '
                $1 != "=>" && $4 == label {
                    dev = $3
                    if (dev !~ /^\/dev\//) dev = "/dev/" dev
                    print dev
                    exit
                }
            '
    fi
    return 0
}

# Install an INBOUND authorized_keys file for a user.
#   $1 - source file on the seed
#   $2 - owning user (default ${SEED_USER})
#   $3 - that user's home directory (default ${SEED_USER_HOME})
# Replaces <home>/.ssh/authorized_keys (0600) and makes sure <home>/.ssh is
# 0700. Ownership changes are best-effort. Returns 0 on success; returns 1 and
# leaves any existing authorized_keys untouched when the source cannot be read.
_seed_install_authorized_keys() {
    _src="$1"
    _user="${2:-${SEED_USER}}"
    _home="${3:-${SEED_USER_HOME}}"

    _ssh_dir="${_home}/.ssh"
    _dst="${_ssh_dir}/authorized_keys"

    mkdir -p "${_ssh_dir}"
    chown "${_user}:${_user}" "${_ssh_dir}" 2>/dev/null || true
    chmod 0700 "${_ssh_dir}"

    # Strip CRLF so keys created on Windows/Linux editors don't get rejected
    # by sshd for trailing whitespace. The copy goes to a scratch file first and
    # is only moved into place when it succeeded, so a missing or half-read seed
    # file can never replace a working key file with a truncated one.
    if ! tr -d '\r' <"${_src}" >"${_dst}.new"; then
        rm -f "${_dst}.new"
        _seed_log "WARN could not read ${_src} — authorized_keys left unchanged"
        return 1
    fi
    mv -f "${_dst}.new" "${_dst}"
    chown "${_user}:${_user}" "${_dst}" 2>/dev/null || true
    chmod 0600 "${_dst}"

    _seed_log "installed authorized_keys from ${_src} -> ${_dst}"
}

# Install OUTBOUND ssh client material from an agent's ssh/ dir into a home so
# a seeded node can SSH out (e.g. node -> mother) hands-free, with the private
# key delivered on the offline seed instead of baked into the image.
#   $1 - source ssh/ directory on the seed (missing dir: no-op, returns 0)
#   $2 - owning user (default ${SEED_USER})
#   $3 - that user's home directory (default ${SEED_USER_HOME})
# Per regular file in the source directory:
#   authorized_keys -> skipped (INBOUND; see _seed_install_authorized_keys)
#   known_hosts*    -> appended to ~/.ssh/known_hosts*, de-duplicated, 0644
#   config          -> ~/.ssh/config, 0600 (e.g. a "Host mother" alias)
#   <name>.pub      -> ~/.ssh/<name>.pub, 0644
#   anything else   -> ~/.ssh/<name>, 0600 (treated as a private key)
# Carriage returns are stripped from everything that is installed.
_seed_install_ssh_material() {
    _srcdir="$1"
    _user="${2:-${SEED_USER}}"
    _home="${3:-${SEED_USER_HOME}}"

    if [ ! -d "${_srcdir}" ]; then
        return 0
    fi

    _ssh_dir="${_home}/.ssh"
    mkdir -p "${_ssh_dir}"
    chown "${_user}:${_user}" "${_ssh_dir}" 2>/dev/null || true
    chmod 0700 "${_ssh_dir}"

    for _f in "${_srcdir}"/*; do
        [ -f "${_f}" ] || continue
        _base="$(basename "${_f}")"
        _dst="${_ssh_dir}/${_base}"

        # Classify the file. The two special cases finish the iteration
        # themselves; every other kind only picks a mode and a log label and
        # falls through to the shared replace-in-place step below.
        case "${_base}" in
            authorized_keys)
                # inbound, installed separately
                continue ;;
            known_hosts|known_hosts2)
                touch "${_dst}"
                tr -d '\r' <"${_f}" >>"${_dst}"
                # Sorting unique keeps repeated imports idempotent; line order
                # does not matter in this file.
                sort -u "${_dst}" -o "${_dst}" 2>/dev/null || true
                chmod 0644 "${_dst}"
                chown "${_user}:${_user}" "${_dst}" 2>/dev/null || true
                _seed_log "merged ${_base} -> ${_dst}"
                continue ;;
            config)
                _ssh_mode=0600
                _ssh_what="ssh config" ;;
            *.pub)
                _ssh_mode=0644
                _ssh_what="public key" ;;
            *)
                _ssh_mode=0600
                _ssh_what="private key" ;;
        esac

        tr -d '\r' <"${_f}" >"${_dst}.new"
        mv -f "${_dst}.new" "${_dst}"
        chmod "${_ssh_mode}" "${_dst}"
        chown "${_user}:${_user}" "${_dst}" 2>/dev/null || true
        _seed_log "installed ${_ssh_what} -> ${_dst}"
    done
}

# Check that $1 looks like a shell environment variable name: non-empty, first
# character a letter or underscore, remaining characters letters, digits or
# underscores. Returns 0 when valid, 1 otherwise. Used to keep garbled seed
# lines from putting odd content into .env files.
_seed_key_ok() {
    [ -n "$1" ] || return 1
    case "$1" in
        # A leading digit is the only bad first character that the charset
        # check below would let through.
        [0-9]*) return 1 ;;
        *[!A-Za-z0-9_]*) return 1 ;;
    esac
    return 0
}

# Merge KEY=VALUE pairs from a plaintext source into a target file.
#   $1 - source file (missing: no-op, returns 0)
#   $2 - target file; created if absent
#   $3 - owning user (default ${SEED_USER})
# Keys the source does not mention are preserved; keys it does mention are
# replaced (when a key repeats in the source, the last occurrence wins). Blank
# lines, comments, lines without '=', and invalid key names are skipped, and
# carriage returns are stripped. The target lands 0600; ownership changes are
# best-effort.
_seed_merge_env() {
    _src="$1"
    _dst="$2"
    _user="${3:-${SEED_USER}}"

    [ -f "${_src}" ] || return 0

    # A parent directory created here is handed to the target user, the same
    # way the ~/.ssh installers do, so it does not stay root-owned inside the
    # user's home. Directories that already exist are left alone.
    _dst_dir="$(dirname "${_dst}")"
    if [ ! -d "${_dst_dir}" ]; then
        mkdir -p "${_dst_dir}"
        chown "${_user}:${_user}" "${_dst_dir}" 2>/dev/null || true
    fi
    touch "${_dst}"
    chmod 0600 "${_dst}"

    # The scratch files below hold secret values. Tighten the umask while they
    # exist so they are never created group/world-readable, then restore it.
    # (Set after mkdir so a newly created directory keeps its normal mode.)
    _merge_umask="$(umask)"
    umask 077

    _merged="${_dst}.seedmerge"
    cp "${_dst}" "${_merged}" 2>/dev/null || : >"${_merged}"

    _count=0
    while IFS= read -r _line || [ -n "${_line}" ]; do
        case "${_line}" in
            ''|'#'*) continue ;;
            *=*) : ;;
            *) continue ;;
        esac
        _line="$(printf '%s' "${_line}" | tr -d '\r')"
        _k="${_line%%=*}"
        if ! _seed_key_ok "${_k}"; then
            _seed_log "skipping invalid env key '${_k}'"
            continue
        fi
        # Drop any previous assignment of this key, then append the new one.
        # _k passed _seed_key_ok, so it is safe to use inside the pattern.
        grep -v "^${_k}=" "${_merged}" >"${_merged}.tmp" 2>/dev/null || : >"${_merged}.tmp"
        mv "${_merged}.tmp" "${_merged}"
        printf '%s\n' "${_line}" >>"${_merged}"
        _count=$((_count + 1))
    done <"${_src}"

    cp "${_merged}" "${_dst}"
    chmod 0600 "${_dst}"
    chown "${_user}:${_user}" "${_dst}" 2>/dev/null || true
    rm -f "${_merged}"
    umask "${_merge_umask}"
    _seed_log "merged ${_count} key(s) -> ${_dst}"
}

# Split an agent's seed env into two staged files.
#   $1 - source env file
#   $2 - output directory (must already exist)
# Both ${2}/.app.env and ${2}/.boot.env are truncated and set 0600 first, so
# they exist (empty) even when the source is missing. Vaultwarden bootstrap
# creds (BW_CLIENTID, BW_CLIENTSECRET, BW_PASSWORD) go to .boot.env; every
# other valid KEY=VALUE line goes to .app.env. Blank lines, comments, lines
# without '=', and invalid key names are dropped; carriage returns are stripped.
_seed_split_env() {
    _src="$1"
    _outdir="$2"

    _app="${_outdir}/.app.env"
    _boot="${_outdir}/.boot.env"
    for _split_out in "${_app}" "${_boot}"; do
        : >"${_split_out}"
    done
    chmod 0600 "${_app}" "${_boot}" 2>/dev/null || true

    if [ ! -f "${_src}" ]; then
        return 0
    fi

    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
    while IFS= read -r _line || [ -n "${_line}" ]; do
        # Nothing to do for blanks and comments ...
        case "${_line}" in
            ''|'#'*) continue ;;
        esac
        # ... nor for anything that is not an assignment.
        case "${_line}" in
            *=*) ;;
            *) continue ;;
        esac

        _line="$(printf '%s' "${_line}" | tr -d '\r')"
        _k="${_line%%=*}"
        if ! _seed_key_ok "${_k}"; then
            continue
        fi

        if [ "${_k}" = "BW_CLIENTID" ] \
            || [ "${_k}" = "BW_CLIENTSECRET" ] \
            || [ "${_k}" = "BW_PASSWORD" ]
        then
            _split_target="${_boot}"
        else
            _split_target="${_app}"
        fi
        printf '%s\n' "${_line}" >>"${_split_target}"
    done <"${_src}"
}

# Sanity-check and stage a harness descriptor.
#   $1 - source harness.toml (missing: no-op, returns 0)
#   $2 - stage directory; created if needed
# Only the first `harness = ...` line is parsed. Its value is lower-cased and
# compared against ${SEED_VALID_HARNESSES}; an unknown or missing value logs a
# warning but does not block the import. The file itself is copied unmodified
# to <stage>/harness.toml.
_seed_import_harness() {
    _src="$1"
    _stage="$2"

    if [ ! -f "${_src}" ]; then
        return 0
    fi

    _h="$(
        grep -E '^[[:space:]]*harness[[:space:]]*=' "${_src}" 2>/dev/null \
            | head -n 1 \
            | sed -E 's/^[^=]*=[[:space:]]*"?([A-Za-z]+)"?.*/\1/' \
            | tr 'A-Z' 'a-z'
    )"
    # What the log lines show when no harness value could be extracted.
    _h_label="${_h:-<none>}"

    _ok=0
    for _v in ${SEED_VALID_HARNESSES}; do
        if [ "${_h}" = "${_v}" ]; then
            _ok=1
        fi
    done
    [ "${_ok}" -eq 1 ] \
        || _seed_log "WARN harness '${_h_label}' in ${_src} not in {${SEED_VALID_HARNESSES}} — recording anyway"

    mkdir -p "${_stage}"
    cp "${_src}" "${_stage}/harness.toml" 2>/dev/null || true
    _seed_log "recorded harness '${_h_label}' -> ${_stage}/harness.toml"
}

# Copy a soul/ backup tree into the stage for later use by the agent workspace.
#   $1 - source soul directory (missing: no-op, returns 0)
#   $2 - stage directory; the tree lands in <stage>/soul
# Copy errors are ignored (best-effort) and the staging is logged either way.
_seed_import_soul() {
    _src="$1"
    _stage="$2"

    if [ -d "${_src}" ]; then
        mkdir -p "${_stage}/soul"
        # Plain recursive cp keeps this portable; copying "<src>/." carries the
        # directory's contents, dotfiles included, rather than the directory
        # itself. The tree is small (layered-soul ~600 KB).
        cp -R "${_src}/." "${_stage}/soul/" 2>/dev/null || true
        _seed_log "staged soul backup ${_src} -> ${_stage}/soul"
    fi
    return 0
}

# Stage the payload of an agent dir that does not touch any home directory.
#   $1 - agent directory on the seed
#   $2 - agent name
# Creates ${SEED_IMPORT_ROOT}/<agent> (0700) holding: agent-name, harness.toml
# and soul/ when present, and a 0600 copy of the raw env so a staged-only
# agent's secrets are not lost. Never writes to a home directory and never
# installs SSH keys. Prints the stage directory on stdout (callers capture it).
_seed_stage_agent() {
    _dir="$1"
    _agent="$2"
    _stage="${SEED_IMPORT_ROOT}/${_agent}"

    mkdir -p "${_stage}"
    chmod 0700 "${_stage}" 2>/dev/null || true
    printf '%s\n' "${_agent}" >"${_stage}/agent-name" 2>/dev/null || true

    _seed_import_harness "${_dir}/harness.toml" "${_stage}"
    _seed_import_soul "${_dir}/soul" "${_stage}"

    [ ! -f "${_dir}/env" ] || {
        cp "${_dir}/env" "${_stage}/env" 2>/dev/null || true
        chmod 0600 "${_stage}/env" 2>/dev/null || true
    }

    echo "${_stage}"
}

# Activate one agent into a real home. Only the live USB's single active agent
# gets this.
#   $1 - agent directory on the seed
#   $2 - agent name
#   $3 - user that owns the activated files
#   $4 - that user's home directory
# Steps: stage the payload; env -> <home>/.env and ${SEED_PROVIDER_ENV};
# BW_* -> <home>/${SEED_VAULT_BOOTSTRAP_REL}; inbound authorized_keys and
# outbound ssh material -> <home>/.ssh (the latter also for the daemon user);
# AGENTS.md -> the daemon's zot state directory.
_seed_activate_agent() {
    # Functions in this file share one global variable namespace, and the
    # helpers called below assign _user, _home, _src, _dst, ... themselves
    # (e.g. _seed_merge_env ... root sets _user=root). Our arguments therefore
    # live under a private _act_ prefix; otherwise every step after the first
    # helper call would run with the helper's user/home instead of ours.
    _act_dir="$1"
    _act_agent="$2"
    _act_user="$3"
    _act_home="$4"

    _act_stage="$(_seed_stage_agent "${_act_dir}" "${_act_agent}")"

    if [ -f "${_act_dir}/env" ]; then
        _seed_split_env "${_act_dir}/env" "${_act_stage}"
        _seed_merge_env "${_act_stage}/.app.env" "${_act_home}/.env" "${_act_user}"
        # Feed the daemon too: colibri_daemon reads provider.env, not ~/.env.
        # Routing the active agent's provider keys here lets a seeded stick boot
        # straight into an auto-spawned agent — no Join Hive click, no vault
        # round-trip. Lands root-owned 0600 (the importer is root, pre-LOGIN).
        if [ -s "${_act_stage}/.app.env" ]; then
            _seed_merge_env "${_act_stage}/.app.env" "${SEED_PROVIDER_ENV}" root
            _seed_log "merged active-agent provider keys -> ${SEED_PROVIDER_ENV}"
        fi
        if [ -s "${_act_stage}/.boot.env" ]; then
            _seed_merge_env "${_act_stage}/.boot.env" "${_act_home}/${SEED_VAULT_BOOTSTRAP_REL}" "${_act_user}"
            _seed_log "routed Vaultwarden bootstrap creds -> ${_act_home}/${SEED_VAULT_BOOTSTRAP_REL}"
        fi
        # The split files were only an intermediate; the raw env copy stays
        # in the stage directory.
        rm -f "${_act_stage}/.app.env" "${_act_stage}/.boot.env"
    fi

    if [ -f "${_act_dir}/ssh/authorized_keys" ]; then
        _seed_install_authorized_keys "${_act_dir}/ssh/authorized_keys" "${_act_user}" "${_act_home}"
    fi
    # Outbound client material (config, known_hosts, client keys) for node->mother
    # connectivity, delivered via the offline seed rather than baked in the image.
    if [ -d "${_act_dir}/ssh" ]; then
        _seed_install_ssh_material "${_act_dir}/ssh" "${_act_user}" "${_act_home}"
        # Also install for the daemon user (colibri) — the daemon spawns the
        # external-MCP SSH connection to mother, not the operator (clawdie).
        # Same seed material, same private key, separate ~/.ssh directory.
        _seed_install_ssh_material "${_act_dir}/ssh" "${SEED_DAEMON_USER}" "${SEED_DAEMON_HOME}"
    fi
    # Install AGENTS.md to zot's global slot so the autospawned zot reads
    # operational rules (mother, verbs, "install Hermes next") on first boot.
    # Must match ZOT_HOME exported by colibri_daemon.in prestart.
    if [ -f "${_act_dir}/AGENTS.md" ]; then
        _act_zot_home="${SEED_DAEMON_HOME}/.local/state/zot"
        mkdir -p "${_act_zot_home}"
        install -m 0644 -o "${SEED_DAEMON_USER}" -g "${SEED_DAEMON_USER}" \
            "${_act_dir}/AGENTS.md" "${_act_zot_home}/AGENTS.md"
        _seed_log "installed AGENTS.md -> ${_act_zot_home}/AGENTS.md"
    fi

    _seed_log "activated agent '${_act_agent}' for user ${_act_user} (home ${_act_home})"
}

# Decide whether $1 is acceptable as an agent directory name.
# Rejected: the empty string, anything starting with a dot (".", "..", and
# macOS/FAT system dirs such as .Spotlight-V100 or .fseventsd), any character
# outside A-Z a-z 0-9 . _ - (which also rules out "/" and so path traversal),
# and names listed in ${SEED_RESERVED_DIRS}. Returns 0 when acceptable, else 1.
_seed_agent_name_ok() {
    _n="$1"

    [ -n "${_n}" ] || return 1
    case "${_n}" in
        .*) return 1 ;;
        *[!A-Za-z0-9._-]*) return 1 ;;
    esac

    for _r in ${SEED_RESERVED_DIRS}; do
        if [ "${_n}" = "${_r}" ]; then
            return 1
        fi
    done
    return 0
}

# Report whether directory $1 carries at least one recognized agent payload:
# an env file, a harness.toml file, a soul/ directory, or ssh/authorized_keys.
# Returns 0 when it does, 1 otherwise.
_seed_agent_has_payload() {
    _d="$1"
    if [ -f "${_d}/env" ] \
        || [ -f "${_d}/harness.toml" ] \
        || [ -d "${_d}/soul" ] \
        || [ -f "${_d}/ssh/authorized_keys" ]
    then
        return 0
    fi
    return 1
}

# Core import routine, kept separate from the rc start_cmd so it can be run
# against a pre-mounted directory under test (CLAWDIE_SEED_TEST). Works on
# ${SEED_MOUNT}; always returns 0.
_seed_import_tree() {
    _imported=0

    # Layer 1: legacy top-level authorized_keys for the default user. The
    # ssh/ location takes precedence; only the first one found is installed.
    for _tree_legacy in \
        "${SEED_MOUNT}/ssh/authorized_keys" \
        "${SEED_MOUNT}/authorized_keys"
    do
        [ -f "${_tree_legacy}" ] || continue
        _seed_install_authorized_keys "${_tree_legacy}"
        _imported=1
        break
    done

    # Layer 2: per-agent directories, visited in glob (sorted) order. On the
    # live USB (single agent) the first valid one is activated for
    # ${SEED_USER}; any further ones are staged only — no home is created —
    # pending deployed multi-agent provisioning.
    _first=1
    for _entry in "${SEED_MOUNT}"/*; do
        if [ ! -d "${_entry}" ]; then
            continue
        fi
        _agent="$(basename "${_entry}")"

        if ! _seed_agent_name_ok "${_agent}"; then
            # The reserved ssh dir is expected and stays quiet; every other
            # rejected name is worth a log line.
            if [ "${_agent}" != "ssh" ]; then
                _seed_log "skipping dir '${_agent}' (reserved, hidden, or invalid name)"
            fi
            continue
        fi

        if ! _seed_agent_has_payload "${_entry}"; then
            _seed_log "skipping dir '${_agent}' (no recognized agent payload)"
            continue
        fi

        if [ "${_first}" -ne 1 ]; then
            _seed_stage_agent "${_entry}" "${_agent}" >/dev/null
            _seed_log "NOTE additional agent dir '${_agent}' staged only; deployed multi-agent provisioning not yet wired"
            continue
        fi

        _seed_activate_agent "${_entry}" "${_agent}" "${SEED_USER}" "${SEED_USER_HOME}"
        printf '%s\n' "${_agent}" >"${SEED_IMPORT_ROOT}/active-agent" 2>/dev/null || true
        _first=0
        _imported=1
    done

    [ "${_imported}" -ne 0 ] \
        || _seed_log "no allowlisted files or agent dirs on seed — nothing to import"
    return 0
}

# Optional consume-and-shred step, run while the seed is still mounted.
#   $1 - seed device
# A /shred marker on the seed asks for the env files to be removed after
# import so secrets do not persist on the stick. That needs a brief read-write
# remount; without the marker this is a no-op. Files are removed with rm.
_seed_consume_and_shred() {
    _shred_dev="$1"

    [ -f "${SEED_MOUNT}/shred" ] || return 0

    umount "${SEED_MOUNT}" 2>/dev/null || true
    if ! mount -t msdosfs "${_shred_dev}" "${SEED_MOUNT}" 2>>"${SEED_LOG}"; then
        _seed_log "consume-and-shred requested but RW remount failed — env left on seed"
        return 0
    fi

    find "${SEED_MOUNT}" -name env -type f -exec rm -f {} + 2>/dev/null || true
    rm -f "${SEED_MOUNT}/shred" 2>/dev/null || true
    _seed_log "consume-and-shred: wiped env files from seed"
}

# rc start_cmd: find the seed partition, mount it read-only, import its
# contents, run the optional shred step, and unmount. Every failure is logged
# and swallowed; the function always returns 0 so boot continues.
clawdie_live_seed_start() {
    # Make sure the log file and the staging root exist (both best-effort).
    : >>"${SEED_LOG}" 2>/dev/null || true
    mkdir -p "${SEED_IMPORT_ROOT}" 2>/dev/null || true

    _dev="$(_seed_find_partition)"
    [ -n "${_dev}" ] || {
        _seed_log "no ${SEED_LABEL} partition found — skipping import"
        return 0
    }

    mkdir -p "${SEED_MOUNT}"
    mount -t msdosfs -o ro "${_dev}" "${SEED_MOUNT}" 2>>"${SEED_LOG}" || {
        _seed_log "mount failed on ${_dev} — skipping import"
        return 0
    }

    _seed_import_tree
    _seed_consume_and_shred "${_dev}"

    umount "${SEED_MOUNT}" 2>/dev/null || true
    return 0
}

# rc status command. The importer is a one-shot, so "status" just reports on
# its log: with a non-empty ${SEED_LOG} it prints a header plus the last five
# log lines and returns 0; otherwise it says the importer has not run and
# returns 1.
clawdie_live_seed_status() {
    if [ ! -s "${SEED_LOG}" ]; then
        echo "${name}: not yet run this boot (no log at ${SEED_LOG})"
        return 1
    fi

    echo "${name}: one-shot importer; last run log tail:"
    tail -n 5 "${SEED_LOG}"
    return 0
}

# Hand control to rc.subr, but only when this is a real service invocation:
# CLAWDIE_SEED_TEST must be unset/empty and run_rc_command must exist (i.e.
# rc.subr was sourced). In every other case nothing runs here, which leaves
# the functions above available to be exercised directly.
if [ -z "${CLAWDIE_SEED_TEST:-}" ] && command -v run_rc_command >/dev/null 2>&1; then
    load_rc_config "$name"
    # Default to enabled unless rc.conf set the knob.
    : "${clawdie_live_seed_enable:=YES}"
    run_rc_command "$1"
fi
