feat(daemon): hw register with mother on autospawn (belt + suspenders)

Layer 1 — rc.d ordering:
- Add tailscaled to colibri_daemon REQUIRE so the daemon doesn't start
  before the tailscale daemon is running.

Layer 2 — autospawn hook:
- After agent spawn, if clawdie-hw-probe output was collected, read
  external-mcp.json to detect a 'mother' server entry.
- If configured, SSH to mother and call node_register via colibri-mcp
  with 3 retries / 5s backoff (tailscale auth can lag).
- Runs in a detached tokio task so SSH retries never block the daemon.

The probe data is already collected at autospawn time and passed to
the agent via CLAWDIE_HW_PROFILE; this addition closes the loop by
actually sending it to the mother node as a best-effort side effect.

Sam & Claude
This commit is contained in:
123kupola 2026-06-26 08:25:18 +02:00
parent d76504c5bb
commit 4d7210a492
3 changed files with 190 additions and 2 deletions

View file

@ -605,6 +605,9 @@ pub async fn autospawn_agent_if_configured(state: &SharedState) {
debug!("autospawn: clawdie-hw-probe not found at {probe_binary}; skipping hw profile");
}
// Capture the hw profile before extra_env moves into cmd_spawn_agent.
let hw_profile_for_mother = extra_env.get("CLAWDIE_HW_PROFILE").cloned();
// provider=local → binary is the Pi executable; jail=None → host-spawn.
let resp = cmd_spawn_agent(
state,
@ -648,6 +651,17 @@ pub async fn autospawn_agent_if_configured(state: &SharedState) {
}
}
}
// Register hardware profile with mother if configured (best-effort).
if let Some(hw_json) = hw_profile_for_mother {
    // The registration shells out to SSH and sleeps between retries using
    // blocking std APIs. Run it on tokio's blocking pool instead of an async
    // worker thread so the daemon's other tasks are never stalled. Dropping
    // the returned JoinHandle detaches the task.
    tokio::task::spawn_blocking(move || {
        // HOSTNAME is a shell variable that is frequently not exported into a
        // daemon's environment; fall back to hostname(1) before giving up.
        let hostname = std::env::var("HOSTNAME")
            .ok()
            .map(|h| h.trim().to_string())
            .filter(|h| !h.is_empty())
            .or_else(|| {
                std::process::Command::new("hostname")
                    .output()
                    .ok()
                    .filter(|out| out.status.success())
                    .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
                    .filter(|h| !h.is_empty())
            })
            .unwrap_or_else(|| "unknown".to_string());
        try_register_hw_with_mother(&hostname, &hw_json);
    });
}
} else {
warn!(
error = resp.error.as_deref().unwrap_or("unknown"),
@ -656,6 +670,180 @@ pub async fn autospawn_agent_if_configured(state: &SharedState) {
}
}
/// Attempt to register hardware profile with mother node via SSH/MCP.
///
/// Reads `/usr/local/etc/colibri/external-mcp.json` to detect mother config.
/// If configured, SSH's to mother and calls `node_register` via colibri-mcp,
/// retrying up to 3 times with 5s backoff (tailscale can take a few seconds).
///
/// This runs inline in the autospawn path — the agent is already spawned and
/// running; the registration is a best-effort side effect that completes
/// asynchronously.
fn try_register_hw_with_mother(hostname: &str, hw_profile: &str) {
use std::io::Write;
const CONFIG_PATH: &str = "/usr/local/etc/colibri/external-mcp.json";
const MAX_RETRIES: u32 = 3;
const RETRY_DELAY_SECS: u64 = 5;
const REGISTER_TOOL: &str = "node_register";
// Only relevant when clawdie-hw-probe was actually shipped.
if hw_profile.is_empty() {
return;
}
// Parse external MCP registry to find the mother server entry.
let config: serde_json::Value = match std::fs::read_to_string(CONFIG_PATH) {
Ok(raw) => match serde_json::from_str(&raw) {
Ok(v) => v,
Err(_) => {
debug!("autospawn: external-mcp.json unparseable; skipping mother register");
return;
}
},
Err(_) => {
debug!("autospawn: external-mcp.json not found; skipping mother register");
return;
}
};
let mother = match config
.get("servers")
.and_then(|s| s.get("mother"))
.and_then(serde_json::Value::as_object)
{
Some(m) => m,
None => {
debug!("autospawn: no 'mother' server in external-mcp.json; skipping register");
return;
}
};
// Build the SSH argv from the server entry. Expected shape:
// {"command":"ssh","args":["-i","/key","-o","BatchMode=yes","...","host","cmd"]}
let command = match mother.get("command").and_then(|c| c.as_str()) {
Some("ssh") => "ssh",
_ => {
warn!("autospawn: mother server entry is not an SSH command; skipping register");
return;
}
};
let args: Vec<String> = mother
.get("args")
.and_then(|a| a.as_array())
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(str::to_string)).collect())
.unwrap_or_default();
if args.is_empty() {
warn!("autospawn: mother server entry has no args; skipping register");
return;
}
// Build the JSON-RPC tools/call payload for node_register.
let rpc_request = serde_json::json!({
"jsonrpc": "2.0",
"method": "tools/call",
"id": 1,
"params": {
"name": REGISTER_TOOL,
"arguments": {
"hostname": hostname,
"node_type": "live-usb",
"hw_profile": serde_json::from_str::<serde_json::Value>(hw_profile)
.unwrap_or(serde_json::json!({"error": "unparseable"}))
}
}
});
let rpc_line = serde_json::to_string(&rpc_request).unwrap_or_default();
info!(
hostname = %hostname,
hw_bytes = hw_profile.len(),
mother_args = ?args,
"autospawn: attempting hw register with mother"
);
// Retry loop — tailscale can lag behind daemon startup.
for attempt in 1..=MAX_RETRIES {
let mut child = match std::process::Command::new(command)
.args(&args)
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
{
Ok(c) => c,
Err(e) => {
if attempt < MAX_RETRIES {
warn!(
attempt,
error = %e,
"autospawn: mother SSH spawn failed; retrying in {RETRY_DELAY_SECS}s"
);
std::thread::sleep(std::time::Duration::from_secs(RETRY_DELAY_SECS));
continue;
}
warn!(attempt, error = %e, "autospawn: mother SSH spawn failed after {MAX_RETRIES} attempts");
return;
}
};
// Feed the JSON-RPC request to colibri-mcp on mother.
if let Some(ref mut stdin) = child.stdin {
let _ = writeln!(stdin, "{rpc_line}");
// Closing stdin signals EOF to colibri-mcp.
}
let output = child.wait_with_output();
match output {
Ok(ref out) if out.status.success() => {
let stdout = String::from_utf8_lossy(&out.stdout);
info!(
attempt,
response = %stdout.trim(),
"autospawn: hw registered with mother"
);
return;
}
Ok(out) => {
let stderr = String::from_utf8_lossy(&out.stderr);
if attempt < MAX_RETRIES {
warn!(
attempt,
status = %out.status,
stderr = %stderr.trim(),
"autospawn: mother register failed; retrying in {RETRY_DELAY_SECS}s"
);
std::thread::sleep(std::time::Duration::from_secs(RETRY_DELAY_SECS));
continue;
}
warn!(
attempt,
status = %out.status,
stderr = %stderr.trim(),
"autospawn: mother register failed after {MAX_RETRIES} attempts"
);
return;
}
Err(e) => {
if attempt < MAX_RETRIES {
warn!(
attempt,
error = %e,
"autospawn: mother SSH error; retrying in {RETRY_DELAY_SECS}s"
);
std::thread::sleep(std::time::Duration::from_secs(RETRY_DELAY_SECS));
continue;
}
warn!(attempt, error = %e, "autospawn: mother SSH error after {MAX_RETRIES} attempts");
return;
}
}
}
}
fn basename(path: &str) -> String {
std::path::Path::new(path)
.file_name()

View file

@ -28,7 +28,7 @@
# - colibri user/group (privilege drop target)
# PROVIDE: colibri_daemon
# REQUIRE: LOGIN cleanvar
# REQUIRE: LOGIN cleanvar tailscaled
# KEYWORD: shutdown
. /etc/rc.subr

View file

@ -28,7 +28,7 @@
# - colibri user/group (privilege drop target)
# PROVIDE: colibri_daemon
# REQUIRE: LOGIN cleanvar
# REQUIRE: LOGIN cleanvar tailscaled
# KEYWORD: shutdown
. /etc/rc.subr