feat(daemon): hw register with mother on autospawn (belt + suspenders)
Layer 1 — rc.d ordering: - Add tailscaled to colibri_daemon REQUIRE so the daemon doesn't start before the tailscale daemon is running. Layer 2 — autospawn hook: - After agent spawn, if clawdie-hw-probe was collected, read external-mcp.json to detect a 'mother' server entry. - If configured, SSH to mother and call node_register via colibri-mcp with 3 retries / 5s backoff (tailscale auth can lag). - Runs in a detached tokio task so SSH retries never block the daemon. The probe data is already collected at autospawn time and passed to the agent via CLAWDIE_HW_PROFILE; this addition closes the loop by actually sending it to the mother node as a best-effort side effect. Sam & Claude
This commit is contained in:
parent
d76504c5bb
commit
4d7210a492
3 changed files with 190 additions and 2 deletions
|
|
@ -605,6 +605,9 @@ pub async fn autospawn_agent_if_configured(state: &SharedState) {
|
|||
debug!("autospawn: clawdie-hw-probe not found at {probe_binary}; skipping hw profile");
|
||||
}
|
||||
|
||||
// Capture the hw profile before extra_env moves into cmd_spawn_agent.
|
||||
let hw_profile_for_mother = extra_env.get("CLAWDIE_HW_PROFILE").cloned();
|
||||
|
||||
// provider=local → binary is the Pi executable; jail=None → host-spawn.
|
||||
let resp = cmd_spawn_agent(
|
||||
state,
|
||||
|
|
@ -648,6 +651,17 @@ pub async fn autospawn_agent_if_configured(state: &SharedState) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Register hardware profile with mother if configured (best-effort).
|
||||
if let Some(hw_json) = hw_profile_for_mother {
|
||||
let hostname = std::env::var("HOSTNAME")
|
||||
.unwrap_or_else(|_| "unknown".to_string());
|
||||
// Spawn a detached tokio task so SSH retries don't block the daemon.
|
||||
let hw_json = hw_json.clone();
|
||||
tokio::spawn(async move {
|
||||
try_register_hw_with_mother(&hostname, &hw_json);
|
||||
});
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
error = resp.error.as_deref().unwrap_or("unknown"),
|
||||
|
|
@ -656,6 +670,180 @@ pub async fn autospawn_agent_if_configured(state: &SharedState) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Attempt to register hardware profile with mother node via SSH/MCP.
|
||||
///
|
||||
/// Reads `/usr/local/etc/colibri/external-mcp.json` to detect mother config.
|
||||
/// If configured, SSH's to mother and calls `node_register` via colibri-mcp,
|
||||
/// retrying up to 3 times with 5s backoff (tailscale can take a few seconds).
|
||||
///
|
||||
/// This runs inline in the autospawn path — the agent is already spawned and
|
||||
/// running; the registration is a best-effort side effect that completes
|
||||
/// asynchronously.
|
||||
fn try_register_hw_with_mother(hostname: &str, hw_profile: &str) {
|
||||
use std::io::Write;
|
||||
|
||||
const CONFIG_PATH: &str = "/usr/local/etc/colibri/external-mcp.json";
|
||||
const MAX_RETRIES: u32 = 3;
|
||||
const RETRY_DELAY_SECS: u64 = 5;
|
||||
const REGISTER_TOOL: &str = "node_register";
|
||||
|
||||
// Only relevant when clawdie-hw-probe was actually shipped.
|
||||
if hw_profile.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse external MCP registry to find the mother server entry.
|
||||
let config: serde_json::Value = match std::fs::read_to_string(CONFIG_PATH) {
|
||||
Ok(raw) => match serde_json::from_str(&raw) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
debug!("autospawn: external-mcp.json unparseable; skipping mother register");
|
||||
return;
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
debug!("autospawn: external-mcp.json not found; skipping mother register");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mother = match config
|
||||
.get("servers")
|
||||
.and_then(|s| s.get("mother"))
|
||||
.and_then(serde_json::Value::as_object)
|
||||
{
|
||||
Some(m) => m,
|
||||
None => {
|
||||
debug!("autospawn: no 'mother' server in external-mcp.json; skipping register");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Build the SSH argv from the server entry. Expected shape:
|
||||
// {"command":"ssh","args":["-i","/key","-o","BatchMode=yes","...","host","cmd"]}
|
||||
let command = match mother.get("command").and_then(|c| c.as_str()) {
|
||||
Some("ssh") => "ssh",
|
||||
_ => {
|
||||
warn!("autospawn: mother server entry is not an SSH command; skipping register");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let args: Vec<String> = mother
|
||||
.get("args")
|
||||
.and_then(|a| a.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(str::to_string)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
if args.is_empty() {
|
||||
warn!("autospawn: mother server entry has no args; skipping register");
|
||||
return;
|
||||
}
|
||||
|
||||
// Build the JSON-RPC tools/call payload for node_register.
|
||||
let rpc_request = serde_json::json!({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "tools/call",
|
||||
"id": 1,
|
||||
"params": {
|
||||
"name": REGISTER_TOOL,
|
||||
"arguments": {
|
||||
"hostname": hostname,
|
||||
"node_type": "live-usb",
|
||||
"hw_profile": serde_json::from_str::<serde_json::Value>(hw_profile)
|
||||
.unwrap_or(serde_json::json!({"error": "unparseable"}))
|
||||
}
|
||||
}
|
||||
});
|
||||
let rpc_line = serde_json::to_string(&rpc_request).unwrap_or_default();
|
||||
|
||||
info!(
|
||||
hostname = %hostname,
|
||||
hw_bytes = hw_profile.len(),
|
||||
mother_args = ?args,
|
||||
"autospawn: attempting hw register with mother"
|
||||
);
|
||||
|
||||
// Retry loop — tailscale can lag behind daemon startup.
|
||||
for attempt in 1..=MAX_RETRIES {
|
||||
let mut child = match std::process::Command::new(command)
|
||||
.args(&args)
|
||||
.stdin(std::process::Stdio::piped())
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
if attempt < MAX_RETRIES {
|
||||
warn!(
|
||||
attempt,
|
||||
error = %e,
|
||||
"autospawn: mother SSH spawn failed; retrying in {RETRY_DELAY_SECS}s"
|
||||
);
|
||||
std::thread::sleep(std::time::Duration::from_secs(RETRY_DELAY_SECS));
|
||||
continue;
|
||||
}
|
||||
warn!(attempt, error = %e, "autospawn: mother SSH spawn failed after {MAX_RETRIES} attempts");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Feed the JSON-RPC request to colibri-mcp on mother.
|
||||
if let Some(ref mut stdin) = child.stdin {
|
||||
let _ = writeln!(stdin, "{rpc_line}");
|
||||
// Closing stdin signals EOF to colibri-mcp.
|
||||
}
|
||||
|
||||
let output = child.wait_with_output();
|
||||
|
||||
match output {
|
||||
Ok(ref out) if out.status.success() => {
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
info!(
|
||||
attempt,
|
||||
response = %stdout.trim(),
|
||||
"autospawn: hw registered with mother"
|
||||
);
|
||||
return;
|
||||
}
|
||||
Ok(out) => {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
if attempt < MAX_RETRIES {
|
||||
warn!(
|
||||
attempt,
|
||||
status = %out.status,
|
||||
stderr = %stderr.trim(),
|
||||
"autospawn: mother register failed; retrying in {RETRY_DELAY_SECS}s"
|
||||
);
|
||||
std::thread::sleep(std::time::Duration::from_secs(RETRY_DELAY_SECS));
|
||||
continue;
|
||||
}
|
||||
warn!(
|
||||
attempt,
|
||||
status = %out.status,
|
||||
stderr = %stderr.trim(),
|
||||
"autospawn: mother register failed after {MAX_RETRIES} attempts"
|
||||
);
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt < MAX_RETRIES {
|
||||
warn!(
|
||||
attempt,
|
||||
error = %e,
|
||||
"autospawn: mother SSH error; retrying in {RETRY_DELAY_SECS}s"
|
||||
);
|
||||
std::thread::sleep(std::time::Duration::from_secs(RETRY_DELAY_SECS));
|
||||
continue;
|
||||
}
|
||||
warn!(attempt, error = %e, "autospawn: mother SSH error after {MAX_RETRIES} attempts");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn basename(path: &str) -> String {
|
||||
std::path::Path::new(path)
|
||||
.file_name()
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
# - colibri user/group (privilege drop target)
|
||||
|
||||
# PROVIDE: colibri_daemon
|
||||
# REQUIRE: LOGIN cleanvar
|
||||
# REQUIRE: LOGIN cleanvar tailscaled
|
||||
# KEYWORD: shutdown
|
||||
|
||||
. /etc/rc.subr
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
# - colibri user/group (privilege drop target)
|
||||
|
||||
# PROVIDE: colibri_daemon
|
||||
# REQUIRE: LOGIN cleanvar
|
||||
# REQUIRE: LOGIN cleanvar tailscaled
|
||||
# KEYWORD: shutdown
|
||||
|
||||
. /etc/rc.subr
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue