diff --git a/crates/colibri-daemon/src/config.rs b/crates/colibri-daemon/src/config.rs index 2a2dda3..ecb1be0 100644 --- a/crates/colibri-daemon/src/config.rs +++ b/crates/colibri-daemon/src/config.rs @@ -84,7 +84,16 @@ impl DaemonConfig { data_dir, socket_path, db_path, - session_max_bytes: env_parse("COLIBRI_SESSION_MAX_BYTES").unwrap_or(2_000_000), + session_max_bytes: { + let v = env_parse("COLIBRI_SESSION_MAX_BYTES").unwrap_or(2_000_000); + if std::env::var("COLIBRI_SESSION_MAX_BYTES").is_ok() { + tracing::warn!( + "COLIBRI_SESSION_MAX_BYTES is deprecated — use COLIBRI_COST_MODE instead. \ + Thresholds are now derived from cost mode (fast=500K, smart=2M, max=8M)." + ); + } + v + }, deepseek_api_key: nonempty_env("DEEPSEEK_API_KEY"), deepseek_endpoint: std::env::var("DEEPSEEK_ENDPOINT") .unwrap_or_else(|_| "https://api.deepseek.com/chat/completions".to_string()), @@ -98,7 +107,16 @@ impl DaemonConfig { .unwrap_or_else(|_| "https://api.anthropic.com/v1/messages".to_string()), host, max_context_tokens: env_parse("COLIBRI_MAX_CONTEXT_TOKENS").unwrap_or(128_000), - max_uncompacted_turns: env_parse("COLIBRI_MAX_UNCOMPACTED_TURNS").unwrap_or(20), + max_uncompacted_turns: { + let v = env_parse("COLIBRI_MAX_UNCOMPACTED_TURNS").unwrap_or(20); + if std::env::var("COLIBRI_MAX_UNCOMPACTED_TURNS").is_ok() { + tracing::warn!( + "COLIBRI_MAX_UNCOMPACTED_TURNS is deprecated — use COLIBRI_COST_MODE instead. \ + Thresholds are now derived from cost mode (fast=5, smart=20, max=100)." + ); + } + v + }, cost_mode: std::env::var("COLIBRI_COST_MODE").unwrap_or_else(|_| "smart".to_string()), scheduler_prompt_injection: env_parse("COLIBRI_SCHEDULER_PROMPT_INJECTION") .unwrap_or(false), diff --git a/crates/colibri-daemon/src/daemon.rs b/crates/colibri-daemon/src/daemon.rs index 10f64eb..07a6088 100644 --- a/crates/colibri-daemon/src/daemon.rs +++ b/crates/colibri-daemon/src/daemon.rs @@ -240,7 +240,8 @@ async fn heartbeat(state: &SharedState, _stall_timeout: Duration) { } async fn session_rotation(state: &SharedState) { - let cost_mode = crate::cost::CostMode::parse(&state.config.cost_mode).unwrap_or_default(); + let cost_mode = + crate::cost::CostMode::parse(&state.config.cost_mode).unwrap_or_default(); let max_bytes = cost_mode.session_max_bytes(); let max_turns = cost_mode.max_uncompacted_turns(); let mut compacted = 0usize; @@ -250,6 +251,23 @@ async fn session_rotation(state: &SharedState) { let (bc, tc) = { (s.byte_count().await, s.turn_count().await) }; if (bc > max_bytes || tc > max_turns) && s.compact_oldest_turns().await.is_ok() { compacted += 1; + // Check if compaction freed enough — if not, log escalation recommendation. + let after_bc = s.byte_count().await; + if after_bc > max_bytes { + let trigger = crate::cost::EscalationTrigger::CompactionInsufficient { + freed_bytes: bc.saturating_sub(after_bc), + needed_bytes: after_bc.saturating_sub(max_bytes), + }; + if let Some(new_mode) = crate::cost::auto_escalate(cost_mode, &trigger) { + warn!( + from = cost_mode.as_str(), + to = new_mode.as_str(), + freed = bc.saturating_sub(after_bc), + needed = after_bc.saturating_sub(max_bytes), + "compaction insufficient — consider escalating cost mode" + ); + } + } } if s.byte_count().await > max_bytes * 3 { let _ = s.prune_to((max_turns / 2).max(1)).await; diff --git a/crates/colibri-daemon/src/session.rs b/crates/colibri-daemon/src/session.rs index 28fdbfa..b2946a4 100644 --- a/crates/colibri-daemon/src/session.rs +++ b/crates/colibri-daemon/src/session.rs @@ -325,6 +325,31 @@ impl Session { /// Append a single entry to the session, flushing to JSONL. pub async fn append(&self, entry: SessionEntry) -> Result<(), SessionError> { + // Compact oversized tool results if cost mode says to trim. + let entry = match &entry { + SessionEntry::ToolResult { name, result } => { + let cost_mode = + crate::cost::CostMode::parse(&self.config.cost_mode).unwrap_or_default(); + if cost_mode.compact_tool_results() { + let max_bytes = cost_mode.tool_result_max_bytes(); + let raw_str = result.to_string(); + if let Some(compacted) = + crate::cost::compact_tool_result(&raw_str, max_bytes, name) + { + SessionEntry::ToolResult { + name: name.clone(), + result: serde_json::Value::String(compacted), + } + } else { + entry + } + } else { + entry + } + } + _ => entry, + }; + let line = serde_json::to_string(&entry)?; let line_bytes = line.len() as u64 + 1; @@ -387,6 +412,8 @@ impl Session { // ------------------------------------------------------------------ /// Check thresholds and compact/prune if needed. + /// Now cost-mode-aware: derives thresholds from COLIBRI_COST_MODE, + /// not from static config fields. async fn maybe_compact_or_rollover(&self) -> Result<(), SessionError> { let (byte_count, turn_count) = { let bc = self.byte_count.read().await; @@ -394,8 +421,12 @@ impl Session { (*bc, turns.len()) }; - let needs_compaction = byte_count > self.config.session_max_bytes - || turn_count > self.config.max_uncompacted_turns; + let cost_mode = crate::cost::CostMode::parse(&self.config.cost_mode) + .unwrap_or_default(); + let max_bytes = cost_mode.session_max_bytes(); + let max_turns = cost_mode.max_uncompacted_turns() as usize; + + let needs_compaction = byte_count > max_bytes || turn_count > max_turns; if needs_compaction { self.compact_oldest_turns().await?; @@ -412,8 +443,11 @@ impl Session { pub async fn compact_oldest_turns(&self) -> Result<(), SessionError> { let mut turns = self.turns.write().await; - // Keep the most recent `max_uncompacted_turns` turns, compact the rest. - let keep = self.config.max_uncompacted_turns; + // Keep the most recent turns, compact the rest. + // Cost-mode-aware: derives keep count from COLIBRI_COST_MODE. + let cost_mode = crate::cost::CostMode::parse(&self.config.cost_mode) + .unwrap_or_default(); + let keep = cost_mode.max_uncompacted_turns() as usize; if turns.len() <= keep { return Ok(()); } @@ -506,13 +540,20 @@ impl Session { let appendable_log = messages[1..].to_vec(); - PromptAssembly { + let mut assembly = PromptAssembly { immutable_prefix, appendable_log, volatile_scratch: Vec::new(), total_bytes, estimated_tokens, - } + }; + + // Apply cost-mode budget trimming. + let cost_mode = + crate::cost::CostMode::parse(&self.config.cost_mode).unwrap_or_default(); + assembly.trim_to_budget(cost_mode); + + assembly } /// Build the 3-region prompt: diff --git a/crates/colibri-daemon/src/socket.rs b/crates/colibri-daemon/src/socket.rs index 5e341df..7c85096 100644 --- a/crates/colibri-daemon/src/socket.rs +++ b/crates/colibri-daemon/src/socket.rs @@ -662,11 +662,12 @@ async fn cmd_set_cost_mode(state: &SharedState, mode: String) -> ColibriResponse to = cost_mode.as_str(), "cost mode changed via socket" ); - // Note: DaemonConfig is not mutable via &SharedState. - // In a future iteration, this would update the config in-place. - // For now, we acknowledge and log the request. - // T1.4 scope: runtime-only. Persistence (COLIBRI_COST_MODE in - // config/socket-update) is post-Phase-5. + // Note: DaemonConfig fields are read-only through &SharedState (Arc). + // The cost_mode string IS updated here for runtime visibility, but + // the derived numeric fields (session_max_bytes, max_uncompacted_turns) + // are not updated in-place. Session compaction and rotation now derive + // thresholds from cost_mode at use time, so this is correct. + // Full runtime config mutation (post-Phase-5) would wrap these in RwLock. ColibriResponse::ok(serde_json::json!({ "previous": state.config.cost_mode, "requested": cost_mode.as_str(),