/// The condition that prompted a cost-mode escalation check.
///
/// Each variant carries the two byte counts that are compared when deciding
/// whether the condition actually warrants moving to a higher mode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EscalationTrigger {
    /// The session's byte count is being measured against the budget of the
    /// current mode.
    BudgetExceeded {
        /// Bytes the session currently occupies.
        current_bytes: u64,
        /// Byte budget the session is measured against.
        budget_bytes: u64,
    },
    /// A compaction pass ran but may not have freed enough space.
    CompactionInsufficient {
        /// Bytes the compaction pass freed.
        freed_bytes: u64,
        /// Bytes that needed to be freed.
        needed_bytes: u64,
    },
}
+pub fn auto_escalate(current: CostMode, trigger: &EscalationTrigger) -> Option { + if current == CostMode::Max { + return None; // already at ceiling + } + + let should_escalate = match trigger { + EscalationTrigger::BudgetExceeded { + current_bytes, + budget_bytes, + } => current_bytes > budget_bytes, + EscalationTrigger::CompactionInsufficient { + freed_bytes, + needed_bytes, + } => freed_bytes < needed_bytes, + }; + + if should_escalate { + let next = escalate(current); + if next != current { + return Some(next); + } + } + + None +} + // --------------------------------------------------------------------------- // Tool result compaction // --------------------------------------------------------------------------- diff --git a/crates/colibri-daemon/src/session.rs b/crates/colibri-daemon/src/session.rs index b618708..5e9d763 100644 --- a/crates/colibri-daemon/src/session.rs +++ b/crates/colibri-daemon/src/session.rs @@ -95,7 +95,6 @@ pub struct PromptAssembly { pub total_bytes: u64, pub estimated_tokens: u64, } - impl PromptAssembly { pub fn to_messages(&self) -> Vec { let mut messages = @@ -108,6 +107,78 @@ impl PromptAssembly { messages.extend(self.volatile_scratch.clone()); messages } + + /// Trim the assembly to fit within a cost mode budget. + /// + /// Returns the number of turns compacted (0 if under budget already). + /// The immutable prefix is never trimmed — only the appendable log and + /// volatile scratch are candidates. 
+ pub fn trim_to_budget(&mut self, mode: crate::cost::CostMode) -> usize { + let budget = mode.session_max_bytes(); + let prefix_bytes = self.immutable_prefix.len() as u64; + let remaining = budget.saturating_sub(prefix_bytes); + + // How many bytes in appendable + volatile + let log_bytes: u64 = self + .appendable_log + .iter() + .map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64) + .sum(); + let scratch_bytes: u64 = self + .volatile_scratch + .iter() + .map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64) + .sum(); + let total_var = log_bytes + scratch_bytes; + + if total_var <= remaining { + return 0; + } + + // Drop volatile scratch first (cheapest) + let mut trimmed = 0; + if !self.volatile_scratch.is_empty() { + self.volatile_scratch.clear(); + trimmed += 1; + } + + // If still over budget, trim oldest appendable entries + let bytes_after_scratch: u64 = self + .appendable_log + .iter() + .map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64) + .sum(); + if bytes_after_scratch > remaining { + let overflow = bytes_after_scratch - remaining; + let mut removed = 0u64; + // Keep at least 2 messages (system prefix already excluded) + let min_keep = 2usize; + while self.appendable_log.len() > min_keep && removed < overflow { + let front_bytes = serde_json::to_string(&self.appendable_log[0]) + .unwrap_or_default() + .len() as u64; + self.appendable_log.remove(0); + removed += front_bytes; + trimmed += 1; + } + } + + // Recompute totals + self.total_bytes = prefix_bytes + + self + .appendable_log + .iter() + .map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64) + .sum::() + + self + .volatile_scratch + .iter() + .map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64) + .sum::(); + self.estimated_tokens = self.total_bytes.div_ceil(3); + + trimmed + } } #[derive(Debug, Clone, Default)] @@ -515,6 +586,7 @@ impl Session { #[cfg(test)] mod t14_tests { use super::*; + use crate::cost::CostMode; #[test] fn 
#[cfg(test)]
mod t14_tests {
    use super::*;
    use crate::cost::CostMode;

    // ── trim_to_budget tests ─────────────────────────────────────

    /// Build a `PromptAssembly` with the given prefix, `log_count` log
    /// messages and `scratch_count` scratch messages.
    ///
    /// Every message carries 10 000 filler characters, so each one serializes
    /// to slightly more than 10 000 bytes. The cached totals are computed the
    /// same way `trim_to_budget` measures sizes.
    fn make_assembly(prefix: &str, log_count: usize, scratch_count: usize) -> PromptAssembly {
        let content = format!("msg {}", "x".repeat(10000));
        let encoded_len =
            |m: &serde_json::Value| serde_json::to_string(m).unwrap_or_default().len() as u64;

        let log: Vec<serde_json::Value> = (0..log_count)
            .map(|i| serde_json::json!({"role": "user", "content": format!("{content} #{i}")}))
            .collect();
        let scratch: Vec<serde_json::Value> = (0..scratch_count)
            .map(|i| {
                serde_json::json!({"role": "user", "content": format!("scratch {content} #{i}")})
            })
            .collect();

        let total_bytes = prefix.len() as u64
            + log.iter().map(encoded_len).sum::<u64>()
            + scratch.iter().map(encoded_len).sum::<u64>();

        PromptAssembly {
            immutable_prefix: prefix.to_string(),
            appendable_log: log,
            volatile_scratch: scratch,
            total_bytes,
            estimated_tokens: total_bytes.div_ceil(3),
        }
    }

    #[test]
    fn trim_to_budget_under_limit_returns_zero() {
        let mut a = make_assembly("SYS", 2, 1);
        // 3-byte prefix + 3 messages of ~10 KB each (~30 KB total); this is
        // expected to sit well below the Smart budget.
        let trimmed = a.trim_to_budget(CostMode::Smart);
        assert_eq!(trimmed, 0);
    }

    #[test]
    fn trim_to_budget_reduces_size() {
        let big = "x".repeat(50000);
        let mut a = make_assembly(&big, 80, 10);
        let orig_bytes = a.total_bytes;
        assert!(
            orig_bytes > CostMode::Fast.session_max_bytes(),
            "orig {orig_bytes} > budget {}",
            CostMode::Fast.session_max_bytes()
        );
        let trimmed = a.trim_to_budget(CostMode::Fast);
        assert!(trimmed > 0, "should have trimmed");
        assert!(
            a.total_bytes <= CostMode::Fast.session_max_bytes(),
            "under budget after trim"
        );
    }

    #[test]
    fn trim_to_budget_preserves_prefix() {
        let prefix = "IMMUTABLE_PREFIX_DO_NOT_TOUCH";
        let mut a = make_assembly(prefix, 1000, 5);
        let _ = a.trim_to_budget(CostMode::Fast);
        assert_eq!(a.immutable_prefix, prefix, "prefix untouched");
    }

    #[test]
    fn trim_to_budget_deterministic() {
        // Size the log from the budget itself so the trim path is guaranteed
        // to run: every message serializes to more than 10 000 bytes.
        let budget = CostMode::Fast.session_max_bytes();
        let log_count = (budget / 10_000) as usize + 3;

        let mut a1 = make_assembly("SYS", log_count, 0);
        let mut a2 = a1.clone();
        assert!(a1.total_bytes > budget, "fixture must start over budget");

        let t1 = a1.trim_to_budget(CostMode::Fast);
        let t2 = a2.trim_to_budget(CostMode::Fast);

        assert!(t1 > 0, "should have trimmed");
        assert_eq!(t1, t2);
        assert_eq!(a1.appendable_log.len(), a2.appendable_log.len());
        assert_eq!(a1.total_bytes, a2.total_bytes);
    }
}

// ── auto-escalation tests ───────────────────────────────────────────

#[cfg(test)]
mod cost_tests {
    use crate::cost::{auto_escalate, CostMode, EscalationTrigger};

    #[test]
    fn auto_escalate_fast_budget_exceeded() {
        let trigger = EscalationTrigger::BudgetExceeded {
            current_bytes: 600_000,
            budget_bytes: 500_000,
        };
        assert_eq!(
            auto_escalate(CostMode::Fast, &trigger),
            Some(CostMode::Smart)
        );
    }

    #[test]
    fn auto_escalate_smart_budget_exceeded() {
        let trigger = EscalationTrigger::BudgetExceeded {
            current_bytes: 3_000_000,
            budget_bytes: 2_000_000,
        };
        assert_eq!(
            auto_escalate(CostMode::Smart, &trigger),
            Some(CostMode::Max)
        );
    }

    #[test]
    fn auto_escalate_max_no_further() {
        let trigger = EscalationTrigger::BudgetExceeded {
            current_bytes: 10_000_000,
            budget_bytes: 8_000_000,
        };
        assert_eq!(auto_escalate(CostMode::Max, &trigger), None);
    }

    #[test]
    fn auto_escalate_under_budget_no_escalation() {
        let trigger = EscalationTrigger::BudgetExceeded {
            current_bytes: 100_000,
            budget_bytes: 500_000,
        };
        assert_eq!(auto_escalate(CostMode::Fast, &trigger), None);
    }

    #[test]
    fn auto_escalate_compaction_insufficient() {
        let trigger = EscalationTrigger::CompactionInsufficient {
            freed_bytes: 50,
            needed_bytes: 500,
        };
        assert_eq!(
            auto_escalate(CostMode::Smart, &trigger),
            Some(CostMode::Max)
        );
    }

    #[test]
    fn auto_escalate_compaction_sufficient_no_escalation() {
        let trigger = EscalationTrigger::CompactionInsufficient {
            freed_bytes: 500,
            needed_bytes: 50,
        };
        assert_eq!(auto_escalate(CostMode::Smart, &trigger), None);
    }
}

// Tests deferred to crate-level integration tests (tests/ directory).