Merge pull request 'feat: cost-aware trimming + auto-escalation (T1.4 PR 2)' (#4) from t14-pr2-trimming into main

This commit is contained in:
clawdie 2026-05-31 16:13:29 +02:00
commit d360dde1b9
2 changed files with 259 additions and 2 deletions

View file

@ -94,7 +94,7 @@ pub fn escalate(current: CostMode) -> CostMode {
let next = match current {
CostMode::Fast => CostMode::Smart,
CostMode::Smart => CostMode::Max,
CostMode::Max => CostMode::Max, // already at ceiling
CostMode::Max => CostMode::Max,
};
if next != current {
@ -108,6 +108,47 @@ pub fn escalate(current: CostMode) -> CostMode {
next
}
/// The condition that prompted a request to move to a higher cost mode.
///
/// A trigger only carries the measurements; whether they actually cross a
/// threshold is decided by the code that consumes the trigger.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EscalationTrigger {
    /// The session's byte count was compared against the current mode's budget.
    BudgetExceeded {
        /// Bytes currently used by the session.
        current_bytes: u64,
        /// Byte budget of the current mode.
        budget_bytes: u64,
    },
    /// A compaction pass was compared against the space it had to reclaim.
    CompactionInsufficient {
        /// Bytes the compaction actually freed.
        freed_bytes: u64,
        /// Bytes that needed to be freed.
        needed_bytes: u64,
    },
}
/// Decides whether `trigger` justifies moving `current` up one cost mode.
///
/// Returns `Some(next_mode)` when the trigger's strict threshold is crossed
/// (`current_bytes > budget_bytes`, or `freed_bytes < needed_bytes`) and
/// [`escalate`] yields a mode different from `current`.
///
/// Returns `None` when `current` is already [`CostMode::Max`] or when the
/// measurements in `trigger` do not cross the threshold. In both of those
/// cases [`escalate`] is not called at all.
pub fn auto_escalate(current: CostMode, trigger: &EscalationTrigger) -> Option<CostMode> {
    // Nothing above the ceiling to move to.
    if current == CostMode::Max {
        return None;
    }

    let threshold_crossed = match *trigger {
        EscalationTrigger::BudgetExceeded {
            current_bytes,
            budget_bytes,
        } => current_bytes > budget_bytes,
        EscalationTrigger::CompactionInsufficient {
            freed_bytes,
            needed_bytes,
        } => freed_bytes < needed_bytes,
    };
    if !threshold_crossed {
        return None;
    }

    // Only report a change when the mode really moved.
    let next = escalate(current);
    (next != current).then_some(next)
}
// ---------------------------------------------------------------------------
// Tool result compaction
// ---------------------------------------------------------------------------

View file

@ -95,7 +95,6 @@ pub struct PromptAssembly {
pub total_bytes: u64,
pub estimated_tokens: u64,
}
impl PromptAssembly {
pub fn to_messages(&self) -> Vec<serde_json::Value> {
let mut messages =
@ -108,6 +107,78 @@ impl PromptAssembly {
messages.extend(self.volatile_scratch.clone());
messages
}
/// Trim the assembly to fit within a cost mode budget.
///
/// The budget is `mode.session_max_bytes()`. The immutable prefix is never
/// trimmed; its length is subtracted from the budget and only the
/// appendable log and volatile scratch are candidates for removal.
///
/// Trimming order when the variable regions exceed the remaining budget:
/// 1. The volatile scratch is cleared entirely.
/// 2. If the log alone is still too large, the oldest log entries are
///    dropped until enough bytes were removed, always keeping at least the
///    two most recent entries.
///
/// Message sizes are the lengths of their JSON serialization; a message that
/// fails to serialize is counted as 0 bytes.
///
/// Returns the number of trim steps performed: one for clearing a non-empty
/// scratch (regardless of how many scratch messages it held) plus one per
/// dropped log entry. Returns 0 — and leaves `total_bytes` /
/// `estimated_tokens` untouched — when the assembly was already under budget.
///
/// Note that the result may still exceed the budget when the prefix plus the
/// two retained log entries are larger than the budget on their own.
pub fn trim_to_budget(&mut self, mode: crate::cost::CostMode) -> usize {
    // Serialized size of a single message; serialization failures count as 0.
    fn encoded_len(message: &serde_json::Value) -> u64 {
        serde_json::to_string(message).unwrap_or_default().len() as u64
    }

    // Number of most-recent log entries that are never dropped.
    const MIN_KEEP: usize = 2;

    let budget = mode.session_max_bytes();
    let prefix_bytes = self.immutable_prefix.len() as u64;
    let remaining = budget.saturating_sub(prefix_bytes);

    // Measure every message exactly once; the sizes are reused for the
    // budget check, the cut-point search and the final totals.
    let log_sizes: Vec<u64> = self.appendable_log.iter().map(encoded_len).collect();
    let mut log_bytes: u64 = log_sizes.iter().sum();
    let scratch_bytes: u64 = self.volatile_scratch.iter().map(encoded_len).sum();

    if log_bytes + scratch_bytes <= remaining {
        return 0;
    }

    // Drop volatile scratch first (cheapest).
    let mut trimmed = 0;
    if !self.volatile_scratch.is_empty() {
        self.volatile_scratch.clear();
        trimmed += 1;
    }

    // If still over budget, find how many of the oldest entries must go and
    // remove them in a single drain instead of repeated front removals.
    if log_bytes > remaining {
        let overflow = log_bytes - remaining;
        let droppable = log_sizes.len().saturating_sub(MIN_KEEP);
        let mut removed = 0u64;
        let mut cut = 0usize;
        for &size in &log_sizes[..droppable] {
            if removed >= overflow {
                break;
            }
            removed += size;
            cut += 1;
        }
        self.appendable_log.drain(..cut);
        log_bytes -= removed;
        trimmed += cut;
    }

    // The scratch is always empty at this point, so the total is just the
    // prefix plus whatever remains of the log.
    self.total_bytes = prefix_bytes + log_bytes;
    self.estimated_tokens = self.total_bytes.div_ceil(3);
    trimmed
}
}
#[derive(Debug, Clone, Default)]
@ -515,6 +586,7 @@ impl Session {
#[cfg(test)]
mod t14_tests {
use super::*;
use crate::cost::CostMode;
#[test]
fn prompt_assembly_to_messages_includes_all_regions() {
@ -565,6 +637,150 @@ mod t14_tests {
m.record(200, 300);
assert!((m.hit_rate() - 0.3).abs() < 0.001);
}
// ── trim_to_budget tests ─────────────────────────────────────
/// Builds a `PromptAssembly` fixture with the given prefix, `log_count` log
/// messages and `scratch_count` scratch messages.
///
/// Every message embeds a 10 000-character filler string, so each one
/// serializes to roughly 10 KB. `total_bytes` and `estimated_tokens` are
/// derived from the prefix length plus the serialized message lengths.
fn make_assembly(prefix: &str, log_count: usize, scratch_count: usize) -> PromptAssembly {
    // Serialized length of one fixture message.
    fn json_len(message: &serde_json::Value) -> u64 {
        serde_json::to_string(message).unwrap_or_default().len() as u64
    }

    let content = format!("msg {}", "x".repeat(10000)); // ~10KB per message

    let appendable_log: Vec<serde_json::Value> = (0..log_count)
        .map(|i| serde_json::json!({"role": "user", "content": format!("{content} #{i}")}))
        .collect();
    let volatile_scratch: Vec<serde_json::Value> = (0..scratch_count)
        .map(|i| {
            serde_json::json!({"role": "user", "content": format!("scratch {content} #{i}")})
        })
        .collect();

    let immutable_prefix = prefix.to_string();
    let total_bytes = immutable_prefix.len() as u64
        + appendable_log.iter().map(json_len).sum::<u64>()
        + volatile_scratch.iter().map(json_len).sum::<u64>();

    PromptAssembly {
        immutable_prefix,
        appendable_log,
        volatile_scratch,
        total_bytes,
        estimated_tokens: total_bytes.div_ceil(3),
    }
}
#[test]
fn trim_to_budget_under_limit_returns_zero() {
    // A 3-byte prefix, two ~10 KB log messages and one ~10 KB scratch message
    // (~30 KB in total) are expected to sit far below the Smart session
    // budget, so nothing should be trimmed.
    let mut assembly = make_assembly("SYS", 2, 1);
    assert_eq!(assembly.trim_to_budget(CostMode::Smart), 0);
}
#[test]
fn trim_to_budget_reduces_size() {
    // 50 000-byte prefix plus 80 log and 10 scratch messages of ~10 KB each.
    let fast_budget = CostMode::Fast.session_max_bytes();
    let prefix = "x".repeat(50000);
    let mut assembly = make_assembly(&prefix, 80, 10);

    // Precondition: the fixture must start out over the Fast budget.
    let orig_bytes = assembly.total_bytes;
    assert!(
        orig_bytes > fast_budget,
        "orig {orig_bytes} > budget {}",
        fast_budget
    );

    let steps = assembly.trim_to_budget(CostMode::Fast);
    assert!(steps > 0, "should have trimmed");
    assert!(assembly.total_bytes <= fast_budget, "under budget after trim");
}
#[test]
fn trim_to_budget_preserves_prefix() {
    // Trimming a heavily over-budget assembly must leave the prefix as-is.
    const PREFIX: &str = "IMMUTABLE_PREFIX_DO_NOT_TOUCH";
    let mut assembly = make_assembly(PREFIX, 1000, 5);
    let _steps = assembly.trim_to_budget(CostMode::Fast);
    assert_eq!(assembly.immutable_prefix, PREFIX, "prefix untouched");
}
#[test]
fn trim_to_budget_deterministic() {
    // 200 log messages of ~10 KB each (~2 MB) plus some scratch, trimmed
    // against the Fast budget, so the trimming path is really exercised.
    // A fixture that stays under budget would return early on both copies
    // and make this comparison vacuous.
    let mut a1 = make_assembly("SYS", 200, 3);
    let mut a2 = a1.clone();
    assert!(
        a1.total_bytes > CostMode::Fast.session_max_bytes(),
        "fixture must start over the Fast budget"
    );

    let steps1 = a1.trim_to_budget(CostMode::Fast);
    let steps2 = a2.trim_to_budget(CostMode::Fast);

    assert!(steps1 > 0, "trimming must actually happen");
    assert_eq!(steps1, steps2);
    assert_eq!(a1.appendable_log.len(), a2.appendable_log.len());
    assert_eq!(a1.to_messages(), a2.to_messages());
    assert_eq!(a1.total_bytes, a2.total_bytes);
    assert_eq!(a1.estimated_tokens, a2.estimated_tokens);
}
}
// ── auto-escalation tests ───────────────────────────────────────────
#[cfg(test)]
mod cost_tests {
    use super::super::cost::*;

    /// Shorthand for a `BudgetExceeded` trigger with the given measurements.
    fn budget_trigger(current_bytes: u64, budget_bytes: u64) -> EscalationTrigger {
        EscalationTrigger::BudgetExceeded {
            current_bytes,
            budget_bytes,
        }
    }

    /// Shorthand for a `CompactionInsufficient` trigger with the given measurements.
    fn compaction_trigger(freed_bytes: u64, needed_bytes: u64) -> EscalationTrigger {
        EscalationTrigger::CompactionInsufficient {
            freed_bytes,
            needed_bytes,
        }
    }

    #[test]
    fn auto_escalate_fast_budget_exceeded() {
        // Over budget in Fast: one step up to Smart.
        let outcome = auto_escalate(CostMode::Fast, &budget_trigger(600_000, 500_000));
        assert_eq!(outcome, Some(CostMode::Smart));
    }

    #[test]
    fn auto_escalate_smart_budget_exceeded() {
        // Over budget in Smart: one step up to Max.
        let outcome = auto_escalate(CostMode::Smart, &budget_trigger(3_000_000, 2_000_000));
        assert_eq!(outcome, Some(CostMode::Max));
    }

    #[test]
    fn auto_escalate_max_no_further() {
        // Max is the ceiling, even when the trigger is over budget.
        let outcome = auto_escalate(CostMode::Max, &budget_trigger(10_000_000, 8_000_000));
        assert_eq!(outcome, None);
    }

    #[test]
    fn auto_escalate_under_budget_no_escalation() {
        // Usage below the budget does not cross the threshold.
        let outcome = auto_escalate(CostMode::Fast, &budget_trigger(100_000, 500_000));
        assert_eq!(outcome, None);
    }

    #[test]
    fn auto_escalate_compaction_insufficient() {
        // Compaction freed less than needed: escalate.
        let outcome = auto_escalate(CostMode::Smart, &compaction_trigger(50, 500));
        assert_eq!(outcome, Some(CostMode::Max));
    }

    #[test]
    fn auto_escalate_compaction_sufficient_no_escalation() {
        // Compaction freed more than needed: stay put.
        let outcome = auto_escalate(CostMode::Smart, &compaction_trigger(500, 50));
        assert_eq!(outcome, None);
    }
}
// Tests deferred to crate-level integration tests (tests/ directory).