Merge pull request 'feat: cost-aware trimming + auto-escalation (T1.4 PR 2)' (#4) from t14-pr2-trimming into main
This commit is contained in:
commit
d360dde1b9
2 changed files with 259 additions and 2 deletions
|
|
@ -94,7 +94,7 @@ pub fn escalate(current: CostMode) -> CostMode {
|
|||
let next = match current {
|
||||
CostMode::Fast => CostMode::Smart,
|
||||
CostMode::Smart => CostMode::Max,
|
||||
CostMode::Max => CostMode::Max, // already at ceiling
|
||||
CostMode::Max => CostMode::Max,
|
||||
};
|
||||
|
||||
if next != current {
|
||||
|
|
@ -108,6 +108,47 @@ pub fn escalate(current: CostMode) -> CostMode {
|
|||
next
|
||||
}
|
||||
|
||||
/// The condition that prompted an escalation check.
///
/// Each variant carries the pair of byte counts that describe the condition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EscalationTrigger {
    /// The session's byte count went past the current mode's budget.
    BudgetExceeded {
        /// Bytes the session currently occupies.
        current_bytes: u64,
        /// Byte budget the session is measured against.
        budget_bytes: u64,
    },
    /// A compaction pass did not free enough space.
    CompactionInsufficient {
        /// Bytes the compaction pass actually freed.
        freed_bytes: u64,
        /// Bytes that had to be freed.
        needed_bytes: u64,
    },
}
|
||||
|
||||
/// Auto-escalate if the trigger warrants it, returning the new mode.
|
||||
/// Returns `None` if no escalation is needed (already at Max, or trigger
|
||||
/// doesn't cross a threshold).
|
||||
pub fn auto_escalate(current: CostMode, trigger: &EscalationTrigger) -> Option<CostMode> {
|
||||
if current == CostMode::Max {
|
||||
return None; // already at ceiling
|
||||
}
|
||||
|
||||
let should_escalate = match trigger {
|
||||
EscalationTrigger::BudgetExceeded {
|
||||
current_bytes,
|
||||
budget_bytes,
|
||||
} => current_bytes > budget_bytes,
|
||||
EscalationTrigger::CompactionInsufficient {
|
||||
freed_bytes,
|
||||
needed_bytes,
|
||||
} => freed_bytes < needed_bytes,
|
||||
};
|
||||
|
||||
if should_escalate {
|
||||
let next = escalate(current);
|
||||
if next != current {
|
||||
return Some(next);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool result compaction
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -95,7 +95,6 @@ pub struct PromptAssembly {
|
|||
pub total_bytes: u64,
|
||||
pub estimated_tokens: u64,
|
||||
}
|
||||
|
||||
impl PromptAssembly {
|
||||
pub fn to_messages(&self) -> Vec<serde_json::Value> {
|
||||
let mut messages =
|
||||
|
|
@ -108,6 +107,78 @@ impl PromptAssembly {
|
|||
messages.extend(self.volatile_scratch.clone());
|
||||
messages
|
||||
}
|
||||
|
||||
/// Trim the assembly to fit within a cost mode budget.
|
||||
///
|
||||
/// Returns the number of turns compacted (0 if under budget already).
|
||||
/// The immutable prefix is never trimmed — only the appendable log and
|
||||
/// volatile scratch are candidates.
|
||||
pub fn trim_to_budget(&mut self, mode: crate::cost::CostMode) -> usize {
|
||||
let budget = mode.session_max_bytes();
|
||||
let prefix_bytes = self.immutable_prefix.len() as u64;
|
||||
let remaining = budget.saturating_sub(prefix_bytes);
|
||||
|
||||
// How many bytes in appendable + volatile
|
||||
let log_bytes: u64 = self
|
||||
.appendable_log
|
||||
.iter()
|
||||
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||
.sum();
|
||||
let scratch_bytes: u64 = self
|
||||
.volatile_scratch
|
||||
.iter()
|
||||
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||
.sum();
|
||||
let total_var = log_bytes + scratch_bytes;
|
||||
|
||||
if total_var <= remaining {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Drop volatile scratch first (cheapest)
|
||||
let mut trimmed = 0;
|
||||
if !self.volatile_scratch.is_empty() {
|
||||
self.volatile_scratch.clear();
|
||||
trimmed += 1;
|
||||
}
|
||||
|
||||
// If still over budget, trim oldest appendable entries
|
||||
let bytes_after_scratch: u64 = self
|
||||
.appendable_log
|
||||
.iter()
|
||||
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||
.sum();
|
||||
if bytes_after_scratch > remaining {
|
||||
let overflow = bytes_after_scratch - remaining;
|
||||
let mut removed = 0u64;
|
||||
// Keep at least 2 messages (system prefix already excluded)
|
||||
let min_keep = 2usize;
|
||||
while self.appendable_log.len() > min_keep && removed < overflow {
|
||||
let front_bytes = serde_json::to_string(&self.appendable_log[0])
|
||||
.unwrap_or_default()
|
||||
.len() as u64;
|
||||
self.appendable_log.remove(0);
|
||||
removed += front_bytes;
|
||||
trimmed += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Recompute totals
|
||||
self.total_bytes = prefix_bytes
|
||||
+ self
|
||||
.appendable_log
|
||||
.iter()
|
||||
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||
.sum::<u64>()
|
||||
+ self
|
||||
.volatile_scratch
|
||||
.iter()
|
||||
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||
.sum::<u64>();
|
||||
self.estimated_tokens = self.total_bytes.div_ceil(3);
|
||||
|
||||
trimmed
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
|
|
@ -515,6 +586,7 @@ impl Session {
|
|||
#[cfg(test)]
|
||||
mod t14_tests {
|
||||
use super::*;
|
||||
use crate::cost::CostMode;
|
||||
|
||||
#[test]
|
||||
fn prompt_assembly_to_messages_includes_all_regions() {
|
||||
|
|
@ -565,6 +637,150 @@ mod t14_tests {
|
|||
m.record(200, 300);
|
||||
assert!((m.hit_rate() - 0.3).abs() < 0.001);
|
||||
}
|
||||
|
||||
// ── trim_to_budget tests ─────────────────────────────────────
|
||||
|
||||
fn make_assembly(prefix: &str, log_count: usize, scratch_count: usize) -> PromptAssembly {
|
||||
let prefix = prefix.to_string();
|
||||
let prefix_bytes = prefix.len() as u64;
|
||||
let mut log = Vec::new();
|
||||
let mut log_bytes = 0u64;
|
||||
let content = format!("msg {}", "x".repeat(10000)); // ~10KB per message
|
||||
for i in 0..log_count {
|
||||
let m = serde_json::json!({"role": "user", "content": format!("{content} #{i}")});
|
||||
log_bytes += serde_json::to_string(&m).unwrap_or_default().len() as u64;
|
||||
log.push(m);
|
||||
}
|
||||
let mut scratch = Vec::new();
|
||||
let mut scratch_bytes = 0u64;
|
||||
for i in 0..scratch_count {
|
||||
let m =
|
||||
serde_json::json!({"role": "user", "content": format!("scratch {content} #{i}")});
|
||||
scratch_bytes += serde_json::to_string(&m).unwrap_or_default().len() as u64;
|
||||
scratch.push(m);
|
||||
}
|
||||
PromptAssembly {
|
||||
immutable_prefix: prefix,
|
||||
appendable_log: log,
|
||||
volatile_scratch: scratch,
|
||||
total_bytes: prefix_bytes + log_bytes + scratch_bytes,
|
||||
estimated_tokens: (prefix_bytes + log_bytes + scratch_bytes).div_ceil(3),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn trim_to_budget_under_limit_returns_zero() {
    // A 3-byte prefix, 2 log messages and 1 scratch message at ~10 KB apiece
    // (see `make_assembly`) come to roughly 30 KB, which is expected to sit
    // well under the Smart budget, so nothing should be trimmed.
    let mut assembly = make_assembly("SYS", 2, 1);
    assert_eq!(assembly.trim_to_budget(CostMode::Smart), 0);
}
|
||||
|
||||
#[test]
fn trim_to_budget_reduces_size() {
    let budget = CostMode::Fast.session_max_bytes();

    // 50 KB prefix + 80 log messages + 10 scratch messages.
    let mut assembly = make_assembly(&"x".repeat(50000), 80, 10);

    // Sanity check: the fixture must start out over the Fast budget.
    let orig_bytes = assembly.total_bytes;
    assert!(orig_bytes > budget, "orig {orig_bytes} > budget {}", budget);

    let steps = assembly.trim_to_budget(CostMode::Fast);
    assert!(steps > 0, "should have trimmed");
    assert!(assembly.total_bytes <= budget, "under budget after trim");
}
|
||||
|
||||
#[test]
fn trim_to_budget_preserves_prefix() {
    // Trimming a large assembly must leave the immutable prefix as it was.
    let expected = "IMMUTABLE_PREFIX_DO_NOT_TOUCH";
    let mut assembly = make_assembly(expected, 1000, 5);

    let _steps = assembly.trim_to_budget(CostMode::Fast);

    assert_eq!(assembly.immutable_prefix, expected, "prefix untouched");
}
|
||||
|
||||
#[test]
fn trim_to_budget_deterministic() {
    // Two identical assemblies trimmed with the same mode must end up with
    // the same log length and byte total.
    // NOTE(review): 20 messages of ~10 KB may already fit the Smart budget,
    // in which case no trimming happens and this check is trivially true.
    // Confirm the fixture is actually over budget.
    let template = make_assembly("SYS", 20, 0);
    let (mut first, mut second) = (template.clone(), template);

    first.trim_to_budget(CostMode::Smart);
    second.trim_to_budget(CostMode::Smart);

    assert_eq!(first.appendable_log.len(), second.appendable_log.len());
    assert_eq!(first.total_bytes, second.total_bytes);
}
|
||||
}
|
||||
|
||||
// ── auto-escalation tests ───────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod cost_tests {
    use super::super::cost::*;

    /// Shorthand for an `EscalationTrigger::BudgetExceeded` trigger.
    fn budget_trigger(current_bytes: u64, budget_bytes: u64) -> EscalationTrigger {
        EscalationTrigger::BudgetExceeded {
            current_bytes,
            budget_bytes,
        }
    }

    /// Shorthand for an `EscalationTrigger::CompactionInsufficient` trigger.
    fn compaction_trigger(freed_bytes: u64, needed_bytes: u64) -> EscalationTrigger {
        EscalationTrigger::CompactionInsufficient {
            freed_bytes,
            needed_bytes,
        }
    }

    /// Fast over its budget steps up to Smart.
    #[test]
    fn auto_escalate_fast_budget_exceeded() {
        let outcome = auto_escalate(CostMode::Fast, &budget_trigger(600_000, 500_000));
        assert_eq!(outcome, Some(CostMode::Smart));
    }

    /// Smart over its budget steps up to Max.
    #[test]
    fn auto_escalate_smart_budget_exceeded() {
        let outcome = auto_escalate(CostMode::Smart, &budget_trigger(3_000_000, 2_000_000));
        assert_eq!(outcome, Some(CostMode::Max));
    }

    /// Max is the ceiling: even an exceeded budget yields no escalation.
    #[test]
    fn auto_escalate_max_no_further() {
        let outcome = auto_escalate(CostMode::Max, &budget_trigger(10_000_000, 8_000_000));
        assert_eq!(outcome, None);
    }

    /// Usage below the budget does not escalate.
    #[test]
    fn auto_escalate_under_budget_no_escalation() {
        let outcome = auto_escalate(CostMode::Fast, &budget_trigger(100_000, 500_000));
        assert_eq!(outcome, None);
    }

    /// Compaction that frees less than needed escalates.
    #[test]
    fn auto_escalate_compaction_insufficient() {
        let outcome = auto_escalate(CostMode::Smart, &compaction_trigger(50, 500));
        assert_eq!(outcome, Some(CostMode::Max));
    }

    /// Compaction that frees more than needed does not escalate.
    #[test]
    fn auto_escalate_compaction_sufficient_no_escalation() {
        let outcome = auto_escalate(CostMode::Smart, &compaction_trigger(500, 50));
        assert_eq!(outcome, None);
    }
}
|
||||
|
||||
// Tests deferred to crate-level integration tests (tests/ directory).
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue