Merge pull request 'feat: cost-aware trimming + auto-escalation (T1.4 PR 2)' (#4) from t14-pr2-trimming into main
This commit is contained in:
commit
d360dde1b9
2 changed files with 259 additions and 2 deletions
|
|
@ -94,7 +94,7 @@ pub fn escalate(current: CostMode) -> CostMode {
|
||||||
let next = match current {
|
let next = match current {
|
||||||
CostMode::Fast => CostMode::Smart,
|
CostMode::Fast => CostMode::Smart,
|
||||||
CostMode::Smart => CostMode::Max,
|
CostMode::Smart => CostMode::Max,
|
||||||
CostMode::Max => CostMode::Max, // already at ceiling
|
CostMode::Max => CostMode::Max,
|
||||||
};
|
};
|
||||||
|
|
||||||
if next != current {
|
if next != current {
|
||||||
|
|
@ -108,6 +108,47 @@ pub fn escalate(current: CostMode) -> CostMode {
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// What triggered an escalation.
///
/// Every variant carries only plain `u64` byte counts, so the type is `Copy`
/// and can be passed by value or by reference interchangeably.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscalationTrigger {
    /// Session byte count exceeded current mode budget.
    BudgetExceeded {
        /// Bytes the session currently occupies.
        current_bytes: u64,
        /// Byte budget the session is measured against.
        budget_bytes: u64,
    },
    /// Compaction didn't free enough space.
    CompactionInsufficient {
        /// Bytes that compaction released.
        freed_bytes: u64,
        /// Bytes that had to be released.
        needed_bytes: u64,
    },
}
|
||||||
|
|
||||||
|
/// Auto-escalate if the trigger warrants it, returning the new mode.
|
||||||
|
/// Returns `None` if no escalation is needed (already at Max, or trigger
|
||||||
|
/// doesn't cross a threshold).
|
||||||
|
pub fn auto_escalate(current: CostMode, trigger: &EscalationTrigger) -> Option<CostMode> {
|
||||||
|
if current == CostMode::Max {
|
||||||
|
return None; // already at ceiling
|
||||||
|
}
|
||||||
|
|
||||||
|
let should_escalate = match trigger {
|
||||||
|
EscalationTrigger::BudgetExceeded {
|
||||||
|
current_bytes,
|
||||||
|
budget_bytes,
|
||||||
|
} => current_bytes > budget_bytes,
|
||||||
|
EscalationTrigger::CompactionInsufficient {
|
||||||
|
freed_bytes,
|
||||||
|
needed_bytes,
|
||||||
|
} => freed_bytes < needed_bytes,
|
||||||
|
};
|
||||||
|
|
||||||
|
if should_escalate {
|
||||||
|
let next = escalate(current);
|
||||||
|
if next != current {
|
||||||
|
return Some(next);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Tool result compaction
|
// Tool result compaction
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,6 @@ pub struct PromptAssembly {
|
||||||
pub total_bytes: u64,
|
pub total_bytes: u64,
|
||||||
pub estimated_tokens: u64,
|
pub estimated_tokens: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PromptAssembly {
|
impl PromptAssembly {
|
||||||
pub fn to_messages(&self) -> Vec<serde_json::Value> {
|
pub fn to_messages(&self) -> Vec<serde_json::Value> {
|
||||||
let mut messages =
|
let mut messages =
|
||||||
|
|
@ -108,6 +107,78 @@ impl PromptAssembly {
|
||||||
messages.extend(self.volatile_scratch.clone());
|
messages.extend(self.volatile_scratch.clone());
|
||||||
messages
|
messages
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Trim the assembly to fit within a cost mode budget.
|
||||||
|
///
|
||||||
|
/// Returns the number of turns compacted (0 if under budget already).
|
||||||
|
/// The immutable prefix is never trimmed — only the appendable log and
|
||||||
|
/// volatile scratch are candidates.
|
||||||
|
pub fn trim_to_budget(&mut self, mode: crate::cost::CostMode) -> usize {
|
||||||
|
let budget = mode.session_max_bytes();
|
||||||
|
let prefix_bytes = self.immutable_prefix.len() as u64;
|
||||||
|
let remaining = budget.saturating_sub(prefix_bytes);
|
||||||
|
|
||||||
|
// How many bytes in appendable + volatile
|
||||||
|
let log_bytes: u64 = self
|
||||||
|
.appendable_log
|
||||||
|
.iter()
|
||||||
|
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||||
|
.sum();
|
||||||
|
let scratch_bytes: u64 = self
|
||||||
|
.volatile_scratch
|
||||||
|
.iter()
|
||||||
|
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||||
|
.sum();
|
||||||
|
let total_var = log_bytes + scratch_bytes;
|
||||||
|
|
||||||
|
if total_var <= remaining {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop volatile scratch first (cheapest)
|
||||||
|
let mut trimmed = 0;
|
||||||
|
if !self.volatile_scratch.is_empty() {
|
||||||
|
self.volatile_scratch.clear();
|
||||||
|
trimmed += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If still over budget, trim oldest appendable entries
|
||||||
|
let bytes_after_scratch: u64 = self
|
||||||
|
.appendable_log
|
||||||
|
.iter()
|
||||||
|
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||||
|
.sum();
|
||||||
|
if bytes_after_scratch > remaining {
|
||||||
|
let overflow = bytes_after_scratch - remaining;
|
||||||
|
let mut removed = 0u64;
|
||||||
|
// Keep at least 2 messages (system prefix already excluded)
|
||||||
|
let min_keep = 2usize;
|
||||||
|
while self.appendable_log.len() > min_keep && removed < overflow {
|
||||||
|
let front_bytes = serde_json::to_string(&self.appendable_log[0])
|
||||||
|
.unwrap_or_default()
|
||||||
|
.len() as u64;
|
||||||
|
self.appendable_log.remove(0);
|
||||||
|
removed += front_bytes;
|
||||||
|
trimmed += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recompute totals
|
||||||
|
self.total_bytes = prefix_bytes
|
||||||
|
+ self
|
||||||
|
.appendable_log
|
||||||
|
.iter()
|
||||||
|
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||||
|
.sum::<u64>()
|
||||||
|
+ self
|
||||||
|
.volatile_scratch
|
||||||
|
.iter()
|
||||||
|
.map(|m| serde_json::to_string(m).unwrap_or_default().len() as u64)
|
||||||
|
.sum::<u64>();
|
||||||
|
self.estimated_tokens = self.total_bytes.div_ceil(3);
|
||||||
|
|
||||||
|
trimmed
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
|
|
@ -515,6 +586,7 @@ impl Session {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod t14_tests {
|
mod t14_tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::cost::CostMode;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn prompt_assembly_to_messages_includes_all_regions() {
|
fn prompt_assembly_to_messages_includes_all_regions() {
|
||||||
|
|
@ -565,6 +637,150 @@ mod t14_tests {
|
||||||
m.record(200, 300);
|
m.record(200, 300);
|
||||||
assert!((m.hit_rate() - 0.3).abs() < 0.001);
|
assert!((m.hit_rate() - 0.3).abs() < 0.001);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── trim_to_budget tests ─────────────────────────────────────
|
||||||
|
|
||||||
|
fn make_assembly(prefix: &str, log_count: usize, scratch_count: usize) -> PromptAssembly {
|
||||||
|
let prefix = prefix.to_string();
|
||||||
|
let prefix_bytes = prefix.len() as u64;
|
||||||
|
let mut log = Vec::new();
|
||||||
|
let mut log_bytes = 0u64;
|
||||||
|
let content = format!("msg {}", "x".repeat(10000)); // ~10KB per message
|
||||||
|
for i in 0..log_count {
|
||||||
|
let m = serde_json::json!({"role": "user", "content": format!("{content} #{i}")});
|
||||||
|
log_bytes += serde_json::to_string(&m).unwrap_or_default().len() as u64;
|
||||||
|
log.push(m);
|
||||||
|
}
|
||||||
|
let mut scratch = Vec::new();
|
||||||
|
let mut scratch_bytes = 0u64;
|
||||||
|
for i in 0..scratch_count {
|
||||||
|
let m =
|
||||||
|
serde_json::json!({"role": "user", "content": format!("scratch {content} #{i}")});
|
||||||
|
scratch_bytes += serde_json::to_string(&m).unwrap_or_default().len() as u64;
|
||||||
|
scratch.push(m);
|
||||||
|
}
|
||||||
|
PromptAssembly {
|
||||||
|
immutable_prefix: prefix,
|
||||||
|
appendable_log: log,
|
||||||
|
volatile_scratch: scratch,
|
||||||
|
total_bytes: prefix_bytes + log_bytes + scratch_bytes,
|
||||||
|
estimated_tokens: (prefix_bytes + log_bytes + scratch_bytes).div_ceil(3),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn trim_to_budget_under_limit_returns_zero() {
    // 3-byte prefix plus three ~10KB fixture messages (2 log + 1 scratch):
    // expected to sit far below the Smart budget, so nothing is trimmed.
    let mut assembly = make_assembly("SYS", 2, 1);
    assert_eq!(assembly.trim_to_budget(CostMode::Smart), 0);
}
|
||||||
|
|
||||||
|
#[test]
fn trim_to_budget_reduces_size() {
    let budget = CostMode::Fast.session_max_bytes();
    let mut assembly = make_assembly(&"x".repeat(50000), 80, 10);

    // Precondition: the fixture must start out over the Fast budget.
    let orig_bytes = assembly.total_bytes;
    assert!(orig_bytes > budget, "orig {orig_bytes} > budget {}", budget);

    let trimmed = assembly.trim_to_budget(CostMode::Fast);
    assert!(trimmed > 0, "should have trimmed");
    assert!(assembly.total_bytes <= budget, "under budget after trim");
}
|
||||||
|
|
||||||
|
#[test]
fn trim_to_budget_preserves_prefix() {
    const PREFIX: &str = "IMMUTABLE_PREFIX_DO_NOT_TOUCH";
    let mut assembly = make_assembly(PREFIX, 1000, 5);

    // The return value is irrelevant here; only the prefix is inspected.
    let _ = assembly.trim_to_budget(CostMode::Fast);

    assert_eq!(assembly.immutable_prefix, PREFIX, "prefix untouched");
}
|
||||||
|
|
||||||
|
#[test]
fn trim_to_budget_deterministic() {
    // 200 fixture messages of ~10KB each put the assembly over the Fast
    // budget, so the trimming path actually runs; an under-budget fixture
    // would make this comparison pass trivially.
    let mut a1 = make_assembly("SYS", 200, 0);
    let mut a2 = a1.clone();

    let n1 = a1.trim_to_budget(CostMode::Fast);
    let n2 = a2.trim_to_budget(CostMode::Fast);

    assert!(n1 > 0, "fixture should require trimming");
    assert_eq!(n1, n2);
    assert_eq!(a1.appendable_log.len(), a2.appendable_log.len());
    assert_eq!(a1.appendable_log, a2.appendable_log);
    assert_eq!(a1.total_bytes, a2.total_bytes);
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── auto-escalation tests ───────────────────────────────────────────
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod cost_tests {
|
||||||
|
use super::super::cost::*;
|
||||||
|
|
||||||
|
#[test]
fn auto_escalate_fast_budget_exceeded() {
    // Usage above the budget while in Fast: expect a step up to Smart.
    let over_budget = EscalationTrigger::BudgetExceeded {
        current_bytes: 600_000,
        budget_bytes: 500_000,
    };
    let outcome = auto_escalate(CostMode::Fast, &over_budget);
    assert_eq!(outcome, Some(CostMode::Smart));
}
|
||||||
|
|
||||||
|
#[test]
fn auto_escalate_smart_budget_exceeded() {
    // Usage above the budget while in Smart: expect a step up to Max.
    let over_budget = EscalationTrigger::BudgetExceeded {
        current_bytes: 3_000_000,
        budget_bytes: 2_000_000,
    };
    let outcome = auto_escalate(CostMode::Smart, &over_budget);
    assert_eq!(outcome, Some(CostMode::Max));
}
|
||||||
|
|
||||||
|
#[test]
fn auto_escalate_max_no_further() {
    // Max is the ceiling: even an exceeded budget yields no new mode.
    let over_budget = EscalationTrigger::BudgetExceeded {
        current_bytes: 10_000_000,
        budget_bytes: 8_000_000,
    };
    let outcome = auto_escalate(CostMode::Max, &over_budget);
    assert_eq!(outcome, None);
}
|
||||||
|
|
||||||
|
#[test]
fn auto_escalate_under_budget_no_escalation() {
    // Usage below the budget: the trigger does not cross its threshold.
    let within_budget = EscalationTrigger::BudgetExceeded {
        current_bytes: 100_000,
        budget_bytes: 500_000,
    };
    let outcome = auto_escalate(CostMode::Fast, &within_budget);
    assert_eq!(outcome, None);
}
|
||||||
|
|
||||||
|
#[test]
fn auto_escalate_compaction_insufficient() {
    // Compaction freed less than needed: Smart should step up to Max.
    let shortfall = EscalationTrigger::CompactionInsufficient {
        freed_bytes: 50,
        needed_bytes: 500,
    };
    let outcome = auto_escalate(CostMode::Smart, &shortfall);
    assert_eq!(outcome, Some(CostMode::Max));
}
|
||||||
|
|
||||||
|
#[test]
fn auto_escalate_compaction_sufficient_no_escalation() {
    // Compaction freed more than needed: no escalation expected.
    let surplus = EscalationTrigger::CompactionInsufficient {
        freed_bytes: 500,
        needed_bytes: 50,
    };
    let outcome = auto_escalate(CostMode::Smart, &surplus);
    assert_eq!(outcome, None);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests deferred to crate-level integration tests (tests/ directory).
|
// Tests deferred to crate-level integration tests (tests/ directory).
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue