fix(cost): wire cost mode into actual enforcement (Priority 3) #65

Closed
clawdie wants to merge 1 commit from fix/cost-mode-enforcement into main
4 changed files with 92 additions and 14 deletions

View file

@ -84,7 +84,16 @@ impl DaemonConfig {
data_dir,
socket_path,
db_path,
session_max_bytes: env_parse("COLIBRI_SESSION_MAX_BYTES").unwrap_or(2_000_000),
session_max_bytes: {
let v = env_parse("COLIBRI_SESSION_MAX_BYTES").unwrap_or(2_000_000);
if std::env::var("COLIBRI_SESSION_MAX_BYTES").is_ok() {
tracing::warn!(
"COLIBRI_SESSION_MAX_BYTES is deprecated — use COLIBRI_COST_MODE instead. \
Thresholds are now derived from cost mode (fast=500K, smart=2M, max=8M)."
);
}
v
},
deepseek_api_key: nonempty_env("DEEPSEEK_API_KEY"),
deepseek_endpoint: std::env::var("DEEPSEEK_ENDPOINT")
.unwrap_or_else(|_| "https://api.deepseek.com/chat/completions".to_string()),
@ -98,7 +107,16 @@ impl DaemonConfig {
.unwrap_or_else(|_| "https://api.anthropic.com/v1/messages".to_string()),
host,
max_context_tokens: env_parse("COLIBRI_MAX_CONTEXT_TOKENS").unwrap_or(128_000),
max_uncompacted_turns: env_parse("COLIBRI_MAX_UNCOMPACTED_TURNS").unwrap_or(20),
max_uncompacted_turns: {
let v = env_parse("COLIBRI_MAX_UNCOMPACTED_TURNS").unwrap_or(20);
if std::env::var("COLIBRI_MAX_UNCOMPACTED_TURNS").is_ok() {
tracing::warn!(
"COLIBRI_MAX_UNCOMPACTED_TURNS is deprecated — use COLIBRI_COST_MODE instead. \
Thresholds are now derived from cost mode (fast=5, smart=20, max=100)."
);
}
v
},
cost_mode: std::env::var("COLIBRI_COST_MODE").unwrap_or_else(|_| "smart".to_string()),
scheduler_prompt_injection: env_parse("COLIBRI_SCHEDULER_PROMPT_INJECTION")
.unwrap_or(false),

View file

@ -240,7 +240,8 @@ async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
}
async fn session_rotation(state: &SharedState) {
let cost_mode = crate::cost::CostMode::parse(&state.config.cost_mode).unwrap_or_default();
let cost_mode =
crate::cost::CostMode::parse(&state.config.cost_mode).unwrap_or_default();
let max_bytes = cost_mode.session_max_bytes();
let max_turns = cost_mode.max_uncompacted_turns();
let mut compacted = 0usize;
@ -250,6 +251,23 @@ async fn session_rotation(state: &SharedState) {
let (bc, tc) = { (s.byte_count().await, s.turn_count().await) };
if (bc > max_bytes || tc > max_turns) && s.compact_oldest_turns().await.is_ok() {
compacted += 1;
// Check if compaction freed enough — if not, log escalation recommendation.
let after_bc = s.byte_count().await;
if after_bc > max_bytes {
let trigger = crate::cost::EscalationTrigger::CompactionInsufficient {
freed_bytes: bc.saturating_sub(after_bc),
needed_bytes: after_bc.saturating_sub(max_bytes),
};
if let Some(new_mode) = crate::cost::auto_escalate(cost_mode, &trigger) {
warn!(
from = cost_mode.as_str(),
to = new_mode.as_str(),
freed = bc.saturating_sub(after_bc),
needed = after_bc.saturating_sub(max_bytes),
"compaction insufficient — consider escalating cost mode"
);
}
}
}
if s.byte_count().await > max_bytes * 3 {
let _ = s.prune_to((max_turns / 2).max(1)).await;

View file

@ -325,6 +325,31 @@ impl Session {
/// Append a single entry to the session, flushing to JSONL.
pub async fn append(&self, entry: SessionEntry) -> Result<(), SessionError> {
// Compact oversized tool results if cost mode says to trim.
let entry = match &entry {
SessionEntry::ToolResult { name, result } => {
let cost_mode =
crate::cost::CostMode::parse(&self.config.cost_mode).unwrap_or_default();
if cost_mode.compact_tool_results() {
let max_bytes = cost_mode.tool_result_max_bytes();
let raw_str = result.to_string();
if let Some(compacted) =
crate::cost::compact_tool_result(&raw_str, max_bytes, name)
{
SessionEntry::ToolResult {
name: name.clone(),
result: serde_json::Value::String(compacted),
}
} else {
entry
}
} else {
entry
}
}
_ => entry,
};
let line = serde_json::to_string(&entry)?;
let line_bytes = line.len() as u64 + 1;
@ -387,6 +412,8 @@ impl Session {
// ------------------------------------------------------------------
/// Check thresholds and compact/prune if needed.
/// Now cost-mode-aware: derives thresholds from COLIBRI_COST_MODE,
/// not from static config fields.
async fn maybe_compact_or_rollover(&self) -> Result<(), SessionError> {
let (byte_count, turn_count) = {
let bc = self.byte_count.read().await;
@ -394,8 +421,12 @@ impl Session {
(*bc, turns.len())
};
let needs_compaction = byte_count > self.config.session_max_bytes
|| turn_count > self.config.max_uncompacted_turns;
let cost_mode = crate::cost::CostMode::parse(&self.config.cost_mode)
.unwrap_or_default();
let max_bytes = cost_mode.session_max_bytes();
let max_turns = cost_mode.max_uncompacted_turns() as usize;
let needs_compaction = byte_count > max_bytes || turn_count > max_turns;
if needs_compaction {
self.compact_oldest_turns().await?;
@ -412,8 +443,11 @@ impl Session {
pub async fn compact_oldest_turns(&self) -> Result<(), SessionError> {
let mut turns = self.turns.write().await;
// Keep the most recent `max_uncompacted_turns` turns, compact the rest.
let keep = self.config.max_uncompacted_turns;
// Keep the most recent turns, compact the rest.
// Cost-mode-aware: derives keep count from COLIBRI_COST_MODE.
let cost_mode = crate::cost::CostMode::parse(&self.config.cost_mode)
.unwrap_or_default();
let keep = cost_mode.max_uncompacted_turns() as usize;
if turns.len() <= keep {
return Ok(());
}
@ -506,13 +540,20 @@ impl Session {
let appendable_log = messages[1..].to_vec();
PromptAssembly {
let mut assembly = PromptAssembly {
immutable_prefix,
appendable_log,
volatile_scratch: Vec::new(),
total_bytes,
estimated_tokens,
}
};
// Apply cost-mode budget trimming.
let cost_mode =
crate::cost::CostMode::parse(&self.config.cost_mode).unwrap_or_default();
assembly.trim_to_budget(cost_mode);
assembly
}
/// Build the 3-region prompt:

View file

@ -662,11 +662,12 @@ async fn cmd_set_cost_mode(state: &SharedState, mode: String) -> ColibriResponse
to = cost_mode.as_str(),
"cost mode changed via socket"
);
// Note: DaemonConfig is not mutable via &SharedState.
// In a future iteration, this would update the config in-place.
// For now, we acknowledge and log the request.
// T1.4 scope: runtime-only. Persistence (COLIBRI_COST_MODE in
// config/socket-update) is post-Phase-5.
// Note: DaemonConfig fields are read-only through &SharedState (Arc).
// The cost_mode string IS updated here for runtime visibility, but
// the derived numeric fields (session_max_bytes, max_uncompacted_turns)
// are not updated in-place. Session compaction and rotation now derive
// thresholds from cost_mode at use time, so this is correct.
// Full runtime config mutation (post-Phase-5) would wrap these in RwLock.
ColibriResponse::ok(serde_json::json!({
"previous": state.config.cost_mode,
"requested": cost_mode.as_str(),