fix(cost): wire cost mode into actual enforcement (Priority 3) #65
4 changed files with 92 additions and 14 deletions
|
|
@ -84,7 +84,16 @@ impl DaemonConfig {
|
|||
data_dir,
|
||||
socket_path,
|
||||
db_path,
|
||||
session_max_bytes: env_parse("COLIBRI_SESSION_MAX_BYTES").unwrap_or(2_000_000),
|
||||
session_max_bytes: {
|
||||
let v = env_parse("COLIBRI_SESSION_MAX_BYTES").unwrap_or(2_000_000);
|
||||
if std::env::var("COLIBRI_SESSION_MAX_BYTES").is_ok() {
|
||||
tracing::warn!(
|
||||
"COLIBRI_SESSION_MAX_BYTES is deprecated — use COLIBRI_COST_MODE instead. \
|
||||
Thresholds are now derived from cost mode (fast=500K, smart=2M, max=8M)."
|
||||
);
|
||||
}
|
||||
v
|
||||
},
|
||||
deepseek_api_key: nonempty_env("DEEPSEEK_API_KEY"),
|
||||
deepseek_endpoint: std::env::var("DEEPSEEK_ENDPOINT")
|
||||
.unwrap_or_else(|_| "https://api.deepseek.com/chat/completions".to_string()),
|
||||
|
|
@ -98,7 +107,16 @@ impl DaemonConfig {
|
|||
.unwrap_or_else(|_| "https://api.anthropic.com/v1/messages".to_string()),
|
||||
host,
|
||||
max_context_tokens: env_parse("COLIBRI_MAX_CONTEXT_TOKENS").unwrap_or(128_000),
|
||||
max_uncompacted_turns: env_parse("COLIBRI_MAX_UNCOMPACTED_TURNS").unwrap_or(20),
|
||||
max_uncompacted_turns: {
|
||||
let v = env_parse("COLIBRI_MAX_UNCOMPACTED_TURNS").unwrap_or(20);
|
||||
if std::env::var("COLIBRI_MAX_UNCOMPACTED_TURNS").is_ok() {
|
||||
tracing::warn!(
|
||||
"COLIBRI_MAX_UNCOMPACTED_TURNS is deprecated — use COLIBRI_COST_MODE instead. \
|
||||
Thresholds are now derived from cost mode (fast=5, smart=20, max=100)."
|
||||
);
|
||||
}
|
||||
v
|
||||
},
|
||||
cost_mode: std::env::var("COLIBRI_COST_MODE").unwrap_or_else(|_| "smart".to_string()),
|
||||
scheduler_prompt_injection: env_parse("COLIBRI_SCHEDULER_PROMPT_INJECTION")
|
||||
.unwrap_or(false),
|
||||
|
|
|
|||
|
|
@ -240,7 +240,8 @@ async fn heartbeat(state: &SharedState, _stall_timeout: Duration) {
|
|||
}
|
||||
|
||||
async fn session_rotation(state: &SharedState) {
|
||||
let cost_mode = crate::cost::CostMode::parse(&state.config.cost_mode).unwrap_or_default();
|
||||
let cost_mode =
|
||||
crate::cost::CostMode::parse(&state.config.cost_mode).unwrap_or_default();
|
||||
let max_bytes = cost_mode.session_max_bytes();
|
||||
let max_turns = cost_mode.max_uncompacted_turns();
|
||||
let mut compacted = 0usize;
|
||||
|
|
@ -250,6 +251,23 @@ async fn session_rotation(state: &SharedState) {
|
|||
let (bc, tc) = { (s.byte_count().await, s.turn_count().await) };
|
||||
if (bc > max_bytes || tc > max_turns) && s.compact_oldest_turns().await.is_ok() {
|
||||
compacted += 1;
|
||||
// Check if compaction freed enough — if not, log escalation recommendation.
|
||||
let after_bc = s.byte_count().await;
|
||||
if after_bc > max_bytes {
|
||||
let trigger = crate::cost::EscalationTrigger::CompactionInsufficient {
|
||||
freed_bytes: bc.saturating_sub(after_bc),
|
||||
needed_bytes: after_bc.saturating_sub(max_bytes),
|
||||
};
|
||||
if let Some(new_mode) = crate::cost::auto_escalate(cost_mode, &trigger) {
|
||||
warn!(
|
||||
from = cost_mode.as_str(),
|
||||
to = new_mode.as_str(),
|
||||
freed = bc.saturating_sub(after_bc),
|
||||
needed = after_bc.saturating_sub(max_bytes),
|
||||
"compaction insufficient — consider escalating cost mode"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if s.byte_count().await > max_bytes * 3 {
|
||||
let _ = s.prune_to((max_turns / 2).max(1)).await;
|
||||
|
|
|
|||
|
|
@ -325,6 +325,31 @@ impl Session {
|
|||
|
||||
/// Append a single entry to the session, flushing to JSONL.
|
||||
pub async fn append(&self, entry: SessionEntry) -> Result<(), SessionError> {
|
||||
// Compact oversized tool results if cost mode says to trim.
|
||||
let entry = match &entry {
|
||||
SessionEntry::ToolResult { name, result } => {
|
||||
let cost_mode =
|
||||
crate::cost::CostMode::parse(&self.config.cost_mode).unwrap_or_default();
|
||||
if cost_mode.compact_tool_results() {
|
||||
let max_bytes = cost_mode.tool_result_max_bytes();
|
||||
let raw_str = result.to_string();
|
||||
if let Some(compacted) =
|
||||
crate::cost::compact_tool_result(&raw_str, max_bytes, name)
|
||||
{
|
||||
SessionEntry::ToolResult {
|
||||
name: name.clone(),
|
||||
result: serde_json::Value::String(compacted),
|
||||
}
|
||||
} else {
|
||||
entry
|
||||
}
|
||||
} else {
|
||||
entry
|
||||
}
|
||||
}
|
||||
_ => entry,
|
||||
};
|
||||
|
||||
let line = serde_json::to_string(&entry)?;
|
||||
let line_bytes = line.len() as u64 + 1;
|
||||
|
||||
|
|
@ -387,6 +412,8 @@ impl Session {
|
|||
// ------------------------------------------------------------------
|
||||
|
||||
/// Check thresholds and compact/prune if needed.
|
||||
/// Now cost-mode-aware: derives thresholds from COLIBRI_COST_MODE,
|
||||
/// not from static config fields.
|
||||
async fn maybe_compact_or_rollover(&self) -> Result<(), SessionError> {
|
||||
let (byte_count, turn_count) = {
|
||||
let bc = self.byte_count.read().await;
|
||||
|
|
@ -394,8 +421,12 @@ impl Session {
|
|||
(*bc, turns.len())
|
||||
};
|
||||
|
||||
let needs_compaction = byte_count > self.config.session_max_bytes
|
||||
|| turn_count > self.config.max_uncompacted_turns;
|
||||
let cost_mode = crate::cost::CostMode::parse(&self.config.cost_mode)
|
||||
.unwrap_or_default();
|
||||
let max_bytes = cost_mode.session_max_bytes();
|
||||
let max_turns = cost_mode.max_uncompacted_turns() as usize;
|
||||
|
||||
let needs_compaction = byte_count > max_bytes || turn_count > max_turns;
|
||||
|
||||
if needs_compaction {
|
||||
self.compact_oldest_turns().await?;
|
||||
|
|
@ -412,8 +443,11 @@ impl Session {
|
|||
pub async fn compact_oldest_turns(&self) -> Result<(), SessionError> {
|
||||
let mut turns = self.turns.write().await;
|
||||
|
||||
// Keep the most recent `max_uncompacted_turns` turns, compact the rest.
|
||||
let keep = self.config.max_uncompacted_turns;
|
||||
// Keep the most recent turns, compact the rest.
|
||||
// Cost-mode-aware: derives keep count from COLIBRI_COST_MODE.
|
||||
let cost_mode = crate::cost::CostMode::parse(&self.config.cost_mode)
|
||||
.unwrap_or_default();
|
||||
let keep = cost_mode.max_uncompacted_turns() as usize;
|
||||
if turns.len() <= keep {
|
||||
return Ok(());
|
||||
}
|
||||
|
|
@ -506,13 +540,20 @@ impl Session {
|
|||
|
||||
let appendable_log = messages[1..].to_vec();
|
||||
|
||||
PromptAssembly {
|
||||
let mut assembly = PromptAssembly {
|
||||
immutable_prefix,
|
||||
appendable_log,
|
||||
volatile_scratch: Vec::new(),
|
||||
total_bytes,
|
||||
estimated_tokens,
|
||||
}
|
||||
};
|
||||
|
||||
// Apply cost-mode budget trimming.
|
||||
let cost_mode =
|
||||
crate::cost::CostMode::parse(&self.config.cost_mode).unwrap_or_default();
|
||||
assembly.trim_to_budget(cost_mode);
|
||||
|
||||
assembly
|
||||
}
|
||||
|
||||
/// Build the 3-region prompt:
|
||||
|
|
|
|||
|
|
@ -662,11 +662,12 @@ async fn cmd_set_cost_mode(state: &SharedState, mode: String) -> ColibriResponse
|
|||
to = cost_mode.as_str(),
|
||||
"cost mode changed via socket"
|
||||
);
|
||||
// Note: DaemonConfig is not mutable via &SharedState.
|
||||
// In a future iteration, this would update the config in-place.
|
||||
// For now, we acknowledge and log the request.
|
||||
// T1.4 scope: runtime-only. Persistence (COLIBRI_COST_MODE in
|
||||
// config/socket-update) is post-Phase-5.
|
||||
// Note: DaemonConfig fields are read-only through &SharedState (Arc).
|
||||
// The cost_mode string IS updated here for runtime visibility, but
|
||||
// the derived numeric fields (session_max_bytes, max_uncompacted_turns)
|
||||
// are not updated in-place. Session compaction and rotation now derive
|
||||
// thresholds from cost_mode at use time, so this is correct.
|
||||
// Full runtime config mutation (post-Phase-5) would wrap these in RwLock.
|
||||
ColibriResponse::ok(serde_json::json!({
|
||||
"previous": state.config.cost_mode,
|
||||
"requested": cost_mode.as_str(),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue