diff --git a/crates/colibri-daemon/src/cost.rs b/crates/colibri-daemon/src/cost.rs
index b37833e..f6e487f 100644
--- a/crates/colibri-daemon/src/cost.rs
+++ b/crates/colibri-daemon/src/cost.rs
@@ -94,7 +94,7 @@ pub fn escalate(current: CostMode) -> CostMode {
     let next = match current {
         CostMode::Fast => CostMode::Smart,
         CostMode::Smart => CostMode::Max,
-        CostMode::Max => CostMode::Max, // already at ceiling
+        CostMode::Max => CostMode::Max,
     };
 
     if next != current {
@@ -108,6 +108,47 @@ pub fn escalate(current: CostMode) -> CostMode {
     next
 }
 
+/// A measurement handed to `auto_escalate`; carrying one does not by itself mean escalation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum EscalationTrigger {
+    /// Session size vs. the mode budget, in bytes; fires only if `current_bytes > budget_bytes`.
+    BudgetExceeded {
+        current_bytes: u64,
+        budget_bytes: u64,
+    },
+    /// Compaction outcome, in bytes; fires only if `freed_bytes < needed_bytes`.
+    CompactionInsufficient { freed_bytes: u64, needed_bytes: u64 },
+}
+
+/// Step `current` up one cost mode when `trigger` reports a crossed threshold.
+///
+/// Both comparisons are strict, so equal values never escalate. Yields
+/// `Some(next)` on escalation and `None` otherwise, including at `CostMode::Max`.
+pub fn auto_escalate(current: CostMode, trigger: &EscalationTrigger) -> Option<CostMode> {
+    // Nothing sits above Max, so the trigger is not even inspected.
+    if matches!(current, CostMode::Max) {
+        return None;
+    }
+
+    let threshold_crossed = match *trigger {
+        EscalationTrigger::BudgetExceeded {
+            current_bytes,
+            budget_bytes,
+        } => budget_bytes < current_bytes,
+        EscalationTrigger::CompactionInsufficient {
+            freed_bytes,
+            needed_bytes,
+        } => needed_bytes > freed_bytes,
+    };
+    if !threshold_crossed {
+        return None;
+    }
+
+    // `escalate` is only invoked once escalation is warranted; an unchanged
+    // mode coming back from it is reported as "no escalation".
+    Some(escalate(current)).filter(|next| *next != current)
+}
+
 // ---------------------------------------------------------------------------
 // Tool result compaction
 // ---------------------------------------------------------------------------
diff --git a/crates/colibri-daemon/src/session.rs b/crates/colibri-daemon/src/session.rs
index b618708..5e9d763 100644
--- a/crates/colibri-daemon/src/session.rs
+++ b/crates/colibri-daemon/src/session.rs
@@ -95,7 +95,6 @@ pub struct PromptAssembly {
     pub total_bytes: u64,
     pub estimated_tokens: u64,
 }
-
 impl PromptAssembly {
     pub fn to_messages(&self) -> Vec<serde_json::Value> {
         let mut messages =
@@ -108,6 +107,78 @@ impl PromptAssembly {
         messages.extend(self.volatile_scratch.clone());
         messages
     }
+
+    /// Trim the assembly to fit within a cost mode budget.
+    ///
+    /// The budget is `mode.session_max_bytes()`. The immutable prefix is never
+    /// trimmed; its byte length is subtracted from the budget (saturating) and
+    /// only the appendable log and volatile scratch compete for what is left.
+    ///
+    /// Entry sizes are the lengths of their JSON serialization (0 if it fails).
+    ///
+    /// When over budget:
+    /// 1. the volatile scratch is cleared, then
+    /// 2. if the log alone still does not fit, its oldest entries are dropped
+    ///    until the overflow is covered or only two entries remain (so the
+    ///    result may still exceed the budget).
+    ///
+    /// Returns the number of trim steps: one for clearing a non-empty scratch
+    /// plus one per dropped log entry. Returns 0, leaving everything untouched,
+    /// if already under budget; otherwise `total_bytes` and `estimated_tokens`
+    /// are refreshed.
+    pub fn trim_to_budget(&mut self, mode: crate::cost::CostMode) -> usize {
+        /// Newest log entries that are never dropped.
+        const MIN_KEEP: usize = 2;
+
+        /// Serialized size of one message in bytes (0 if serialization fails).
+        fn wire_len(msg: &serde_json::Value) -> u64 {
+            serde_json::to_string(msg).unwrap_or_default().len() as u64
+        }
+
+        let prefix_bytes = self.immutable_prefix.len() as u64;
+        let remaining = mode.session_max_bytes().saturating_sub(prefix_bytes);
+
+        // Serialize every entry exactly once and reuse the sizes below.
+        let log_sizes: Vec<u64> = self.appendable_log.iter().map(wire_len).collect();
+        let mut log_bytes: u64 = log_sizes.iter().sum();
+        let scratch_bytes: u64 = self.volatile_scratch.iter().map(wire_len).sum();
+
+        if log_bytes + scratch_bytes <= remaining {
+            return 0;
+        }
+
+        // Drop volatile scratch first (cheapest); it counts as a single step.
+        let mut trimmed = 0;
+        if !self.volatile_scratch.is_empty() {
+            self.volatile_scratch.clear();
+            trimmed += 1;
+        }
+
+        // If the log alone is still over budget, drop its oldest entries.
+        if log_bytes > remaining {
+            let overflow = log_bytes - remaining;
+            let droppable = log_sizes.len().saturating_sub(MIN_KEEP);
+
+            // Count how many front entries must go, using the cached sizes.
+            let mut freed = 0u64;
+            let mut drop_count = 0usize;
+            while drop_count < droppable && freed < overflow {
+                freed += log_sizes[drop_count];
+                drop_count += 1;
+            }
+
+            // One O(n) drain instead of a shifting `remove(0)` per entry.
+            self.appendable_log.drain(..drop_count);
+            log_bytes -= freed;
+            trimmed += drop_count;
+        }
+
+        // The scratch is empty on this path, so only prefix and log remain.
+        self.total_bytes = prefix_bytes + log_bytes;
+        self.estimated_tokens = self.total_bytes.div_ceil(3);
+
+        trimmed
+    }
 }
 
 #[derive(Debug, Clone, Default)]
@@ -515,6 +586,7 @@ impl Session {
 #[cfg(test)]
 mod t14_tests {
     use super::*;
+    use crate::cost::CostMode;
 
     #[test]
     fn prompt_assembly_to_messages_includes_all_regions() {
@@ -565,6 +637,150 @@ mod t14_tests {
         m.record(200, 300);
         assert!((m.hit_rate() - 0.3).abs() < 0.001);
     }
+
+    // ── trim_to_budget tests ─────────────────────────────────────
+
+    /// Build a fixture with `log_count` log and `scratch_count` scratch messages (~10 KB each).
+    fn make_assembly(prefix: &str, log_count: usize, scratch_count: usize) -> PromptAssembly {
+        fn wire_len(m: &serde_json::Value) -> u64 {
+            serde_json::to_string(m).unwrap_or_default().len() as u64
+        }
+        let user_msg = |text: String| serde_json::json!({"role": "user", "content": text});
+
+        let filler = format!("msg {}", "x".repeat(10000)); // ~10KB per message
+        let appendable_log: Vec<_> = (0..log_count)
+            .map(|i| user_msg(format!("{filler} #{i}")))
+            .collect();
+        let volatile_scratch: Vec<_> = (0..scratch_count)
+            .map(|i| user_msg(format!("scratch {filler} #{i}")))
+            .collect();
+
+        // Cached totals cover prefix + log + scratch, measured on the serialized JSON.
+        let total_bytes = prefix.len() as u64
+            + appendable_log.iter().map(wire_len).sum::<u64>()
+            + volatile_scratch.iter().map(wire_len).sum::<u64>();
+        PromptAssembly {
+            immutable_prefix: prefix.to_string(),
+            appendable_log,
+            volatile_scratch,
+            total_bytes,
+            estimated_tokens: total_bytes.div_ceil(3),
+        }
+    }
+
+    #[test]
+    fn trim_to_budget_under_limit_returns_zero() {
+        // 3-byte prefix + 3 messages of ~10 KB each: expected under Smart, so a strict no-op.
+        let mut a = make_assembly("SYS", 2, 1);
+        assert_eq!(a.trim_to_budget(CostMode::Smart), 0);
+        assert_eq!((a.appendable_log.len(), a.volatile_scratch.len()), (2, 1));
+    }
+
+    #[test]
+    fn trim_to_budget_reduces_size() {
+        // Fixture: 50 KB prefix plus 80 log and 10 scratch messages of ~10 KB each,
+        // i.e. roughly 950 KB in total.
+        let mut assembly = make_assembly(&"x".repeat(50000), 80, 10);
+        let limit = CostMode::Fast.session_max_bytes();
+
+        // Precondition: the fixture has to start out above the Fast budget.
+        let orig_bytes = assembly.total_bytes;
+        assert!(orig_bytes > limit, "orig {orig_bytes} > budget {}", limit);
+
+        // Trimming must report work done ...
+        let steps = assembly.trim_to_budget(CostMode::Fast);
+        assert!(steps > 0, "should have trimmed");
+        // ... and leave the recomputed total within the budget.
+        assert!(assembly.total_bytes <= limit, "under budget after trim");
+    }
+
+    #[test]
+    fn trim_to_budget_preserves_prefix() {
+        let prefix = "IMMUTABLE_PREFIX_DO_NOT_TOUCH";
+        let mut a = make_assembly(prefix, 1000, 5); // ~10 MB of log, far over the Fast budget
+        assert!(a.trim_to_budget(CostMode::Fast) > 0, "should have trimmed");
+        assert_eq!(a.immutable_prefix, prefix, "prefix untouched");
+    }
+
+    #[test]
+    fn trim_to_budget_deterministic() {
+        let mut a1 = make_assembly(&"x".repeat(50000), 80, 10); // over the Fast budget
+        let mut a2 = a1.clone();
+        assert!(a1.trim_to_budget(CostMode::Fast) > 0, "should have trimmed");
+        a2.trim_to_budget(CostMode::Fast);
+        assert!(a1.appendable_log == a2.appendable_log);
+        assert_eq!(a1.total_bytes, a2.total_bytes);
+    }
+}
+
+// ── auto-escalation tests ───────────────────────────────────────────
+
+#[cfg(test)]
+mod cost_tests {
+    // NOTE(review): these tests exercise `crate::cost` only; consider moving them
+    // into cost.rs next to the code they cover.
+    use crate::cost::{auto_escalate, CostMode, EscalationTrigger};
+
+    /// Shorthand for a `BudgetExceeded` trigger.
+    fn budget(current_bytes: u64, budget_bytes: u64) -> EscalationTrigger {
+        EscalationTrigger::BudgetExceeded {
+            current_bytes,
+            budget_bytes,
+        }
+    }
+
+    /// Shorthand for a `CompactionInsufficient` trigger.
+    fn compaction(freed_bytes: u64, needed_bytes: u64) -> EscalationTrigger {
+        EscalationTrigger::CompactionInsufficient {
+            freed_bytes,
+            needed_bytes,
+        }
+    }
+
+    #[test]
+    fn auto_escalate_fast_budget_exceeded() {
+        let next = auto_escalate(CostMode::Fast, &budget(600_000, 500_000));
+        assert_eq!(next, Some(CostMode::Smart));
+    }
+
+    #[test]
+    fn auto_escalate_smart_budget_exceeded() {
+        let next = auto_escalate(CostMode::Smart, &budget(3_000_000, 2_000_000));
+        assert_eq!(next, Some(CostMode::Max));
+    }
+
+    #[test]
+    fn auto_escalate_max_no_further() {
+        // Max is the ceiling for every trigger kind.
+        let next = auto_escalate(CostMode::Max, &budget(10_000_000, 8_000_000));
+        assert_eq!(next, None);
+        assert_eq!(auto_escalate(CostMode::Max, &compaction(50, 500)), None);
+    }
+
+    #[test]
+    fn auto_escalate_under_budget_no_escalation() {
+        let next = auto_escalate(CostMode::Fast, &budget(100_000, 500_000));
+        assert_eq!(next, None);
+    }
+
+    #[test]
+    fn auto_escalate_compaction_insufficient() {
+        let next = auto_escalate(CostMode::Smart, &compaction(50, 500));
+        assert_eq!(next, Some(CostMode::Max));
+    }
+
+    #[test]
+    fn auto_escalate_compaction_sufficient_no_escalation() {
+        assert_eq!(auto_escalate(CostMode::Smart, &compaction(500, 50)), None);
+    }
+
+    #[test]
+    fn auto_escalate_exact_threshold_no_escalation() {
+        // Both comparisons are strict: hitting the limit exactly is not a trigger.
+        let at_budget = auto_escalate(CostMode::Fast, &budget(500_000, 500_000));
+        assert_eq!(at_budget, None);
+        assert_eq!(auto_escalate(CostMode::Fast, &compaction(500, 500)), None);
+    }
 }
 
 // Tests deferred to crate-level integration tests (tests/ directory).