Probe: large stable prefix yields live DeepSeek cache hit (Sam & Claude)

Short prompts don't engage DeepSeek prefix caching, so the probe now inflates the immutable system prefix past the cacheable threshold (~3.7k tokens, byte-stable across both calls; repeat count set by COLIBRI_PROBE_PREFIX_REPEAT) and waits 2s between the warm and probe calls so the provider can commit the prefix to its cache (delay set by COLIBRI_PROBE_DELAY_MS). Live run on domedog: the second call served 3584/3661 prompt tokens from cache (~98% hit), cache_hit_observed=true, model deepseek-v4-flash. Records the key-free result as manifests/2026-05-26-domedog-deepseek-cache-result.json (proof gate #2 evidence) and adds it as a colibri-contracts golden test (6 pass). No key in repo; .env stays gitignored. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 14:51:27 +02:00 · 2026-05-26 14:51:27 +02:00 · aa2b851748
commit aa2b851748
parent c14fe3ff79
4 changed files with 58 additions and 2 deletions
--- a/crates/colibri-contracts/tests/golden.rs
+++ b/crates/colibri-contracts/tests/golden.rs
@ -9,6 +9,8 @@ const DOMEDOG_INV: &str =
 const OSA_RUN: &str = include_str!("../../../manifests/2026-05-26-osa-pi-bump-run-manifest.json");
 const DOMEDOG_RUN: &str =
    include_str!("../../../manifests/2026-05-26-domedog-linux-lane-run-manifest.json");
+const DOMEDOG_CACHE: &str =
+    include_str!("../../../manifests/2026-05-26-domedog-deepseek-cache-result.json");

 fn roundtrip_eq<T>(value: &T)
 where
@ -58,6 +60,17 @@ fn parses_domedog_run_manifest() {
    roundtrip_eq(&run);
 }

+#[test]
+fn parses_domedog_live_cache_result() {
+    let res: ProviderSmokeResult =
+        serde_json::from_str(DOMEDOG_CACHE).expect("parse live cache result");
+    assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA);
+    assert_eq!(res.status, "ok");
+    assert!(res.cache_hit_observed, "expected a cache hit");
+    assert!(res.cache_hit_tokens > 0);
+    roundtrip_eq(&res);
+}
+
 #[test]
 fn provider_smoke_skipped_shape() {
    // No live result committed yet; assert the struct accepts a skipped result.
--- a/crates/colibri-deepseek/Cargo.toml
+++ b/crates/colibri-deepseek/Cargo.toml
@ -7,7 +7,7 @@ description = "DeepSeek client + prefix-cache accounting for Colibri"

 [dependencies]
 colibri-contracts = { path = "../colibri-contracts" }
-tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
+tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
--- a/crates/colibri-deepseek/src/lib.rs
+++ b/crates/colibri-deepseek/src/lib.rs
@ -112,12 +112,19 @@ async fn one_call(
    cfg: &ProbeConfig,
    key: &str,
 ) -> Result<(Option<String>, ProviderUsage), Box<dyn std::error::Error>> {
+    // Inflate the immutable prefix past DeepSeek's cacheable threshold (short
+    // prompts don't engage prefix caching). Byte-stable across both calls.
+    let repeat: usize = std::env::var("COLIBRI_PROBE_PREFIX_REPEAT")
+        .ok()
+        .and_then(|v| v.parse().ok())
+        .unwrap_or(48);
+    let system_prefix = STABLE_SYSTEM_PREFIX.repeat(repeat.max(1));
    let body = ChatRequest {
        model: &cfg.model,
        messages: vec![
            ChatMessage {
                role: "system",
-                content: STABLE_SYSTEM_PREFIX,
+                content: &system_prefix,
            },
            ChatMessage {
                role: "user",
@ -184,6 +191,12 @@ pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult {
                        result.model = model;
                    }
                    result.warm_usage = Some(warm);
+                    // Give DeepSeek a moment to commit the prefix to cache.
+                    let delay_ms: u64 = std::env::var("COLIBRI_PROBE_DELAY_MS")
+                        .ok()
+                        .and_then(|v| v.parse().ok())
+                        .unwrap_or(2000);
+                    tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
                    match one_call(&client, cfg, key).await {
                        Err(e) => {
                            result.status = "error".to_string();
--- a/manifests/2026-05-26-domedog-deepseek-cache-result.json
+++ b/manifests/2026-05-26-domedog-deepseek-cache-result.json
@ -0,0 +1,30 @@
+{
+  "schema": "clawdie.provider-smoke.result.v1",
+  "test_id": "colibri-probe-20260526T125049Z",
+  "host": "domedog",
+  "agent": "claude-domedog",
+  "provider": "deepseek",
+  "model": "deepseek-v4-flash",
+  "started_at": "2026-05-26T12:50:49.094525958+00:00",
+  "ended_at": "2026-05-26T12:50:52.934293738+00:00",
+  "status": "ok",
+  "warm_usage": {
+    "prompt_tokens": 3661,
+    "completion_tokens": 1,
+    "total_tokens": 3662,
+    "prompt_cache_hit_tokens": 3584,
+    "prompt_cache_miss_tokens": 77
+  },
+  "probe_usage": {
+    "prompt_tokens": 3661,
+    "completion_tokens": 1,
+    "total_tokens": 3662,
+    "prompt_cache_hit_tokens": 3584,
+    "prompt_cache_miss_tokens": 77
+  },
+  "cache_hit_tokens": 3584,
+  "cache_hit_observed": true,
+  "notes": [
+    "prefix cache HIT on second request"
+  ]
+}