Probe: large stable prefix yields live DeepSeek cache hit (Sam & Claude)

Short prompts don't engage DeepSeek prefix caching. This change inflates the immutable system prefix past the cacheable threshold (~3.7k tokens, byte-stable across both calls; repeat count set by COLIBRI_PROBE_PREFIX_REPEAT) and adds a 2s delay between the two calls so the prefix can be committed to the cache (COLIBRI_PROBE_DELAY_MS). Live run on domedog: the second call served 3584/3661 prompt tokens from cache (~98% hit), cache_hit_observed=true, model deepseek-v4-flash.

Records the key-free result as manifests/2026-05-26-domedog-deepseek-cache-result.json (proof gate #2 evidence) and adds it as a colibri-contracts golden test (6 pass). No key in repo; .env stays gitignored.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Sam & Claude 2026-05-26 14:51:27 +02:00
parent c14fe3ff79
commit aa2b851748
4 changed files with 58 additions and 2 deletions

View file

@ -9,6 +9,8 @@ const DOMEDOG_INV: &str =
const OSA_RUN: &str = include_str!("../../../manifests/2026-05-26-osa-pi-bump-run-manifest.json");
const DOMEDOG_RUN: &str =
include_str!("../../../manifests/2026-05-26-domedog-linux-lane-run-manifest.json");
const DOMEDOG_CACHE: &str =
include_str!("../../../manifests/2026-05-26-domedog-deepseek-cache-result.json");
fn roundtrip_eq<T>(value: &T)
where
@ -58,6 +60,17 @@ fn parses_domedog_run_manifest() {
roundtrip_eq(&run);
}
/// Golden test for the committed live DeepSeek cache-probe result: the
/// manifest must parse as a `ProviderSmokeResult`, carry the expected schema
/// and an "ok" status, report a non-zero cache hit, and pass `roundtrip_eq`.
#[test]
fn parses_domedog_live_cache_result() {
    let parsed = serde_json::from_str::<ProviderSmokeResult>(DOMEDOG_CACHE)
        .expect("parse live cache result");

    // Envelope: schema tag and overall outcome.
    assert_eq!(parsed.schema, PROVIDER_SMOKE_SCHEMA);
    assert_eq!(parsed.status, "ok");

    // The whole point of this fixture: a cache hit was actually observed.
    assert!(parsed.cache_hit_observed, "expected a cache hit");
    assert!(parsed.cache_hit_tokens > 0);

    roundtrip_eq(&parsed);
}
#[test]
fn provider_smoke_skipped_shape() {
// No live result committed yet; assert the struct accepts a skipped result.

View file

@ -7,7 +7,7 @@ description = "DeepSeek client + prefix-cache accounting for Colibri"
[dependencies]
colibri-contracts = { path = "../colibri-contracts" }
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"

View file

@ -112,12 +112,19 @@ async fn one_call(
cfg: &ProbeConfig,
key: &str,
) -> Result<(Option<String>, ProviderUsage), Box<dyn std::error::Error>> {
// Inflate the immutable prefix past DeepSeek's cacheable threshold (short
// prompts don't engage prefix caching). Byte-stable across both calls.
let repeat: usize = std::env::var("COLIBRI_PROBE_PREFIX_REPEAT")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(48);
let system_prefix = STABLE_SYSTEM_PREFIX.repeat(repeat.max(1));
let body = ChatRequest {
model: &cfg.model,
messages: vec![
ChatMessage {
role: "system",
content: STABLE_SYSTEM_PREFIX,
content: &system_prefix,
},
ChatMessage {
role: "user",
@ -184,6 +191,12 @@ pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult {
result.model = model;
}
result.warm_usage = Some(warm);
// Give DeepSeek a moment to commit the prefix to cache.
let delay_ms: u64 = std::env::var("COLIBRI_PROBE_DELAY_MS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(2000);
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
match one_call(&client, cfg, key).await {
Err(e) => {
result.status = "error".to_string();

View file

@ -0,0 +1,30 @@
{
"schema": "clawdie.provider-smoke.result.v1",
"test_id": "colibri-probe-20260526T125049Z",
"host": "domedog",
"agent": "claude-domedog",
"provider": "deepseek",
"model": "deepseek-v4-flash",
"started_at": "2026-05-26T12:50:49.094525958+00:00",
"ended_at": "2026-05-26T12:50:52.934293738+00:00",
"status": "ok",
"warm_usage": {
"prompt_tokens": 3661,
"completion_tokens": 1,
"total_tokens": 3662,
"prompt_cache_hit_tokens": 3584,
"prompt_cache_miss_tokens": 77
},
"probe_usage": {
"prompt_tokens": 3661,
"completion_tokens": 1,
"total_tokens": 3662,
"prompt_cache_hit_tokens": 3584,
"prompt_cache_miss_tokens": 77
},
"cache_hit_tokens": 3584,
"cache_hit_observed": true,
"notes": [
"prefix cache HIT on second request"
]
}