diff --git a/crates/colibri-contracts/tests/golden.rs b/crates/colibri-contracts/tests/golden.rs index 8edd186..7ed7a6a 100644 --- a/crates/colibri-contracts/tests/golden.rs +++ b/crates/colibri-contracts/tests/golden.rs @@ -9,6 +9,8 @@ const DOMEDOG_INV: &str = const OSA_RUN: &str = include_str!("../../../manifests/2026-05-26-osa-pi-bump-run-manifest.json"); const DOMEDOG_RUN: &str = include_str!("../../../manifests/2026-05-26-domedog-linux-lane-run-manifest.json"); +const DOMEDOG_CACHE: &str = + include_str!("../../../manifests/2026-05-26-domedog-deepseek-cache-result.json"); fn roundtrip_eq(value: &T) where @@ -58,6 +60,17 @@ fn parses_domedog_run_manifest() { roundtrip_eq(&run); } +#[test] +fn parses_domedog_live_cache_result() { + let res: ProviderSmokeResult = + serde_json::from_str(DOMEDOG_CACHE).expect("parse live cache result"); + assert_eq!(res.schema, PROVIDER_SMOKE_SCHEMA); + assert_eq!(res.status, "ok"); + assert!(res.cache_hit_observed, "expected a cache hit"); + assert!(res.cache_hit_tokens > 0); + roundtrip_eq(&res); +} + #[test] fn provider_smoke_skipped_shape() { // No live result committed yet; assert the struct accepts a skipped result. diff --git a/crates/colibri-deepseek/Cargo.toml b/crates/colibri-deepseek/Cargo.toml index 6606a77..259d82f 100644 --- a/crates/colibri-deepseek/Cargo.toml +++ b/crates/colibri-deepseek/Cargo.toml @@ -7,7 +7,7 @@ description = "DeepSeek client + prefix-cache accounting for Colibri" [dependencies] colibri-contracts = { path = "../colibri-contracts" } -tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] } reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/crates/colibri-deepseek/src/lib.rs b/crates/colibri-deepseek/src/lib.rs index e8c51c1..8a64f5b 100644 --- a/crates/colibri-deepseek/src/lib.rs +++ b/crates/colibri-deepseek/src/lib.rs @@ -112,12 +112,19 @@ async fn one_call( cfg: &ProbeConfig, key: &str, ) -> Result<(Option, ProviderUsage), Box> { + // Inflate the immutable prefix past DeepSeek's cacheable threshold (short + // prompts don't engage prefix caching). Byte-stable across both calls. + let repeat: usize = std::env::var("COLIBRI_PROBE_PREFIX_REPEAT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(48); + let system_prefix = STABLE_SYSTEM_PREFIX.repeat(repeat.max(1)); let body = ChatRequest { model: &cfg.model, messages: vec![ ChatMessage { role: "system", - content: STABLE_SYSTEM_PREFIX, + content: &system_prefix, }, ChatMessage { role: "user", @@ -184,6 +191,12 @@ pub async fn run_cache_probe(cfg: &ProbeConfig) -> ProviderSmokeResult { result.model = model; } result.warm_usage = Some(warm); + // Give DeepSeek a moment to commit the prefix to cache. + let delay_ms: u64 = std::env::var("COLIBRI_PROBE_DELAY_MS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(2000); + tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await; match one_call(&client, cfg, key).await { Err(e) => { result.status = "error".to_string(); diff --git a/manifests/2026-05-26-domedog-deepseek-cache-result.json b/manifests/2026-05-26-domedog-deepseek-cache-result.json new file mode 100644 index 0000000..ad8a846 --- /dev/null +++ b/manifests/2026-05-26-domedog-deepseek-cache-result.json @@ -0,0 +1,30 @@ +{ + "schema": "clawdie.provider-smoke.result.v1", + "test_id": "colibri-probe-20260526T125049Z", + "host": "domedog", + "agent": "claude-domedog", + "provider": "deepseek", + "model": "deepseek-v4-flash", + "started_at": "2026-05-26T12:50:49.094525958+00:00", + "ended_at": "2026-05-26T12:50:52.934293738+00:00", + "status": "ok", + "warm_usage": { + "prompt_tokens": 3661, + "completion_tokens": 1, + "total_tokens": 3662, + "prompt_cache_hit_tokens": 3584, + "prompt_cache_miss_tokens": 77 + }, + "probe_usage": { + "prompt_tokens": 3661, + "completion_tokens": 1, + "total_tokens": 3662, + "prompt_cache_hit_tokens": 3584, + "prompt_cache_miss_tokens": 77 + }, + "cache_hit_tokens": 3584, + "cache_hit_observed": true, + "notes": [ + "prefix cache HIT on second request" + ] +}