feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)
Replace the fragile hardcoded context length system with a multi-source
resolution chain that correctly identifies context windows per provider.
Key changes:
- New agent/models_dev.py: Fetches and caches the models.dev registry
(3800+ models across 100+ providers with per-provider context windows).
In-memory cache (1hr TTL) + disk cache for cold starts.
- Rewritten get_model_context_length() resolution chain:
0. Config override (model.context_length)
1. Custom providers per-model context_length
2. Persistent disk cache
3. Endpoint /models (local servers)
4. Anthropic /v1/models API (max_input_tokens, API-key only)
5. OpenRouter live API (existing, unchanged)
6. Nous suffix-match via OpenRouter (dot/dash normalization)
7. models.dev registry lookup (provider-aware)
8. Thin hardcoded defaults (broad family patterns)
9. 128K fallback (was 2M)
- Provider-aware context: same model now correctly resolves to different
context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic,
128K on GitHub Copilot). Provider name flows through ContextCompressor.
- DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns.
models.dev replaces the per-model hardcoding.
- CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K]
to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M.
- hermes model: prompts for context_length when configuring custom
endpoints. Supports shorthand (32k, 128K). Saved to custom_providers
per-model config.
- custom_providers schema extended with optional models dict for
per-model context_length (backward compatible).
- Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against
OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash
normalization. Handles all 15 current Nous models.
- Anthropic direct: queries /v1/models for max_input_tokens. Only works
with regular API keys (sk-ant-api*), not OAuth tokens. Falls through
to models.dev for OAuth users.
Tests: 5574 passed (18 new tests for models_dev + updated probe tiers)
Docs: Updated configuration.md context length section, AGENTS.md
Co-authored-by: Test <test@test.com>
2026-03-20 06:04:33 -07:00
|
|
|
"""Tests for agent.models_dev — models.dev registry integration."""
|
|
|
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
|
|
|
|
|
|
from agent.models_dev import (
|
|
|
|
|
PROVIDER_TO_MODELS_DEV,
|
|
|
|
|
_extract_context,
|
|
|
|
|
fetch_models_dev,
|
2026-04-11 11:07:18 -07:00
|
|
|
get_model_capabilities,
|
feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)
Replace the fragile hardcoded context length system with a multi-source
resolution chain that correctly identifies context windows per provider.
Key changes:
- New agent/models_dev.py: Fetches and caches the models.dev registry
(3800+ models across 100+ providers with per-provider context windows).
In-memory cache (1hr TTL) + disk cache for cold starts.
- Rewritten get_model_context_length() resolution chain:
0. Config override (model.context_length)
1. Custom providers per-model context_length
2. Persistent disk cache
3. Endpoint /models (local servers)
4. Anthropic /v1/models API (max_input_tokens, API-key only)
5. OpenRouter live API (existing, unchanged)
6. Nous suffix-match via OpenRouter (dot/dash normalization)
7. models.dev registry lookup (provider-aware)
8. Thin hardcoded defaults (broad family patterns)
9. 128K fallback (was 2M)
- Provider-aware context: same model now correctly resolves to different
context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic,
128K on GitHub Copilot). Provider name flows through ContextCompressor.
- DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns.
models.dev replaces the per-model hardcoding.
- CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K]
to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M.
- hermes model: prompts for context_length when configuring custom
endpoints. Supports shorthand (32k, 128K). Saved to custom_providers
per-model config.
- custom_providers schema extended with optional models dict for
per-model context_length (backward compatible).
- Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against
OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash
normalization. Handles all 15 current Nous models.
- Anthropic direct: queries /v1/models for max_input_tokens. Only works
with regular API keys (sk-ant-api*), not OAuth tokens. Falls through
to models.dev for OAuth users.
Tests: 5574 passed (18 new tests for models_dev + updated probe tiers)
Docs: Updated configuration.md context length section, AGENTS.md
Co-authored-by: Test <test@test.com>
2026-03-20 06:04:33 -07:00
|
|
|
lookup_models_dev_context,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hand-written miniature of a models.dev-style registry used as the fake
# payload throughout this module.  Shape: provider id -> provider record,
# whose "models" dict maps model id -> entry with a "limit" dict.
SAMPLE_REGISTRY = {
    # Three Claude entries; the opus entry is the usual lookup target.
    "anthropic": {
        "id": "anthropic",
        "name": "Anthropic",
        "models": {
            "claude-opus-4-6": {
                "id": "claude-opus-4-6",
                "limit": {"context": 1000000, "output": 128000},
            },
            "claude-sonnet-4-6": {
                "id": "claude-sonnet-4-6",
                "limit": {"context": 1000000, "output": 64000},
            },
            "claude-sonnet-4-0": {
                "id": "claude-sonnet-4-0",
                "limit": {"context": 200000, "output": 64000},
            },
        },
    },
    # Same model family as above but with a smaller context window, so the
    # provider-aware test can tell the two providers apart.
    "github-copilot": {
        "id": "github-copilot",
        "name": "GitHub Copilot",
        "models": {
            "claude-opus-4.6": {
                "id": "claude-opus-4.6",
                "limit": {"context": 128000, "output": 32000},
            },
        },
    },
    # Looked up through the "xai-oauth" provider name in the tests.
    "xai": {
        "id": "xai",
        "name": "xAI",
        "models": {
            "grok-build-0.1": {
                "id": "grok-build-0.1",
                "limit": {"context": 256000, "output": 64000},
            },
        },
    },
    # Gateway-style provider whose model ids carry a vendor prefix.
    # (Not referenced by the tests visible in this chunk.)
    "kilo": {
        "id": "kilo",
        "name": "Kilo Gateway",
        "models": {
            "anthropic/claude-sonnet-4.6": {
                "id": "anthropic/claude-sonnet-4.6",
                "limit": {"context": 1000000, "output": 128000},
            },
        },
    },
    # (Not referenced by the tests visible in this chunk.)
    "deepseek": {
        "id": "deepseek",
        "name": "DeepSeek",
        "models": {
            "deepseek-chat": {
                "id": "deepseek-chat",
                "limit": {"context": 128000, "output": 8192},
            },
        },
    },
    # Entry with a zero context limit (and no "name" key); used to check
    # that zero-context entries are filtered out.
    "audio-only": {
        "id": "audio-only",
        "models": {
            "tts-model": {
                "id": "tts-model",
                "limit": {"context": 0, "output": 0},
            },
        },
    },
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestProviderMapping:
    """Spot checks on the contents of the PROVIDER_TO_MODELS_DEV mapping."""

    def test_xai_oauth_uses_xai_catalog(self):
        """Both the plain and the OAuth xAI provider names map to ``xai``."""
        for provider in ("xai", "xai-oauth"):
            assert PROVIDER_TO_MODELS_DEV[provider] == "xai"

    def test_unmapped_provider_not_in_dict(self):
        """``nous`` has no entry in the mapping."""
        unmapped = "nous"
        assert unmapped not in PROVIDER_TO_MODELS_DEV

    def test_openai_codex_mapped_to_openai(self):
        """``openai`` and ``openai-codex`` both map to ``openai``."""
        for provider in ("openai", "openai-codex"):
            assert PROVIDER_TO_MODELS_DEV[provider] == "openai"
|
feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)
Replace the fragile hardcoded context length system with a multi-source
resolution chain that correctly identifies context windows per provider.
Key changes:
- New agent/models_dev.py: Fetches and caches the models.dev registry
(3800+ models across 100+ providers with per-provider context windows).
In-memory cache (1hr TTL) + disk cache for cold starts.
- Rewritten get_model_context_length() resolution chain:
0. Config override (model.context_length)
1. Custom providers per-model context_length
2. Persistent disk cache
3. Endpoint /models (local servers)
4. Anthropic /v1/models API (max_input_tokens, API-key only)
5. OpenRouter live API (existing, unchanged)
6. Nous suffix-match via OpenRouter (dot/dash normalization)
7. models.dev registry lookup (provider-aware)
8. Thin hardcoded defaults (broad family patterns)
9. 128K fallback (was 2M)
- Provider-aware context: same model now correctly resolves to different
context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic,
128K on GitHub Copilot). Provider name flows through ContextCompressor.
- DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns.
models.dev replaces the per-model hardcoding.
- CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K]
to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M.
- hermes model: prompts for context_length when configuring custom
endpoints. Supports shorthand (32k, 128K). Saved to custom_providers
per-model config.
- custom_providers schema extended with optional models dict for
per-model context_length (backward compatible).
- Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against
OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash
normalization. Handles all 15 current Nous models.
- Anthropic direct: queries /v1/models for max_input_tokens. Only works
with regular API keys (sk-ant-api*), not OAuth tokens. Falls through
to models.dev for OAuth users.
Tests: 5574 passed (18 new tests for models_dev + updated probe tiers)
Docs: Updated configuration.md context length section, AGENTS.md
Co-authored-by: Test <test@test.com>
2026-03-20 06:04:33 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestExtractContext:
    """Behaviour of ``_extract_context`` on well-formed and malformed entries."""

    def test_valid_entry(self):
        """A positive ``limit.context`` value is returned as-is."""
        entry = {"limit": {"context": 128000}}
        assert _extract_context(entry) == 128000

    def test_zero_context_returns_none(self):
        """A context of 0 is treated as 'no usable value'."""
        entry = {"limit": {"context": 0}}
        assert _extract_context(entry) is None

    def test_missing_limit_returns_none(self):
        """An entry without a ``limit`` key yields None."""
        entry = {"id": "test"}
        assert _extract_context(entry) is None

    def test_missing_context_returns_none(self):
        """A ``limit`` dict without ``context`` yields None."""
        entry = {"limit": {"output": 8192}}
        assert _extract_context(entry) is None

    def test_non_dict_returns_none(self):
        """A non-dict entry yields None instead of raising."""
        assert _extract_context("not a dict") is None

    def test_float_context_coerced_to_int(self):
        """A float context comes back as a real ``int`` with the same value."""
        extracted = _extract_context({"limit": {"context": 131072.0}})
        assert extracted == 131072
        # ``131072.0 == 131072`` is already True in Python, so the equality
        # check alone cannot tell whether the coercion happened; pin the type.
        assert isinstance(extracted, int)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLookupModelsDevContext:
    """``lookup_models_dev_context`` against a patched registry fetch.

    ``agent.models_dev.fetch_models_dev`` is replaced by a mock in every
    test, so the lookups run against the in-file fixture data only.
    """

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_exact_match(self, _mock_fetch):
        """A model id spelled exactly as in the registry resolves."""
        found = lookup_models_dev_context("anthropic", "claude-opus-4-6")
        assert found == 1000000

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_case_insensitive_match(self, _mock_fetch):
        """Differences in letter case do not prevent a match."""
        found = lookup_models_dev_context("anthropic", "Claude-Opus-4-6")
        assert found == 1000000

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_provider_not_mapped(self, _mock_fetch):
        """The ``nous`` provider resolves to None."""
        assert lookup_models_dev_context("nous", "some-model") is None

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_model_not_found(self, _mock_fetch):
        """An unknown model under a known provider resolves to None."""
        found = lookup_models_dev_context("anthropic", "nonexistent-model")
        assert found is None

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_provider_aware_context(self, _mock_fetch):
        """Same model, different context per provider."""
        # Anthropic direct: 1M
        via_anthropic = lookup_models_dev_context("anthropic", "claude-opus-4-6")
        assert via_anthropic == 1000000
        # GitHub Copilot: only 128K for same model
        via_copilot = lookup_models_dev_context("copilot", "claude-opus-4.6")
        assert via_copilot == 128000

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_xai_oauth_resolves_xai_context(self, _mock_fetch):
        """xAI OAuth is an auth path, not a separate model catalog."""
        found = lookup_models_dev_context("xai-oauth", "grok-build-0.1")
        assert found == 256000

    @patch("agent.models_dev.fetch_models_dev", return_value=SAMPLE_REGISTRY)
    def test_zero_context_filtered(self, _mock_fetch):
        """The zero-context fixture entry is rejected by ``_extract_context``."""
        # audio-only is not a mapped provider, but test the filtering directly
        tts_entry = SAMPLE_REGISTRY["audio-only"]["models"]["tts-model"]
        assert _extract_context(tts_entry) is None

    @patch("agent.models_dev.fetch_models_dev", return_value={})
    def test_empty_registry(self, _mock_fetch):
        """An empty registry resolves every lookup to None."""
        found = lookup_models_dev_context("anthropic", "claude-opus-4-6")
        assert found is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestFetchModelsDev:
    """Cache-tier behaviour of ``fetch_models_dev`` (memory, disk, network).

    Every test installs its in-memory cache state through ``patch.multiple``
    rather than assigning ``md._models_dev_cache`` directly, so the module
    globals are restored when the test ends.  Plain assignment would leave
    SAMPLE_REGISTRY (sometimes with a fresh timestamp) in the module and leak
    it into whichever test runs next in the same process.
    """

    @staticmethod
    def _ok_response():
        """Build a fake HTTP 200 response whose JSON body is SAMPLE_REGISTRY."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = SAMPLE_REGISTRY
        response.raise_for_status = MagicMock()
        return response

    @staticmethod
    def _cache_state(md, cache, cache_time):
        """Return a patcher that temporarily sets the in-memory cache globals."""
        return patch.multiple(
            md,
            _models_dev_cache=cache,
            _models_dev_cache_time=cache_time,
        )

    @patch("agent.models_dev.requests.get")
    def test_fetch_success(self, mock_get):
        """A successful forced refresh returns the payload from the network."""
        import agent.models_dev as md

        mock_get.return_value = self._ok_response()

        # Start from an empty in-memory cache and keep the disk untouched.
        with self._cache_state(md, {}, 0), \
                patch.object(md, "_save_disk_cache"):
            result = fetch_models_dev(force_refresh=True)

        assert "anthropic" in result
        assert len(result) == len(SAMPLE_REGISTRY)

    @patch("agent.models_dev.requests.get")
    def test_fetch_failure_returns_stale_cache(self, mock_get):
        """When the network call raises, previously cached data is returned."""
        import agent.models_dev as md

        mock_get.side_effect = Exception("network error")

        # Cache time 0 marks the in-memory copy as expired.
        with self._cache_state(md, SAMPLE_REGISTRY, 0), \
                patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY):
            result = fetch_models_dev(force_refresh=True)

        assert "anthropic" in result

    @patch("agent.models_dev.requests.get")
    def test_in_memory_cache_used(self, mock_get):
        """A fresh in-memory cache is served without any network call."""
        import time

        import agent.models_dev as md

        # A cache time of "now" makes the in-memory copy fresh.
        with self._cache_state(md, SAMPLE_REGISTRY, time.time()):
            result = fetch_models_dev()

        mock_get.assert_not_called()
        assert result == SAMPLE_REGISTRY

    @patch("agent.models_dev.requests.get")
    def test_fresh_disk_cache_skips_network(self, mock_get):
        """When in-mem cache is empty but disk cache exists and is fresh by
        mtime (< TTL), fetch_models_dev returns disk data without ever
        making the network call.

        This is the cold-start fast path: every fresh process previously
        paid ~500 ms re-fetching a registry that was already on disk
        from an earlier run.
        """
        import agent.models_dev as md

        # Empty in-mem cache so stage 1 doesn't short-circuit.
        with self._cache_state(md, {}, 0), \
                patch.object(md, "_disk_cache_age_seconds", return_value=60.0), \
                patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY):
            result = fetch_models_dev()

            # In-mem cache populated so subsequent calls within the same
            # process stay on stage 1.  Checked while the patch is still
            # active, because the globals are restored on exit.
            assert md._models_dev_cache == SAMPLE_REGISTRY

        # The whole point: no network call.
        mock_get.assert_not_called()
        assert "anthropic" in result

    @patch("agent.models_dev.requests.get")
    def test_stale_disk_cache_falls_through_to_network(self, mock_get):
        """When the disk cache is OLDER than TTL, we must hit the network
        (and only fall back to the stale disk data if network fails)."""
        import agent.models_dev as md

        mock_get.return_value = self._ok_response()

        # Disk cache exists but is older than the TTL — must NOT short-circuit.
        stale_age = md._MODELS_DEV_CACHE_TTL + 60
        with self._cache_state(md, {}, 0), \
                patch.object(md, "_disk_cache_age_seconds", return_value=stale_age), \
                patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY), \
                patch.object(md, "_save_disk_cache"):
            result = fetch_models_dev()

        mock_get.assert_called_once()
        assert "anthropic" in result

    @patch("agent.models_dev.requests.get")
    def test_force_refresh_skips_disk_cache(self, mock_get):
        """force_refresh=True bypasses BOTH the in-mem cache AND the
        disk-cache fast path. Used by ``hermes config refresh`` and
        anywhere else the user explicitly asked for fresh data.
        """
        import agent.models_dev as md

        mock_get.return_value = self._ok_response()

        # Disk cache is fresh, but force_refresh must override it.
        with self._cache_state(md, {}, 0), \
                patch.object(md, "_disk_cache_age_seconds", return_value=60.0), \
                patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY), \
                patch.object(md, "_save_disk_cache"):
            result = fetch_models_dev(force_refresh=True)

        mock_get.assert_called_once()
        assert "anthropic" in result

    @patch("agent.models_dev.requests.get")
    def test_missing_disk_cache_falls_through_to_network(self, mock_get):
        """If the disk cache file doesn't exist (first-ever run, or it
        was deleted), fall through cleanly to network."""
        import agent.models_dev as md

        mock_get.return_value = self._ok_response()

        # An age of None stands for "no disk cache file".
        with self._cache_state(md, {}, 0), \
                patch.object(md, "_disk_cache_age_seconds", return_value=None), \
                patch.object(md, "_save_disk_cache"):
            result = fetch_models_dev()

        mock_get.assert_called_once()
        assert "anthropic" in result
|
|
|
|
|
|
2026-04-11 11:07:18 -07:00
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# get_model_capabilities — vision via modalities.input
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Registry fixture for the capability tests.  Each model entry varies which
# of the "attachment" flag and "modalities.input" list it carries.
CAPS_REGISTRY = {
    "google": {
        "id": "google",
        "models": {
            # attachment is False, but "image" is listed as an input modality.
            "gemma-4-31b-it": {
                "id": "gemma-4-31b-it",
                "attachment": False,
                "tool_call": True,
                "modalities": {"input": ["text", "image"]},
                "limit": {"context": 128000, "output": 8192},
            },
            # Neither an attachment flag nor a modalities block.
            "gemma-3-1b": {
                "id": "gemma-3-1b",
                "tool_call": True,
                "limit": {"context": 32000, "output": 8192},
            },
            # attachment is True although the input modalities are text-only.
            "text-only-with-stale-attachment": {
                "id": "text-only-with-stale-attachment",
                "attachment": True,
                "tool_call": True,
                "modalities": {"input": ["text"]},
                "limit": {"context": 128000, "output": 8192},
            },
        },
    },
    "anthropic": {
        "id": "anthropic",
        "models": {
            # attachment is True and there is no modalities block at all.
            "claude-sonnet-4": {
                "id": "claude-sonnet-4",
                "attachment": True,
                "tool_call": True,
                "limit": {"context": 200000, "output": 64000},
            },
        },
    },
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestGetModelCapabilities:
    """Tests for get_model_capabilities vision detection."""

    @patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY)
    def test_vision_from_attachment_flag(self, _mock_fetch):
        """Models with attachment=True and no modalities should report supports_vision=True."""
        capabilities = get_model_capabilities("anthropic", "claude-sonnet-4")
        assert capabilities is not None
        assert capabilities.supports_vision is True

    @patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY)
    def test_vision_from_modalities_input_image(self, _mock_fetch):
        """Models with 'image' in modalities.input but attachment=False should
        still report supports_vision=True."""
        capabilities = get_model_capabilities("google", "gemma-4-31b-it")
        assert capabilities is not None
        assert capabilities.supports_vision is True

    @patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY)
    def test_text_only_modalities_override_stale_attachment_flag(self, _mock_fetch):
        """Text-only modalities must win over stale attachment=True metadata."""
        capabilities = get_model_capabilities(
            "google", "text-only-with-stale-attachment"
        )
        assert capabilities is not None
        assert capabilities.supports_vision is False

    @patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY)
    def test_no_vision_without_attachment_or_modalities(self, _mock_fetch):
        """Models with neither attachment nor image modality should be non-vision."""
        capabilities = get_model_capabilities("google", "gemma-3-1b")
        assert capabilities is not None
        assert capabilities.supports_vision is False

    def test_modalities_non_dict_handled(self):
        """Non-dict modalities field should not crash."""
        weird_entry = {
            "id": "weird-model",
            "modalities": "text",  # not a dict
            "limit": {"context": 200000, "output": 8192},
        }
        registry = {
            "google": {"id": "google", "models": {"weird-model": weird_entry}},
        }
        # The lookup goes through the "gemini" provider name while the
        # fixture stores the model under the "google" registry key.
        with patch("agent.models_dev.fetch_models_dev", return_value=registry):
            capabilities = get_model_capabilities("gemini", "weird-model")
        assert capabilities is not None
        assert capabilities.supports_vision is False

    @patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY)
    def test_model_not_found_returns_none(self, _mock_fetch):
        """Unknown model should return None."""
        capabilities = get_model_capabilities("anthropic", "nonexistent-model")
        assert capabilities is None
|