hermes-bsd/tests/hermes_cli/test_codex_cli_model_picker.py

193 lines
6.5 KiB
Python
Raw Permalink Normal View History

fix(tests): unstick CI — sweep stale tests from recent merges (#12670) One source fix (web_server category merge) + five test updates that didn't travel with their feature PRs. All 13 failures on the 04-19 CI run on main are now accounted for (5 already self-healed on main; 8 fixed here). Changes - web_server.py: add code_execution → agent to _CATEGORY_MERGE (new singleton section from #11971 broke no-single-field-category invariant). - test_browser_camofox_state: bump hardcoded _config_version 18 → 19 (also from #11971). - test_registry: add browser_cdp_tool (#12369) and discord_tool (#4753) to the expected built-in tool set. - test_run_agent::test_tool_call_accumulation: rewrite fragment chunks — #0f778f77 switched streaming name-accumulation from += to = to fix MiniMax/NIM duplication; the test still encoded the old fragment-per-chunk premise. - test_concurrent_interrupt::_Stub: no-op _apply_pending_steer_to_tool_results — #12116 added this call after concurrent tool batches; the hand-rolled stub was missing it. - test_codex_cli_model_picker: drop the two obsolete tests that asserted auto-import from ~/.codex/auth.json into the Hermes auth store. #12360 explicitly removed that behavior (refresh-token reuse races with Codex CLI / VS Code); adoption is now explicit via `hermes auth openai-codex`. Remaining 3 tests in the file (normal path, Claude Code fallback, negative case) still cover the picker. Validation - scripts/run_tests.sh across all 6 affected files + surrounding tests (54 tests total) all green locally.
2026-04-19 12:39:58 -07:00
"""Regression tests for the /model picker's credential-discovery paths.
fix(tests): unstick CI — sweep stale tests from recent merges (#12670) One source fix (web_server category merge) + five test updates that didn't travel with their feature PRs. All 13 failures on the 04-19 CI run on main are now accounted for (5 already self-healed on main; 8 fixed here). Changes - web_server.py: add code_execution → agent to _CATEGORY_MERGE (new singleton section from #11971 broke no-single-field-category invariant). - test_browser_camofox_state: bump hardcoded _config_version 18 → 19 (also from #11971). - test_registry: add browser_cdp_tool (#12369) and discord_tool (#4753) to the expected built-in tool set. - test_run_agent::test_tool_call_accumulation: rewrite fragment chunks — #0f778f77 switched streaming name-accumulation from += to = to fix MiniMax/NIM duplication; the test still encoded the old fragment-per-chunk premise. - test_concurrent_interrupt::_Stub: no-op _apply_pending_steer_to_tool_results — #12116 added this call after concurrent tool batches; the hand-rolled stub was missing it. - test_codex_cli_model_picker: drop the two obsolete tests that asserted auto-import from ~/.codex/auth.json into the Hermes auth store. #12360 explicitly removed that behavior (refresh-token reuse races with Codex CLI / VS Code); adoption is now explicit via `hermes auth openai-codex`. Remaining 3 tests in the file (normal path, Claude Code fallback, negative case) still cover the picker. Validation - scripts/run_tests.sh across all 6 affected files + surrounding tests (54 tests total) all green locally.
2026-04-19 12:39:58 -07:00
Covers:
- Normal path (tokens already in Hermes auth store)
- Claude Code fallback (tokens only in ~/.claude/.credentials.json)
- Negative case (no credentials anywhere)
fix(tests): unstick CI — sweep stale tests from recent merges (#12670) One source fix (web_server category merge) + five test updates that didn't travel with their feature PRs. All 13 failures on the 04-19 CI run on main are now accounted for (5 already self-healed on main; 8 fixed here). Changes - web_server.py: add code_execution → agent to _CATEGORY_MERGE (new singleton section from #11971 broke no-single-field-category invariant). - test_browser_camofox_state: bump hardcoded _config_version 18 → 19 (also from #11971). - test_registry: add browser_cdp_tool (#12369) and discord_tool (#4753) to the expected built-in tool set. - test_run_agent::test_tool_call_accumulation: rewrite fragment chunks — #0f778f77 switched streaming name-accumulation from += to = to fix MiniMax/NIM duplication; the test still encoded the old fragment-per-chunk premise. - test_concurrent_interrupt::_Stub: no-op _apply_pending_steer_to_tool_results — #12116 added this call after concurrent tool batches; the hand-rolled stub was missing it. - test_codex_cli_model_picker: drop the two obsolete tests that asserted auto-import from ~/.codex/auth.json into the Hermes auth store. #12360 explicitly removed that behavior (refresh-token reuse races with Codex CLI / VS Code); adoption is now explicit via `hermes auth openai-codex`. Remaining 3 tests in the file (normal path, Claude Code fallback, negative case) still cover the picker. Validation - scripts/run_tests.sh across all 6 affected files + surrounding tests (54 tests total) all green locally.
2026-04-19 12:39:58 -07:00
Note: auto-import from ~/.codex/auth.json was removed in #12360 — Hermes
now owns its own openai-codex auth state, and users explicitly adopt
existing Codex CLI tokens via `hermes auth openai-codex`. The old
"Codex CLI shared file" discovery tests were removed with that change.
"""
import base64
import json
import time
from pathlib import Path
import pytest
def _make_fake_jwt(expiry_offset: int = 3600) -> str:
"""Build a fake JWT with a future expiry."""
header = base64.urlsafe_b64encode(b'{"alg":"RS256"}').rstrip(b"=").decode()
exp = int(time.time()) + expiry_offset
payload_bytes = json.dumps({"exp": exp, "sub": "test"}).encode()
payload = base64.urlsafe_b64encode(payload_bytes).rstrip(b"=").decode()
return f"{header}.{payload}.fakesig"
@pytest.fixture()
def hermes_auth_only_env(tmp_path, monkeypatch):
"""Tokens already in Hermes auth store (no Codex CLI needed)."""
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
# Point CODEX_HOME to nonexistent dir to prove it's not needed
monkeypatch.setenv("CODEX_HOME", str(tmp_path / "no_codex"))
(hermes_home / "auth.json").write_text(json.dumps({
"version": 2,
"providers": {
"openai-codex": {
"tokens": {
"access_token": _make_fake_jwt(),
"refresh_token": "fake-refresh",
},
"last_refresh": "2026-04-12T00:00:00Z",
}
},
}))
for var in [
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
"NOUS_API_KEY", "DEEPSEEK_API_KEY",
]:
monkeypatch.delenv(var, raising=False)
return hermes_home
def test_normal_path_still_works(hermes_auth_only_env):
"""openai-codex appears when tokens are already in Hermes auth store."""
from hermes_cli.model_switch import list_authenticated_providers
providers = list_authenticated_providers(
current_provider="openai-codex",
max_models=10,
)
slugs = [p["slug"] for p in providers]
assert "openai-codex" in slugs
2026-05-04 01:45:32 +00:00
def test_codex_picker_uses_live_codex_catalog(hermes_auth_only_env, tmp_path, monkeypatch):
"""The gateway /model picker should surface Codex CLI-only listed models."""
from hermes_cli.model_switch import list_authenticated_providers
codex_home = tmp_path / "codex-home"
codex_home.mkdir()
(codex_home / "models_cache.json").write_text(json.dumps({
"models": [
{"slug": "gpt-5.5", "priority": 0, "supported_in_api": True},
{"slug": "gpt-5.3-codex-spark", "priority": 7, "supported_in_api": False},
]
}))
monkeypatch.setenv("CODEX_HOME", str(codex_home))
# Force the cache fallback path — without this the test issues a real
# 10s HTTP probe to chatgpt.com/backend-api/codex/models which is both
# slow and non-deterministic in CI/sandboxed environments.
monkeypatch.setattr(
"hermes_cli.codex_models._fetch_models_from_api",
lambda access_token: [],
)
2026-05-04 01:45:32 +00:00
providers = list_authenticated_providers(
current_provider="openai-codex",
max_models=10,
)
codex = next(p for p in providers if p["slug"] == "openai-codex")
assert "gpt-5.3-codex-spark" in codex["models"]
assert codex["total_models"] == len(codex["models"])
@pytest.fixture()
def claude_code_only_env(tmp_path, monkeypatch):
"""Set up an environment where Anthropic credentials only exist in
~/.claude/.credentials.json (Claude Code) not in env vars or Hermes
auth store."""
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
# No Codex CLI
monkeypatch.setenv("CODEX_HOME", str(tmp_path / "no_codex"))
(hermes_home / "auth.json").write_text(
json.dumps({"version": 2, "providers": {}})
)
# Claude Code credentials in the correct format
claude_dir = tmp_path / ".claude"
claude_dir.mkdir()
(claude_dir / ".credentials.json").write_text(json.dumps({
"claudeAiOauth": {
"accessToken": _make_fake_jwt(),
"refreshToken": "fake-refresh",
"expiresAt": int(time.time() * 1000) + 3_600_000,
}
}))
# Patch Path.home() so the adapter finds the file
monkeypatch.setattr(Path, "home", classmethod(lambda cls: tmp_path))
for var in [
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
"ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
"NOUS_API_KEY", "DEEPSEEK_API_KEY",
]:
monkeypatch.delenv(var, raising=False)
return hermes_home
def test_claude_code_file_detected_by_model_picker(claude_code_only_env):
"""anthropic should appear when credentials only exist in ~/.claude/.credentials.json."""
from hermes_cli.model_switch import list_authenticated_providers
providers = list_authenticated_providers(
current_provider="anthropic",
max_models=10,
)
slugs = [p["slug"] for p in providers]
assert "anthropic" in slugs, (
f"anthropic not found in /model picker providers: {slugs}"
)
anthropic = next(p for p in providers if p["slug"] == "anthropic")
assert anthropic["is_current"] is True
assert anthropic["total_models"] > 0
def test_no_codex_when_no_credentials(tmp_path, monkeypatch):
"""openai-codex should NOT appear when no credentials exist anywhere."""
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("CODEX_HOME", str(tmp_path / "no_codex"))
(hermes_home / "auth.json").write_text(
json.dumps({"version": 2, "providers": {}})
)
for var in [
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
"NOUS_API_KEY", "DEEPSEEK_API_KEY", "COPILOT_GITHUB_TOKEN",
"GH_TOKEN", "GEMINI_API_KEY",
]:
monkeypatch.delenv(var, raising=False)
from hermes_cli.model_switch import list_authenticated_providers
providers = list_authenticated_providers(
current_provider="openrouter",
max_models=10,
)
slugs = [p["slug"] for p in providers]
assert "openai-codex" not in slugs, (
"openai-codex should not appear without any credentials"
)