mirror of
https://github.com/patriceckhart/zot.git
synced 2026-06-26 21:36:31 +02:00
Adds baseUrl support in models.json for local models (ollama, vLLM, etc). Migrates all install URLs and references from zot.patriceckhart.com to www.zot.sh.
316 lines
11 KiB
Go
316 lines
11 KiB
Go
package provider
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
)
|
|
|
|
// Model is one catalog entry: the identity, limits, pricing and
// provenance of a single LLM.
type Model struct {
	// Provider is the vendor backend: "anthropic" or "openai".
	Provider string
	// ID is the model id as used on the provider's API.
	ID string
	// DisplayName is the label shown for the model (e.g. "GPT-5 mini").
	DisplayName string
	// ContextWindow and MaxOutput are the model's context size and
	// maximum output size (presumably counted in tokens).
	ContextWindow, MaxOutput int
	// Reasoning is true when the model supports reasoning/thinking.
	Reasoning bool

	// Pricing, in USD per 1M tokens, for each usage category.
	PriceInput, PriceOutput, PriceCacheRead, PriceCacheWrite float64

	// Speculative flags a model whose id is known from the upstream
	// vendor's CLI but which is not yet live on the public API. Requests
	// 404 today and begin working once the provider enables the model.
	Speculative bool

	// BaseURL, when non-empty, replaces the provider's default API
	// endpoint for this model. When empty, the provider default (or the
	// --base-url flag) applies. Intended for locally served models such
	// as ollama, vLLM or LM Studio.
	BaseURL string

	// Source records the origin of the entry: "catalog" (baked in),
	// "live" (discovered via /v1/models) or "cache" (loaded from the
	// on-disk cache). It is informational only.
	Source string
}
|
|
|
|
// Catalog is the hardcoded, read-only list of supported models.
|
|
// Prices are USD per 1M tokens. The list is curated to what zot's
|
|
// clients (Anthropic Messages + OpenAI Chat Completions) can actually
|
|
// talk to; models that are only reachable through the OpenAI Responses
|
|
// API (o1-pro, o3-pro, gpt-5-pro) are omitted.
|
|
var Catalog = []Model{
|
|
// ---- Anthropic / Claude 4.x ----
|
|
{
|
|
Provider: "anthropic", ID: "claude-sonnet-4-5", DisplayName: "Claude Sonnet 4.5",
|
|
ContextWindow: 200000, MaxOutput: 64000, Reasoning: true,
|
|
PriceInput: 3.00, PriceOutput: 15.00, PriceCacheRead: 0.30, PriceCacheWrite: 3.75,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-opus-4-1", DisplayName: "Claude Opus 4.1",
|
|
ContextWindow: 200000, MaxOutput: 32000, Reasoning: true,
|
|
PriceInput: 15.00, PriceOutput: 75.00, PriceCacheRead: 1.50, PriceCacheWrite: 18.75,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-opus-4-0", DisplayName: "Claude Opus 4",
|
|
ContextWindow: 200000, MaxOutput: 32000, Reasoning: true,
|
|
PriceInput: 15.00, PriceOutput: 75.00, PriceCacheRead: 1.50, PriceCacheWrite: 18.75,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-sonnet-4-0", DisplayName: "Claude Sonnet 4",
|
|
ContextWindow: 200000, MaxOutput: 64000, Reasoning: true,
|
|
PriceInput: 3.00, PriceOutput: 15.00, PriceCacheRead: 0.30, PriceCacheWrite: 3.75,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-haiku-4-5", DisplayName: "Claude Haiku 4.5",
|
|
ContextWindow: 200000, MaxOutput: 64000, Reasoning: true,
|
|
PriceInput: 1.00, PriceOutput: 5.00, PriceCacheRead: 0.10, PriceCacheWrite: 1.25,
|
|
},
|
|
|
|
// ---- Anthropic / Claude 3.x (legacy) ----
|
|
{
|
|
Provider: "anthropic", ID: "claude-3-7-sonnet-20250219", DisplayName: "Claude Sonnet 3.7",
|
|
ContextWindow: 200000, MaxOutput: 64000, Reasoning: true,
|
|
PriceInput: 3.00, PriceOutput: 15.00, PriceCacheRead: 0.30, PriceCacheWrite: 3.75,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-3-5-sonnet-20241022", DisplayName: "Claude Sonnet 3.5 v2",
|
|
ContextWindow: 200000, MaxOutput: 8192, Reasoning: false,
|
|
PriceInput: 3.00, PriceOutput: 15.00, PriceCacheRead: 0.30, PriceCacheWrite: 3.75,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-3-5-haiku-latest", DisplayName: "Claude Haiku 3.5",
|
|
ContextWindow: 200000, MaxOutput: 8192, Reasoning: false,
|
|
PriceInput: 0.80, PriceOutput: 4.00, PriceCacheRead: 0.08, PriceCacheWrite: 1.00,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-3-opus-20240229", DisplayName: "Claude Opus 3",
|
|
ContextWindow: 200000, MaxOutput: 4096, Reasoning: false,
|
|
PriceInput: 15.00, PriceOutput: 75.00, PriceCacheRead: 1.50, PriceCacheWrite: 18.75,
|
|
},
|
|
|
|
// ---- OpenAI / GPT-5 family ----
|
|
{
|
|
Provider: "openai", ID: "gpt-5", DisplayName: "GPT-5",
|
|
ContextWindow: 400000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 1.25, PriceOutput: 10.00, PriceCacheRead: 0.125,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5-mini", DisplayName: "GPT-5 mini",
|
|
ContextWindow: 400000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 0.25, PriceOutput: 2.00, PriceCacheRead: 0.025,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5-nano", DisplayName: "GPT-5 nano",
|
|
ContextWindow: 400000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 0.05, PriceOutput: 0.40, PriceCacheRead: 0.005,
|
|
},
|
|
|
|
// ---- OpenAI / GPT-4.1 family ----
|
|
{
|
|
Provider: "openai", ID: "gpt-4.1", DisplayName: "GPT-4.1",
|
|
ContextWindow: 1047576, MaxOutput: 32768, Reasoning: false,
|
|
PriceInput: 2.00, PriceOutput: 8.00, PriceCacheRead: 0.50,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-4.1-mini", DisplayName: "GPT-4.1 mini",
|
|
ContextWindow: 1047576, MaxOutput: 32768, Reasoning: false,
|
|
PriceInput: 0.40, PriceOutput: 1.60, PriceCacheRead: 0.10,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-4.1-nano", DisplayName: "GPT-4.1 nano",
|
|
ContextWindow: 1047576, MaxOutput: 32768, Reasoning: false,
|
|
PriceInput: 0.10, PriceOutput: 0.40, PriceCacheRead: 0.03,
|
|
},
|
|
|
|
// ---- OpenAI / GPT-4o family ----
|
|
{
|
|
Provider: "openai", ID: "gpt-4o", DisplayName: "GPT-4o",
|
|
ContextWindow: 128000, MaxOutput: 16384, Reasoning: false,
|
|
PriceInput: 2.50, PriceOutput: 10.00, PriceCacheRead: 1.25,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-4o-mini", DisplayName: "GPT-4o mini",
|
|
ContextWindow: 128000, MaxOutput: 16384, Reasoning: false,
|
|
PriceInput: 0.15, PriceOutput: 0.60, PriceCacheRead: 0.08,
|
|
},
|
|
|
|
// ---- OpenAI / reasoning models ----
|
|
{
|
|
Provider: "openai", ID: "o4-mini", DisplayName: "o4-mini",
|
|
ContextWindow: 200000, MaxOutput: 100000, Reasoning: true,
|
|
PriceInput: 1.10, PriceOutput: 4.40, PriceCacheRead: 0.275,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "o3", DisplayName: "o3",
|
|
ContextWindow: 200000, MaxOutput: 100000, Reasoning: true,
|
|
PriceInput: 2.00, PriceOutput: 8.00, PriceCacheRead: 0.50,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "o3-mini", DisplayName: "o3-mini",
|
|
ContextWindow: 200000, MaxOutput: 100000, Reasoning: true,
|
|
PriceInput: 1.10, PriceOutput: 4.40, PriceCacheRead: 0.55,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "o1", DisplayName: "o1",
|
|
ContextWindow: 200000, MaxOutput: 100000, Reasoning: true,
|
|
PriceInput: 15.00, PriceOutput: 60.00, PriceCacheRead: 7.50,
|
|
},
|
|
|
|
// ---- Speculative: Anthropic ----
|
|
{
|
|
Provider: "anthropic", ID: "claude-opus-4-5", DisplayName: "Claude Opus 4.5",
|
|
// 200k ctx / 64k maxOutput per Anthropic's published sizing
|
|
// for the opus-4-5 family; the 1M context is a 4.6+ thing.
|
|
ContextWindow: 200000, MaxOutput: 64000, Reasoning: true,
|
|
PriceInput: 5.00, PriceOutput: 25.00, PriceCacheRead: 0.50, PriceCacheWrite: 6.25,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-opus-4-6", DisplayName: "Claude Opus 4.6",
|
|
ContextWindow: 1000000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 5.00, PriceOutput: 25.00, PriceCacheRead: 0.50, PriceCacheWrite: 6.25,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-opus-4-7", DisplayName: "Claude Opus 4.7",
|
|
ContextWindow: 1000000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 5.00, PriceOutput: 25.00, PriceCacheRead: 0.50, PriceCacheWrite: 6.25,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "anthropic", ID: "claude-sonnet-4-6", DisplayName: "Claude Sonnet 4.6",
|
|
ContextWindow: 1000000, MaxOutput: 64000, Reasoning: true,
|
|
PriceInput: 3.00, PriceOutput: 15.00, PriceCacheRead: 0.30, PriceCacheWrite: 3.75,
|
|
Speculative: true,
|
|
},
|
|
|
|
// ---- Speculative: OpenAI ----
|
|
// Context windows on the OpenAI gpt-5.x family differ by route:
|
|
// the direct API advertises 400k, the ChatGPT Codex OAuth backend
|
|
// caps at 272k. zot serves both auth modes from one catalog row
|
|
// per id, so we pin to the smaller number to keep the context-usage
|
|
// meter honest under subscription auth. Users on the direct API
|
|
// simply see a conservative headroom estimate.
|
|
{
|
|
Provider: "openai", ID: "gpt-5.1", DisplayName: "GPT-5.1",
|
|
ContextWindow: 272000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 1.25, PriceOutput: 10.00, PriceCacheRead: 0.125,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5.2", DisplayName: "GPT-5.2",
|
|
ContextWindow: 272000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 1.75, PriceOutput: 14.00, PriceCacheRead: 0.175,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5.3", DisplayName: "GPT-5.3",
|
|
ContextWindow: 272000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 1.75, PriceOutput: 14.00, PriceCacheRead: 0.175,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5.4", DisplayName: "GPT-5.4",
|
|
// ContextWindow: 272k across every route we support (OpenAI
|
|
// direct API and the ChatGPT Codex OAuth backend).
|
|
ContextWindow: 272000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 2.50, PriceOutput: 15.00, PriceCacheRead: 0.25,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5.4-mini", DisplayName: "GPT-5.4 mini",
|
|
// ContextWindow: 400k on the OpenAI direct API, 272k on the
|
|
// ChatGPT Codex OAuth backend. We pin to the smaller Codex
|
|
// cap so the context-usage meter is honest under subscription
|
|
// auth; direct-API users simply see a conservative headroom
|
|
// estimate rather than an inflated one.
|
|
ContextWindow: 272000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 0.75, PriceOutput: 4.50, PriceCacheRead: 0.075,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5.5", DisplayName: "GPT-5.5",
|
|
ContextWindow: 400000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 2.50, PriceOutput: 15.00, PriceCacheRead: 0.25,
|
|
Speculative: true,
|
|
},
|
|
{
|
|
Provider: "openai", ID: "gpt-5.5-mini", DisplayName: "GPT-5.5 mini",
|
|
ContextWindow: 400000, MaxOutput: 128000, Reasoning: true,
|
|
PriceInput: 0.75, PriceOutput: 4.50, PriceCacheRead: 0.075,
|
|
Speculative: true,
|
|
},
|
|
}
|
|
|
|
// DefaultModel is used when the user does not specify one.
|
|
var DefaultModel = Catalog[0] // claude-sonnet-4-5
|
|
|
|
// ----- active (merged) catalog -----
|
|
//
|
|
// Callers should use Active() / FindModel / ModelsForProvider for
|
|
// lookups. They return the baked-in Catalog merged with any live
|
|
// models loaded via SetLiveModels.
|
|
|
|
var (
|
|
activeMu sync.RWMutex
|
|
active []Model = Catalog // default: just the static catalog
|
|
)
|
|
|
|
// SetLiveModels replaces the "live" overlay used by the active catalog.
|
|
// Typically called after a successful /v1/models discovery or on load
|
|
// from the on-disk cache.
|
|
func SetLiveModels(live []Model) {
|
|
activeMu.Lock()
|
|
defer activeMu.Unlock()
|
|
if len(live) == 0 {
|
|
active = Catalog
|
|
return
|
|
}
|
|
active = MergeCatalog(live)
|
|
}
|
|
|
|
// Active returns the current merged catalog.
|
|
func Active() []Model {
|
|
activeMu.RLock()
|
|
defer activeMu.RUnlock()
|
|
out := make([]Model, len(active))
|
|
copy(out, active)
|
|
return out
|
|
}
|
|
|
|
// FindModel returns a Model by id, optionally constrained by provider.
|
|
// If provider is empty, the first matching id is returned. Looks up
|
|
// against the merged active catalog.
|
|
func FindModel(provider, id string) (Model, error) {
|
|
for _, m := range Active() {
|
|
if m.ID == id && (provider == "" || m.Provider == provider) {
|
|
return m, nil
|
|
}
|
|
}
|
|
return Model{}, fmt.Errorf("unknown model %q (provider=%q)", id, provider)
|
|
}
|
|
|
|
// ModelsForProvider returns all models for the given provider, from the
|
|
// merged active catalog.
|
|
func ModelsForProvider(provider string) []Model {
|
|
var out []Model
|
|
for _, m := range Active() {
|
|
if m.Provider == provider {
|
|
out = append(out, m)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// ComputeCost returns the USD cost for the given usage on model m.
|
|
func ComputeCost(m Model, u Usage) float64 {
|
|
const per = 1_000_000.0
|
|
return float64(u.InputTokens)*m.PriceInput/per +
|
|
float64(u.OutputTokens)*m.PriceOutput/per +
|
|
float64(u.CacheReadTokens)*m.PriceCacheRead/per +
|
|
float64(u.CacheWriteTokens)*m.PriceCacheWrite/per
|
|
}
|