From d8a0cba4fc071a242713fe4eac67d596c1fe0c52 Mon Sep 17 00:00:00 2001 From: patriceckhart Date: Sat, 18 Apr 2026 09:15:46 +0200 Subject: [PATCH] add telegram bot bridge --- README.md | 53 ++- internal/agent/args.go | 8 + internal/agent/botcmd.go | 433 ++++++++++++++++++++++++ internal/agent/cli.go | 6 + internal/agent/config.go | 59 +++- internal/agent/modes/model_dialog.go | 126 +++++-- internal/agent/modes/telegram/api.go | 264 +++++++++++++++ internal/agent/modes/telegram/bot.go | 407 ++++++++++++++++++++++ internal/agent/modes/telegram/config.go | 64 ++++ internal/agent/modes/telegram/daemon.go | 109 ++++++ internal/agent/modes/telegram/fs.go | 10 + internal/agent/modes/telegram/io.go | 9 + 12 files changed, 1518 insertions(+), 30 deletions(-) create mode 100644 internal/agent/botcmd.go create mode 100644 internal/agent/modes/telegram/api.go create mode 100644 internal/agent/modes/telegram/bot.go create mode 100644 internal/agent/modes/telegram/config.go create mode 100644 internal/agent/modes/telegram/daemon.go create mode 100644 internal/agent/modes/telegram/fs.go create mode 100644 internal/agent/modes/telegram/io.go diff --git a/README.md b/README.md index 6111dbb..93a8d17 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,13 @@ yet another coding agent harness, lightweight and written (vibe-slopped) in go. -one static binary. -two providers atm (anthropic, openai/codex). -four tools (read, write, edit, bash). -three run modes (interactive tui, print, json). -no extensions. -no plugins. -no ceremony. -no community. +- one static binary. +- two providers atm (anthropic, openai/codex). +- four tools (read, write, edit, bash). +- three run modes (interactive tui, print, json). +- built-in telegram bot. +- no extensions atm. +- no community atm. ## install @@ -52,6 +51,14 @@ run `zot` and type `/login`. pick one of two methods: > **note on subscription login**: the oauth client ids used are the ones published in anthropic's claude code cli and openai's codex cli. reusing them from a third-party tool is against their terms of service and may be revoked at any time. use it at your own risk; the api-key flow is the safe default. +### token refresh + +oauth access tokens are short-lived (anthropic ~8h, openai ~30d). zot refreshes them automatically: + +- at every credential lookup, zot checks the stored `expiry` and — if past it (with a 60s safety margin) — hits the provider's `oauth/token` endpoint with the stored `refresh_token`, persists the new `access_token` + `refresh_token` + `expiry` back to `auth.json`, and hands the fresh token to the client. +- the telegram bridge additionally refreshes once per turn so a bot that runs for days keeps working without manual intervention. +- if the refresh itself fails (the `refresh_token` was revoked, or the account was logged out everywhere), the error bubbles up to the caller: the tui shows it in the status line, the bot replies with it in your dm. run `/login` to get a fresh token pair. + all data lives under `$ZOT_HOME`: ``` @@ -200,6 +207,36 @@ frames containing images are full-repainted (no differential diff) to prevent st | `pgup` / `pgdn` | scroll one page up / down | | `up` / `down` (editor empty) | scroll three lines up / down — this is how the mouse wheel reaches the scroll logic on most terminals | +## telegram bot (bridge) + +zot can run as a telegram bot so you can dm it from your phone. it's a built-in subcommand, not a plugin: + +```bash +zot telegram-bot setup # paste a BotFather token, verify, save +zot telegram-bot run # foreground: long-poll in this terminal (ctrl+c to stop) +zot telegram-bot start # background: detach and return immediately +zot telegram-bot stop # sigterm the background bot (sigkill after 5s) +zot telegram-bot logs -f # tail $ZOT_HOME/logs/bot.log (omit -f to just cat) +zot telegram-bot status # config (token masked) + running/stopped +zot telegram-bot reset # forget the token + paired user +# short alias: `zot tg ...` is accepted for every subcommand +``` + +the background flavor writes the child's pid to `$ZOT_HOME/bot.pid` and redirects stdout+stderr to `$ZOT_HOME/logs/bot.log`. `zot telegram-bot stop` reads that pid, sends sigterm, waits up to five seconds, then escalates to sigkill if the child is still alive. running two instances at once is refused at startup. + +> **use the installed binary for `start`.** `go run ./cmd/zot telegram-bot start` won't work — `go run` builds a binary in a temp directory and deletes it when it exits, which kills the detached child. run `make install` (or `go build`) first and invoke the installed binary. + +setup flow: + +1. talk to [@BotFather](https://t.me/BotFather) on telegram, run `/newbot`, copy the token it gives you. +2. run `zot bot setup` and paste the token when prompted. +3. run `zot bot run` in the directory you want the agent to operate in. +4. open your bot on telegram, send `/start`. the first user to do this claims the bridge (stored as `allowed_user_id`); every other user is rejected. + +from then on, any dm you send is forwarded to the agent as a user prompt. attached photos or image/* documents are downloaded and passed to vision-capable models. in-bot telegram commands: `/help`, `/status`, `/stop` (cancel the current turn). config lives in `$ZOT_HOME/bot.json` (mode 0600). + +bot mode respects the usual zot flags — `--provider`, `--model`, `--cwd`, `--reasoning`, `--continue`, `--no-session`, `--no-tools`, etc. run `zot bot run -c --model claude-opus-4-1` to resume the latest session on opus, for example. + ## development ```bash diff --git a/internal/agent/args.go b/internal/agent/args.go index e07ab27..02e0e32 100644 --- a/internal/agent/args.go +++ b/internal/agent/args.go @@ -188,6 +188,14 @@ usage: zot "prompt" interactive, pre-filled prompt zot -p "prompt" print final text, exit zot --json "prompt" newline-delimited json events, exit + zot telegram-bot setup configure a telegram bot (from BotFather) + zot telegram-bot run foreground bridge (ctrl+c to stop) + zot telegram-bot start background bridge (detached) + zot telegram-bot stop stop the background bridge + zot telegram-bot logs [-f] tail the background bridge's log + zot telegram-bot status config + running state + zot telegram-bot reset forget saved token + (short alias: zot tg ...) flags: --provider anthropic|openai diff --git a/internal/agent/botcmd.go b/internal/agent/botcmd.go new file mode 100644 index 0000000..7bfa4eb --- /dev/null +++ b/internal/agent/botcmd.go @@ -0,0 +1,433 @@ +package agent + +import ( + "bufio" + "context" + "errors" + "fmt" + "io" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "github.com/patriceckhart/zot/internal/agent/modes/telegram" + "github.com/patriceckhart/zot/internal/core" +) + +// runBotCommand dispatches `zot telegram-bot ...` subcommands. The +// short alias "tg" is also accepted. Returns true if rawArgs begins +// with a recognised subcommand, false otherwise. +func runBotCommand(rawArgs []string, version string) (handled bool, err error) { + if len(rawArgs) == 0 { + return false, nil + } + switch rawArgs[0] { + case "telegram-bot", "tg": + // recognised + default: + return false, nil + } + sub := "" + var tail []string + if len(rawArgs) >= 2 { + sub = rawArgs[1] + tail = rawArgs[2:] + } + switch sub { + case "", "help", "-h", "--help": + printBotHelp() + return true, nil + case "setup": + return true, botSetup(tail) + case "status": + return true, botStatus() + case "reset": + return true, botReset() + case "run": + return true, botRun(tail, version) + case "start": + return true, botStart(tail) + case "stop": + return true, botStop() + case "logs": + return true, botLogs(tail) + default: + printBotHelp() + return true, fmt.Errorf("unknown bot subcommand %q", sub) + } +} + +// printBotHelp prints usage for `zot bot`. +func printBotHelp() { + fmt.Fprint(os.Stderr, `zot telegram-bot — telegram bridge + +usage: + zot telegram-bot setup paste a BotFather token, verify, save + zot telegram-bot status show bridge config and whether it's running + zot telegram-bot run [flags] run in the foreground (ctrl+c to stop) + zot telegram-bot start [flags] launch in background, detach, return immediately + zot telegram-bot stop sigterm the running background bot, sigkill if needed + zot telegram-bot logs [--follow] tail the background bot's log file + zot telegram-bot reset forget token + allowed user + +setup flow: + 1. talk to @BotFather on telegram, /newbot, copy the token + 2. run "zot telegram-bot setup" and paste the token + 3. run "zot telegram-bot start" (background) or "zot telegram-bot run" (foreground) + 4. send /start to your bot from telegram; the first sender claims it + +while the bot is running, dm it anything and the message is forwarded +to the agent the same way it would be from the tui. image attachments +(photos or image/* documents) are passed to vision-capable models. +telegram commands the bot handles directly: /help, /status, /stop. + +config & state: + $ZOT_HOME/bot.json # bot token + paired user (mode 0600) + $ZOT_HOME/bot.pid # pid of the running bot (written by run/start) + $ZOT_HOME/logs/bot.log # stdout+stderr from "zot telegram-bot start" +`) +} + +// botSetup interactively reads a bot token, verifies it via getMe, and saves it. +func botSetup(_ []string) error { + cfg, err := telegram.LoadConfig(ZotHome()) + if err != nil { + return err + } + + fmt.Print("telegram bot token (from @BotFather): ") + reader := bufio.NewReader(os.Stdin) + line, err := reader.ReadString('\n') + if err != nil { + return err + } + token := strings.TrimSpace(line) + if token == "" { + return fmt.Errorf("no token provided") + } + + client := telegram.NewClient(token) + me, err := client.GetMe(context.Background()) + if err != nil { + return fmt.Errorf("token rejected by telegram: %w", err) + } + cfg.BotToken = token + cfg.BotUsername = me.Username + cfg.BotID = me.ID + // Any stored pairing might be for a different bot; clear it. + cfg.AllowedUserID = 0 + cfg.LastUpdateID = 0 + if err := telegram.SaveConfig(ZotHome(), cfg); err != nil { + return err + } + fmt.Printf("\nsaved: @%s (id=%d) to %s\n", me.Username, me.ID, telegram.ConfigPath(ZotHome())) + fmt.Println("next: run `zot telegram-bot run`, then send /start to your bot from telegram.") + return nil +} + +// botStatus prints the current bot config without the token, plus +// whether the background process is alive. +func botStatus() error { + cfg, err := telegram.LoadConfig(ZotHome()) + if err != nil { + return err + } + if cfg.BotToken == "" { + fmt.Println("telegram: not configured (run `zot telegram-bot setup`)") + return nil + } + maskedTok := maskToken(cfg.BotToken) + fmt.Printf("telegram bot: @%s (id=%d)\n", cfg.BotUsername, cfg.BotID) + fmt.Printf("token: %s\n", maskedTok) + if cfg.AllowedUserID == 0 { + fmt.Println("paired with: (unpaired — send /start from telegram to claim)") + } else { + fmt.Printf("paired with: telegram user id %d\n", cfg.AllowedUserID) + } + fmt.Printf("last update: %d\n", cfg.LastUpdateID) + fmt.Printf("config file: %s\n", telegram.ConfigPath(ZotHome())) + + pid, alive, _ := telegram.IsRunning(ZotHome()) + switch { + case alive: + fmt.Printf("process: running (pid %d)\n", pid) + case pid > 0: + fmt.Printf("process: stopped (stale pid %d in %s)\n", pid, telegram.PIDPath(ZotHome())) + default: + fmt.Println("process: stopped") + } + logPath := telegram.LogPath(ZotHome()) + if fi, err := os.Stat(logPath); err == nil { + fmt.Printf("log file: %s (%d bytes)\n", logPath, fi.Size()) + } + return nil +} + +// botReset wipes the on-disk bot.json entry. +func botReset() error { + p := telegram.ConfigPath(ZotHome()) + if _, err := os.Stat(p); os.IsNotExist(err) { + fmt.Println("no bot config to reset") + return nil + } + if err := os.Remove(p); err != nil { + return err + } + fmt.Println("removed", p) + return nil +} + +// botStart launches `zot telegram-bot run` as a detached child process, writes +// its pid to $ZOT_HOME/bot.pid, and returns immediately. Stdout/stderr +// of the child are redirected to $ZOT_HOME/logs/bot.log. +func botStart(rawTail []string) error { + // Refuse to start if another bot is already running. + if pid, alive, _ := telegram.IsRunning(ZotHome()); alive { + return fmt.Errorf("bot is already running (pid %d); use `zot telegram-bot stop` first", pid) + } + _ = telegram.RemovePID(ZotHome()) // clear any stale pid file + + cfg, err := telegram.LoadConfig(ZotHome()) + if err != nil { + return err + } + if cfg.BotToken == "" { + return fmt.Errorf("no bot token configured — run `zot telegram-bot setup` first") + } + + self, err := os.Executable() + if err != nil { + return fmt.Errorf("locate zot binary: %w", err) + } + + logPath := telegram.LogPath(ZotHome()) + if err := os.MkdirAll(filepath.Dir(logPath), 0o755); err != nil { + return err + } + logFile, err := os.OpenFile(logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open log file: %w", err) + } + defer logFile.Close() + + // Refuse to start from a `go run` temp binary: Go deletes the + // binary when `go run` exits, which breaks the detached child. + // Users hit cryptic tls / exec errors on that path; fail clearly. + if strings.Contains(self, string(os.PathSeparator)+"go-build") || + strings.Contains(self, string(os.PathSeparator)+"go-tmp") { + return fmt.Errorf("detected `go run` temp binary at %s — run `make install` (or copy ./bin/zot to your PATH) and use the installed binary for `start`", self) + } + + // Child argv: same flags the user passed to `zot telegram-bot start`, + // mapped to `zot telegram-bot run`. Preserves --provider, --model, --cwd, etc. + args := append([]string{"telegram-bot", "run"}, rawTail...) + cmd := exec.Command(self, args...) + cmd.Stdout = logFile + cmd.Stderr = logFile + cmd.Stdin = nil + // Detach: new session so terminal signals don't reach the child. + cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} + + if err := cmd.Start(); err != nil { + return fmt.Errorf("spawn: %w", err) + } + if err := telegram.WritePID(ZotHome(), cmd.Process.Pid); err != nil { + _ = cmd.Process.Kill() + return fmt.Errorf("write pid: %w", err) + } + // Don't wait() — detach. OS will reparent the child to init when we exit. + go func() { _ = cmd.Process.Release() }() + + fmt.Printf("started zot telegram-bot as pid %d (logs: %s)\n", cmd.Process.Pid, logPath) + fmt.Println("use `zot telegram-bot logs -f` to tail, `zot telegram-bot stop` to stop.") + return nil +} + +// botStop sends SIGTERM to the running bot (SIGKILL if it doesn't +// exit within 5s) and cleans up the pid file. +func botStop() error { + pid, alive, err := telegram.IsRunning(ZotHome()) + if err != nil { + return err + } + if !alive { + if pid > 0 { + _ = telegram.RemovePID(ZotHome()) + fmt.Printf("no live process; cleared stale pid %d\n", pid) + return nil + } + fmt.Println("bot is not running") + return nil + } + if err := telegram.StopProcess(pid, 5*time.Second); err != nil { + return fmt.Errorf("stop pid %d: %w", pid, err) + } + _ = telegram.RemovePID(ZotHome()) + fmt.Printf("stopped pid %d\n", pid) + return nil +} + +// botLogs prints (or tails with --follow) the bot log file. +func botLogs(rawTail []string) error { + follow := false + for _, a := range rawTail { + if a == "-f" || a == "--follow" { + follow = true + } + } + p := telegram.LogPath(ZotHome()) + f, err := os.Open(p) + if errors.Is(err, os.ErrNotExist) { + fmt.Println("no log file at", p) + return nil + } + if err != nil { + return err + } + defer f.Close() + + if _, err := io.Copy(os.Stdout, f); err != nil { + return err + } + if !follow { + return nil + } + + // Naive tail -f: poll for new bytes until ctrl+c. + sigc := make(chan os.Signal, 1) + signal.Notify(sigc, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigc) + for { + select { + case <-sigc: + return nil + case <-time.After(500 * time.Millisecond): + if _, err := io.Copy(os.Stdout, f); err != nil { + return err + } + } + } +} + +// botRun starts the polling loop in the foreground. Ctrl+C stops it. +func botRun(rawTail []string, version string) error { + // Parse only a small subset of flags relevant to bot run. We reuse + // the main args parser so --provider/--model/--cwd/--api-key/--reasoning + // behave the same as in the tui. + args, err := ParseArgs(rawTail) + if err != nil { + return err + } + + // Bot mode always requires credentials (can't pop a /login dialog). + resolved, err := Resolve(args, true) + if err != nil { + return err + } + + cfg, err := telegram.LoadConfig(ZotHome()) + if err != nil { + return err + } + if cfg.BotToken == "" { + return fmt.Errorf("no bot token configured — run `zot telegram-bot setup` first") + } + + agent := resolved.NewAgent() + + // Session: optional, same model as the tui. Persist so DMs build on + // prior context. --no-session disables. + var sess *core.Session + if !args.NoSess { + s, _, serr := openOrCreateSessionForBot(args, resolved, agent, version) + if serr == nil { + sess = s + defer sess.Close() + } else { + fmt.Fprintln(os.Stderr, "session:", serr) + } + } + + b := &telegram.Bot{ + Client: telegram.NewClient(cfg.BotToken), + Agent: agent, + Config: cfg, + ZotHome: ZotHome(), + Save: func(c telegram.Config) error { + return telegram.SaveConfig(ZotHome(), c) + }, + RefreshCreds: func() error { + // Re-run the same resolver the tui uses so we pick up + // refreshed oauth tokens, re-logins, and model switches. + // Only the provider client is swapped — tools, sandbox, + // system prompt, and transcript stay with the existing agent. + next, err := Resolve(args, true) + if err != nil { + return err + } + agent.Client = next.NewClient() + agent.Model = next.Model + return nil + }, + } + + // Record our pid so `zot telegram-bot status` / `zot telegram-bot stop` can find us, + // regardless of whether we were started directly or via `bot start`. + _ = telegram.WritePID(ZotHome(), os.Getpid()) + defer telegram.RemovePID(ZotHome()) + + // Translate sigterm/sigint into a context cancel so the bot's goroutines + // and the currently-running turn wind down cleanly. + ctx, cancel := context.WithCancel(context.Background()) + sigc := make(chan os.Signal, 1) + signal.Notify(sigc, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigc + cancel() + }() + defer cancel() + return b.Run(ctx) +} + +// openOrCreateSessionForBot reuses the same logic as interactive mode +// but never prompts (no TTY picker); falls back to latest or new. +func openOrCreateSessionForBot(args Args, r Resolved, ag *core.Agent, version string) (*core.Session, []any, error) { + if args.Continue { + if latest := core.LatestSession(ZotHome(), args.CWD); latest != "" { + s, msgs, err := core.OpenSession(latest) + if err != nil { + return nil, nil, err + } + ag.SetMessages(msgs) + return s, nil, nil + } + } + s, err := core.NewSession(ZotHome(), args.CWD, r.Provider, r.Model, version) + return s, nil, err +} + +// maskToken returns "123456:ABC…xyz" so copies of zot telegram-bot status can be +// pasted into bug reports without leaking the full token. +func maskToken(tok string) string { + if len(tok) <= 10 { + return "" + } + // telegram tokens look like "123456789:ABCD..." — keep the id, mask the body. + i := strings.IndexByte(tok, ':') + if i < 0 { + return tok[:4] + "…" + tok[len(tok)-4:] + } + body := tok[i+1:] + if len(body) < 8 { + return tok[:i+1] + "" + } + return tok[:i+1] + body[:3] + "…" + body[len(body)-3:] +} + +// _ compile-time hint so the strconv import stays if we later add numeric parsing. +var _ = strconv.Itoa diff --git a/internal/agent/cli.go b/internal/agent/cli.go index f724b48..38cbe02 100644 --- a/internal/agent/cli.go +++ b/internal/agent/cli.go @@ -17,6 +17,12 @@ import ( // Run is the top-level entrypoint for the zot binary. func Run(rawArgs []string, version string) error { + // Subcommand router: `zot bot ...` is handled separately so the + // generic flag parser doesn't reject "bot" as a positional arg. + if handled, err := runBotCommand(rawArgs, version); handled { + return err + } + args, err := ParseArgs(rawArgs) if err != nil { PrintHelp(version) diff --git a/internal/agent/config.go b/internal/agent/config.go index 8e69f59..a36b754 100644 --- a/internal/agent/config.go +++ b/internal/agent/config.go @@ -2,12 +2,14 @@ package agent import ( + "context" "encoding/json" "errors" "fmt" "os" "path/filepath" "runtime" + "time" "github.com/patriceckhart/zot/internal/auth" ) @@ -131,15 +133,68 @@ func ResolveCredentialFull(provider, explicit string) (cred, method, accountID s return c.Anthropic.APIKey, "apikey", "", nil } if c.Anthropic.OAuth != nil && c.Anthropic.OAuth.AccessToken != "" { - return c.Anthropic.OAuth.AccessToken, "oauth", "", nil + tok, _ := refreshIfExpired("anthropic", c.Anthropic.OAuth) + return tok.AccessToken, "oauth", "", nil } case "openai": if c.OpenAI.APIKey != "" { return c.OpenAI.APIKey, "apikey", "", nil } if c.OpenAI.OAuth != nil && c.OpenAI.OAuth.AccessToken != "" { - return c.OpenAI.OAuth.AccessToken, "oauth", c.OpenAI.OAuth.AccountID, nil + tok, _ := refreshIfExpired("openai", c.OpenAI.OAuth) + return tok.AccessToken, "oauth", tok.AccountID, nil } } return "", "", "", fmt.Errorf("no credential for %s", provider) } + +// refreshIfExpired returns a usable OAuth token for the given provider, +// refreshing it synchronously when it's past (or near) expiry. The +// refreshed token is persisted to auth.json. +// +// Failures return the original token unchanged — the caller then makes +// a request with the stale access_token, which will 401. That's still +// better than crashing at credential-resolution time. +func refreshIfExpired(providerName string, tok *auth.OAuthToken) (*auth.OAuthToken, error) { + if tok == nil { + return &auth.OAuthToken{}, fmt.Errorf("nil token") + } + if !tok.Expired() { + return tok, nil + } + if tok.RefreshToken == "" { + return tok, fmt.Errorf("%s oauth token expired and no refresh_token available — run /login again", providerName) + } + + var op auth.OAuthProvider + switch providerName { + case "anthropic": + op = auth.AnthropicOAuth + case "openai": + op = auth.OpenAIOAuth + default: + return tok, fmt.Errorf("unknown provider %q", providerName) + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + next, err := op.Refresh(ctx, tok.RefreshToken) + if err != nil { + return tok, fmt.Errorf("refresh %s: %w", providerName, err) + } + // Preserve the refresh token if the server omitted it (Anthropic often does). + if next.RefreshToken == "" { + next.RefreshToken = tok.RefreshToken + } + // Carry over account id (openai) / id_token across refreshes. + if next.AccountID == "" { + next.AccountID = tok.AccountID + } + if next.IDToken == "" { + next.IDToken = tok.IDToken + } + if err := AuthStoreFor().SetOAuth(providerName, *next); err != nil { + return next, fmt.Errorf("persist refreshed token: %w", err) + } + return next, nil +} diff --git a/internal/agent/modes/model_dialog.go b/internal/agent/modes/model_dialog.go index 6804111..fb3004e 100644 --- a/internal/agent/modes/model_dialog.go +++ b/internal/agent/modes/model_dialog.go @@ -2,6 +2,8 @@ package modes import ( "fmt" + "sort" + "strings" "github.com/patriceckhart/zot/internal/provider" "github.com/patriceckhart/zot/internal/tui" @@ -9,13 +11,17 @@ import ( // modelDialog is an inline picker for choosing the active model. // It lists all models known to the provider package (baked-in catalog -// + any live entries discovered via /v1/models) and lets the user pick -// one with arrow keys + enter. +// + any live entries discovered via /v1/models) sorted by provider +// then model id, and lets the user pick one with arrow keys + enter. +// Typing characters narrows the list via a fuzzy substring match that +// ignores punctuation (e.g. "opus46" matches "claude-opus-4-6"). type modelDialog struct { active bool - models []provider.Model + all []provider.Model // full catalog, sorted + view []provider.Model // filtered view shown to the user cursor int current string // currently selected model id (highlighted) + query string // live filter text typed by the user } // modelDialogAction is returned by HandleKey. @@ -34,15 +40,10 @@ func newModelDialog() *modelDialog { // it can be pre-selected. func (d *modelDialog) Open(current string) { d.active = true - d.models = provider.Active() + d.all = sortedModels(provider.Active()) d.current = current - d.cursor = 0 - for i, m := range d.models { - if m.ID == current { - d.cursor = i - break - } - } + d.query = "" + d.refilter() } // Close hides the dialog. @@ -51,6 +52,59 @@ func (d *modelDialog) Close() { d.active = false } // Active reports whether the dialog is visible and consumes input. func (d *modelDialog) Active() bool { return d != nil && d.active } +// refilter rebuilds view from all according to query, and snaps the +// cursor to either the current model (if visible) or the first row. +func (d *modelDialog) refilter() { + needle := normalizeModelQuery(d.query) + if needle == "" { + d.view = append([]provider.Model(nil), d.all...) + } else { + out := make([]provider.Model, 0, len(d.all)) + for _, m := range d.all { + if strings.Contains(normalizeModelQuery(m.Provider+" "+m.ID+" "+m.DisplayName), needle) { + out = append(out, m) + } + } + d.view = out + } + d.cursor = 0 + for i, m := range d.view { + if m.ID == d.current { + d.cursor = i + break + } + } +} + +// sortedModels returns a fresh slice sorted by provider, then model id. +func sortedModels(in []provider.Model) []provider.Model { + out := append([]provider.Model(nil), in...) + sort.SliceStable(out, func(i, j int) bool { + if out[i].Provider != out[j].Provider { + return out[i].Provider < out[j].Provider + } + return out[i].ID < out[j].ID + }) + return out +} + +// normalizeModelQuery lowercases and strips punctuation so fuzzy +// substring matching works on both the query and haystacks. "opus46" +// and "opus-4-6" both become "opus46". +func normalizeModelQuery(s string) string { + var sb strings.Builder + sb.Grow(len(s)) + for _, r := range s { + switch { + case r >= 'a' && r <= 'z', r >= '0' && r <= '9': + sb.WriteRune(r) + case r >= 'A' && r <= 'Z': + sb.WriteRune(r + ('a' - 'A')) + } + } + return sb.String() +} + // Render returns the dialog lines. func (d *modelDialog) Render(th tui.Theme, width int) []string { if !d.Active() { @@ -58,12 +112,28 @@ func (d *modelDialog) Render(th tui.Theme, width int) []string { } var lines []string lines = append(lines, frameHeader(th, "model", width)) - lines = append(lines, th.FG256(th.Muted, "pick a model (↑/↓, enter, esc to cancel)")) - // Simple scroll window so very tall catalogs still fit in a short tui. + hint := "pick a model (↑/↓, enter, esc to cancel)" + if d.query != "" { + hint = fmt.Sprintf("filter: %s (%d match)", d.query, len(d.view)) + if len(d.view) != 1 { + hint = fmt.Sprintf("filter: %s (%d matches)", d.query, len(d.view)) + } + } else { + hint += " · type to filter" + } + lines = append(lines, th.FG256(th.Muted, hint)) + + if len(d.view) == 0 { + lines = append(lines, th.FG256(th.Muted, " no models match "+fmt.Sprintf("%q", d.query))) + lines = append(lines, frameRule(th, width)) + return lines + } + + // Scroll window so very tall catalogs still fit in a short tui. const visible = 14 start := 0 - end := len(d.models) + end := len(d.view) if end > visible { start = d.cursor - visible/2 if start < 0 { @@ -76,7 +146,7 @@ func (d *modelDialog) Render(th tui.Theme, width int) []string { } for i := start; i < end; i++ { - m := d.models[i] + m := d.view[i] prov := m.Provider id := m.ID reason := " " @@ -106,8 +176,8 @@ func (d *modelDialog) Render(th tui.Theme, width int) []string { if start > 0 { lines = append(lines, th.FG256(th.Muted, fmt.Sprintf(" … %d more above", start))) } - if end < len(d.models) { - lines = append(lines, th.FG256(th.Muted, fmt.Sprintf(" … %d more below", len(d.models)-end))) + if end < len(d.view) { + lines = append(lines, th.FG256(th.Muted, fmt.Sprintf(" … %d more below", len(d.view)-end))) } lines = append(lines, frameRule(th, width)) @@ -122,18 +192,34 @@ func (d *modelDialog) HandleKey(k tui.Key) modelDialogAction { d.cursor-- } case tui.KeyDown: - if d.cursor < len(d.models)-1 { + if d.cursor < len(d.view)-1 { d.cursor++ } + case tui.KeyBackspace: + if len(d.query) > 0 { + // Drop one rune from the query. + r := []rune(d.query) + d.query = string(r[:len(r)-1]) + d.refilter() + } + case tui.KeyRune: + if k.Alt || k.Ctrl { + break + } + // Only printable ASCII is useful for narrowing. + if k.Rune >= 0x20 && k.Rune < 0x7f { + d.query += string(k.Rune) + d.refilter() + } case tui.KeyEsc: d.Close() return modelDialogAction{Close: true} case tui.KeyEnter: - if len(d.models) == 0 { + if len(d.view) == 0 { d.Close() return modelDialogAction{Close: true} } - m := d.models[d.cursor] + m := d.view[d.cursor] d.Close() return modelDialogAction{Select: true, Provider: m.Provider, Model: m.ID} } diff --git a/internal/agent/modes/telegram/api.go b/internal/agent/modes/telegram/api.go new file mode 100644 index 0000000..5be4fee --- /dev/null +++ b/internal/agent/modes/telegram/api.go @@ -0,0 +1,264 @@ +package telegram + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "mime/multipart" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +// Telegram Bot API types used by the bridge. Only the subset we need. + +type User struct { + ID int64 `json:"id"` + IsBot bool `json:"is_bot"` + FirstName string `json:"first_name"` + Username string `json:"username"` +} + +type Chat struct { + ID int64 `json:"id"` + Type string `json:"type"` // "private" | "group" | ... +} + +type PhotoSize struct { + FileID string `json:"file_id"` + FileSize int `json:"file_size,omitempty"` + Width int `json:"width,omitempty"` + Height int `json:"height,omitempty"` +} + +type Document struct { + FileID string `json:"file_id"` + FileName string `json:"file_name,omitempty"` + MimeType string `json:"mime_type,omitempty"` + FileSize int `json:"file_size,omitempty"` +} + +type Message struct { + MessageID int `json:"message_id"` + From *User `json:"from"` + Chat Chat `json:"chat"` + Date int64 `json:"date"` + Text string `json:"text,omitempty"` + Caption string `json:"caption,omitempty"` + Photo []PhotoSize `json:"photo,omitempty"` + Document *Document `json:"document,omitempty"` +} + +type Update struct { + UpdateID int64 `json:"update_id"` + Message *Message `json:"message"` + Edited *Message `json:"edited_message"` +} + +type File struct { + FileID string `json:"file_id"` + FilePath string `json:"file_path"` + FileSize int `json:"file_size,omitempty"` +} + +// apiResponse is Telegram's envelope. +type apiResponse[T any] struct { + OK bool `json:"ok"` + Description string `json:"description"` + Result T `json:"result"` +} + +// Client is a minimal Telegram Bot API client. +type Client struct { + token string + http *http.Client +} + +func NewClient(token string) *Client { + return &Client{token: token, http: &http.Client{Timeout: 0}} +} + +func (c *Client) baseURL() string { return "https://api.telegram.org/bot" + c.token } + +// GetMe verifies the token and returns the bot's own User. +func (c *Client) GetMe(ctx context.Context) (*User, error) { + var resp apiResponse[User] + if err := c.call(ctx, "getMe", nil, &resp); err != nil { + return nil, err + } + if !resp.OK { + return nil, fmt.Errorf("getMe: %s", resp.Description) + } + return &resp.Result, nil +} + +// GetUpdates polls for new updates since offset with a long-poll timeout. +func (c *Client) GetUpdates(ctx context.Context, offset int64, timeoutSec int) ([]Update, error) { + q := url.Values{} + if offset != 0 { + q.Set("offset", strconv.FormatInt(offset, 10)) + } + q.Set("timeout", strconv.Itoa(timeoutSec)) + q.Set("allowed_updates", `["message","edited_message"]`) + + var resp apiResponse[[]Update] + if err := c.call(ctx, "getUpdates?"+q.Encode(), nil, &resp); err != nil { + return nil, err + } + if !resp.OK { + return nil, fmt.Errorf("getUpdates: %s", resp.Description) + } + return resp.Result, nil +} + +// SendMessage sends a plain-text reply. +func (c *Client) SendMessage(ctx context.Context, chatID int64, text string, replyTo int) error { + body := map[string]any{ + "chat_id": chatID, + "text": text, + } + if replyTo > 0 { + body["reply_to_message_id"] = replyTo + } + var resp apiResponse[json.RawMessage] + if err := c.call(ctx, "sendMessage", body, &resp); err != nil { + return err + } + if !resp.OK { + return fmt.Errorf("sendMessage: %s", resp.Description) + } + return nil +} + +// SendChatAction keeps the "typing..." indicator alive. Call every ~4s. +func (c *Client) SendChatAction(ctx context.Context, chatID int64, action string) error { + body := map[string]any{"chat_id": chatID, "action": action} + var resp apiResponse[json.RawMessage] + _ = c.call(ctx, "sendChatAction", body, &resp) // ignore errors; it's advisory + return nil +} + +// SendDocument uploads a local file as a document attachment. +func (c *Client) SendDocument(ctx context.Context, chatID int64, path, caption string) error { + f, err := openFile(path) + if err != nil { + return err + } + defer f.Close() + + var buf bytes.Buffer + w := multipart.NewWriter(&buf) + _ = w.WriteField("chat_id", strconv.FormatInt(chatID, 10)) + if caption != "" { + _ = w.WriteField("caption", caption) + } + part, err := w.CreateFormFile("document", lastPathElem(path)) + if err != nil { + return err + } + if _, err := io.Copy(part, f); err != nil { + return err + } + w.Close() + + req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL()+"/sendDocument", &buf) + if err != nil { + return err + } + req.Header.Set("content-type", w.FormDataContentType()) + resp, err := c.http.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode >= 400 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("sendDocument http %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + return nil +} + +// GetFile resolves a file_id to a downloadable path. +func (c *Client) GetFile(ctx context.Context, fileID string) (*File, error) { + q := url.Values{} + q.Set("file_id", fileID) + var resp apiResponse[File] + if err := c.call(ctx, "getFile?"+q.Encode(), nil, &resp); err != nil { + return nil, err + } + if !resp.OK { + return nil, fmt.Errorf("getFile: %s", resp.Description) + } + return &resp.Result, nil +} + +// DownloadFile downloads the file at filePath (from GetFile) into memory. +func (c *Client) DownloadFile(ctx context.Context, filePath string) ([]byte, error) { + u := "https://api.telegram.org/file/bot" + c.token + "/" + filePath + req, err := http.NewRequestWithContext(ctx, "GET", u, nil) + if err != nil { + return nil, err + } + resp, err := c.http.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return nil, fmt.Errorf("download %s: http %d", filePath, resp.StatusCode) + } + return io.ReadAll(resp.Body) +} + +// call issues a GET or POST request depending on whether body is nil. +func (c *Client) call(ctx context.Context, endpoint string, body map[string]any, out any) error { + var req *http.Request + var err error + if body == nil { + req, err = http.NewRequestWithContext(ctx, "GET", c.baseURL()+"/"+endpoint, nil) + } else { + b, _ := json.Marshal(body) + req, err = http.NewRequestWithContext(ctx, "POST", c.baseURL()+"/"+endpoint, bytes.NewReader(b)) + if err == nil { + req.Header.Set("content-type", "application/json") + } + } + if err != nil { + return err + } + resp, err := c.http.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + // Telegram returns 200 with ok=false for logical errors; read either way. + respBody, _ := io.ReadAll(resp.Body) + if len(respBody) == 0 { + return fmt.Errorf("%s: empty response (status %d)", endpoint, resp.StatusCode) + } + return json.Unmarshal(respBody, out) +} + +// small helpers kept here so api.go has no other deps. +func lastPathElem(p string) string { + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '/' || p[i] == '\\' { + return p[i+1:] + } + } + return p +} + +func openFile(path string) (io.ReadCloser, error) { + return osOpen(path) +} + +// overridden in tests. +var osOpen = defaultOpen + +// Sleep is a package-level hook so tests can stub it. +var sleep = func(d time.Duration) { time.Sleep(d) } diff --git a/internal/agent/modes/telegram/bot.go b/internal/agent/modes/telegram/bot.go new file mode 100644 index 0000000..7393e8a --- /dev/null +++ b/internal/agent/modes/telegram/bot.go @@ -0,0 +1,407 @@ +package telegram + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "github.com/patriceckhart/zot/internal/core" + "github.com/patriceckhart/zot/internal/provider" +) + +// Bot owns the Telegram polling loop and dispatches inbound DMs to +// the agent. It is a long-running goroutine; Run blocks until ctx +// cancels. +type Bot struct { + Client *Client + Agent *core.Agent + Config Config + ZotHome string + // Save persists cfg to bot.json. Called whenever the bot pairs + // with a new allowed user or advances LastUpdateID. + Save func(Config) error + // RefreshCreds is called before every turn to pick up newly + // refreshed OAuth tokens. Optional; when nil, the bot uses the + // credential it was built with. Implementations typically call + // agent.ResolveCredentialFull which auto-refreshes expired tokens. + RefreshCreds func() error + + mu sync.Mutex + busy bool + activeCtx context.CancelFunc + queue []queuedTurn +} + +// queuedTurn is an inbound DM waiting to become a prompt. +type queuedTurn struct { + chatID int64 + messageID int + prompt string + images []provider.ImageBlock +} + +// Run drives the bot. Returns when ctx is cancelled or GetMe fails. +func (b *Bot) Run(ctx context.Context) error { + if b.Config.BotToken == "" { + return fmt.Errorf("no bot token configured; run `zot bot setup` first") + } + me, err := b.Client.GetMe(ctx) + if err != nil { + return fmt.Errorf("getMe: %w", err) + } + // Keep the stored username/id in sync with the actual bot. + if b.Config.BotID != me.ID || b.Config.BotUsername != me.Username { + b.Config.BotID = me.ID + b.Config.BotUsername = me.Username + _ = b.Save(b.Config) + } + + fmt.Printf("telegram bridge online as @%s (id=%d)\n", me.Username, me.ID) + if b.Config.AllowedUserID == 0 { + fmt.Println("no user paired yet — send /start to the bot from Telegram to claim it") + } else { + fmt.Printf("paired with telegram user id %d\n", b.Config.AllowedUserID) + } + + return b.pollLoop(ctx) +} + +// pollLoop long-polls Telegram for updates and dispatches them. +func (b *Bot) pollLoop(ctx context.Context) error { + backoff := time.Second + for { + if err := ctx.Err(); err != nil { + return err + } + updates, err := b.Client.GetUpdates(ctx, b.Config.LastUpdateID+1, 30) + if err != nil { + if ctx.Err() != nil { + return ctx.Err() + } + fmt.Fprintln(stderr(), "telegram: getUpdates error:", err) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): + } + if backoff < 30*time.Second { + backoff *= 2 + } + continue + } + backoff = time.Second + for _, u := range updates { + if err := b.handleUpdate(ctx, u); err != nil { + fmt.Fprintln(stderr(), "telegram: handleUpdate:", err) + } + b.Config.LastUpdateID = u.UpdateID + _ = b.Save(b.Config) + } + } +} + +// handleUpdate processes a single Telegram update. +func (b *Bot) handleUpdate(ctx context.Context, u Update) error { + msg := u.Message + if msg == nil { + msg = u.Edited + } + if msg == nil || msg.From == nil || msg.From.IsBot { + return nil + } + if msg.Chat.Type != "private" { + return nil + } + + // Pairing: first user who sends /start claims the bridge. + text := strings.TrimSpace(msg.Text) + if b.Config.AllowedUserID == 0 { + if strings.HasPrefix(text, "/start") { + b.Config.AllowedUserID = msg.From.ID + _ = b.Save(b.Config) + _ = b.Client.SendMessage(ctx, msg.Chat.ID, + fmt.Sprintf("paired with @%s. send any message and i'll forward it to zot.", msg.From.Username), + msg.MessageID) + return nil + } + _ = b.Client.SendMessage(ctx, msg.Chat.ID, + "this bot isn't paired yet. send /start to claim it.", + msg.MessageID) + return nil + } + + // Enforce allowed user. + if msg.From.ID != b.Config.AllowedUserID { + _ = b.Client.SendMessage(ctx, msg.Chat.ID, + "this bot is paired with a different user.", + msg.MessageID) + return nil + } + + // Built-in commands that bypass the agent. + switch text { + case "/start", "/help": + _ = b.Client.SendMessage(ctx, msg.Chat.ID, + "send me any message and i'll forward it to zot. attach an image and i'll pass it to the model. commands: /status, /stop.", + msg.MessageID) + return nil + case "/status": + return b.sendStatus(ctx, msg.Chat.ID, msg.MessageID) + case "/stop": + b.mu.Lock() + cancel := b.activeCtx + b.mu.Unlock() + if cancel != nil { + cancel() + _ = b.Client.SendMessage(ctx, msg.Chat.ID, "cancelled the current turn.", msg.MessageID) + } else { + _ = b.Client.SendMessage(ctx, msg.Chat.ID, "nothing running.", msg.MessageID) + } + return nil + } + + // Build the prompt: combine text + caption; download image attachments. + prompt := strings.TrimSpace(msg.Text) + if msg.Caption != "" { + if prompt != "" { + prompt += "\n" + } + prompt += msg.Caption + } + + var images []provider.ImageBlock + if len(msg.Photo) > 0 { + // Photos arrive in multiple sizes; take the largest (last in the slice). + largest := msg.Photo[len(msg.Photo)-1] + if data, mime, err := b.download(ctx, largest.FileID, ""); err == nil { + images = append(images, provider.ImageBlock{MimeType: mime, Data: data}) + } else { + fmt.Fprintln(stderr(), "telegram: download photo:", err) + } + } + if msg.Document != nil && isImageMIME(msg.Document.MimeType) { + if data, mime, err := b.download(ctx, msg.Document.FileID, msg.Document.MimeType); err == nil { + images = append(images, provider.ImageBlock{MimeType: mime, Data: data}) + } + } + + if prompt == "" && len(images) == 0 { + return nil + } + + b.mu.Lock() + b.queue = append(b.queue, queuedTurn{ + chatID: msg.Chat.ID, + messageID: msg.MessageID, + prompt: prompt, + images: images, + }) + idle := !b.busy + b.mu.Unlock() + + if idle { + go b.drainQueue(ctx) + } + return nil +} + +// drainQueue runs queued turns one at a time until the queue is empty. +func (b *Bot) drainQueue(parent context.Context) { + for { + b.mu.Lock() + if len(b.queue) == 0 { + b.busy = false + b.activeCtx = nil + b.mu.Unlock() + return + } + t := b.queue[0] + b.queue = b.queue[1:] + b.busy = true + turnCtx, cancel := context.WithCancel(parent) + b.activeCtx = cancel + b.mu.Unlock() + + if b.RefreshCreds != nil { + if err := b.RefreshCreds(); err != nil { + fmt.Fprintln(stderr(), "telegram: refresh creds:", err) + } + } + b.runTurn(turnCtx, t) + cancel() + } +} + +// runTurn sends the queued prompt to the agent and streams the reply. +func (b *Bot) runTurn(ctx context.Context, t queuedTurn) { + stopTyping := b.startTyping(ctx, t.chatID) + defer stopTyping() + + var replyBuilder strings.Builder + var lastAssistantText string + var turnErr error + + sink := func(ev core.AgentEvent) { + switch e := ev.(type) { + case core.EvTextDelta: + replyBuilder.WriteString(e.Delta) + case core.EvAssistantMessage: + var sb strings.Builder + for _, c := range e.Message.Content { + if tb, ok := c.(provider.TextBlock); ok { + if sb.Len() > 0 { + sb.WriteString("\n") + } + sb.WriteString(tb.Text) + } + } + if sb.Len() > 0 { + lastAssistantText = sb.String() + } + replyBuilder.Reset() + case core.EvTurnEnd: + if e.Err != nil { + turnErr = e.Err + } + } + } + + if err := b.Agent.Prompt(ctx, t.prompt, t.images, sink); err != nil { + turnErr = err + } + + reply := strings.TrimSpace(lastAssistantText) + if reply == "" { + reply = strings.TrimSpace(replyBuilder.String()) + } + if turnErr != nil && ctx.Err() == nil { + reply = "error: " + turnErr.Error() + } + if reply == "" { + reply = "(no reply)" + } + // Telegram caps messages at 4096 chars. Chunk to be safe. + for _, chunk := range chunkMessage(reply, 4000) { + if err := b.Client.SendMessage(context.Background(), t.chatID, chunk, 0); err != nil { + fmt.Fprintln(stderr(), "telegram: sendMessage:", err) + break + } + } +} + +// startTyping keeps Telegram's "typing..." indicator alive until the +// returned stop function is called. +func (b *Bot) startTyping(ctx context.Context, chatID int64) func() { + tctx, cancel := context.WithCancel(ctx) + go func() { + for { + _ = b.Client.SendChatAction(tctx, chatID, "typing") + select { + case <-tctx.Done(): + return + case <-time.After(4 * time.Second): + } + } + }() + return cancel +} + +// sendStatus describes agent state to the Telegram user. +func (b *Bot) sendStatus(ctx context.Context, chatID int64, replyTo int) error { + b.mu.Lock() + busy := b.busy + queued := len(b.queue) + b.mu.Unlock() + + state := "idle" + if busy { + state = "working" + } + lines := []string{ + fmt.Sprintf("state: %s", state), + fmt.Sprintf("queued: %d", queued), + fmt.Sprintf("model: %s", b.Agent.Model), + } + cost := b.Agent.Cost() + lines = append(lines, fmt.Sprintf("cost: $%.4f (%d in / %d out)", + cost.CostUSD, cost.InputTokens, cost.OutputTokens)) + return b.Client.SendMessage(ctx, chatID, strings.Join(lines, "\n"), replyTo) +} + +// download fetches a file from Telegram and returns bytes + mime. +func (b *Bot) download(ctx context.Context, fileID, mime string) ([]byte, string, error) { + f, err := b.Client.GetFile(ctx, fileID) + if err != nil { + return nil, "", err + } + data, err := b.Client.DownloadFile(ctx, f.FilePath) + if err != nil { + return nil, "", err + } + if mime == "" { + mime = guessImageMIME(f.FilePath) + } + return data, mime, nil +} + +// chunkMessage splits s into chunks no larger than limit runes, on line +// boundaries when possible. +func chunkMessage(s string, limit int) []string { + if len(s) <= limit { + return []string{s} + } + var out []string + lines := strings.Split(s, "\n") + var cur strings.Builder + for _, l := range lines { + if cur.Len()+len(l)+1 > limit && cur.Len() > 0 { + out = append(out, cur.String()) + cur.Reset() + } + if len(l) > limit { + // Line itself too long; hard-split. + for len(l) > limit { + out = append(out, l[:limit]) + l = l[limit:] + } + } + if cur.Len() > 0 { + cur.WriteString("\n") + } + cur.WriteString(l) + } + if cur.Len() > 0 { + out = append(out, cur.String()) + } + return out +} + +// isImageMIME returns true for MIME types the model can probably ingest +// as a vision input. +func isImageMIME(m string) bool { + switch strings.ToLower(m) { + case "image/png", "image/jpeg", "image/jpg", "image/gif", "image/webp": + return true + } + return false +} + +// guessImageMIME infers a mime type from a filename suffix. Falls back +// to image/png because telegram photos are always re-encoded to jpeg +// but getFile's file_path may omit the extension. +func guessImageMIME(path string) string { + lower := strings.ToLower(path) + switch { + case strings.HasSuffix(lower, ".png"): + return "image/png" + case strings.HasSuffix(lower, ".jpg"), strings.HasSuffix(lower, ".jpeg"): + return "image/jpeg" + case strings.HasSuffix(lower, ".gif"): + return "image/gif" + case strings.HasSuffix(lower, ".webp"): + return "image/webp" + } + return "image/jpeg" +} diff --git a/internal/agent/modes/telegram/config.go b/internal/agent/modes/telegram/config.go new file mode 100644 index 0000000..8fe5e83 --- /dev/null +++ b/internal/agent/modes/telegram/config.go @@ -0,0 +1,64 @@ +// Package telegram implements zot's Telegram bot bridge. +// +// It runs in-process, polling Telegram for DMs and forwarding them to +// a core.Agent. Responses stream back as Telegram messages. Images +// sent by the user are downloaded and attached to the next prompt as +// provider.ImageBlock, so vision-capable models see them the same way +// they would via drag-and-drop in the TUI. +// +// State (bot token, allowed user id) lives in $ZOT_HOME/bot.json. +package telegram + +import ( + "encoding/json" + "errors" + "os" + "path/filepath" +) + +// Config is the on-disk state for the telegram bridge. +type Config struct { + BotToken string `json:"bot_token,omitempty"` + BotUsername string `json:"bot_username,omitempty"` + BotID int64 `json:"bot_id,omitempty"` + AllowedUserID int64 `json:"allowed_user_id,omitempty"` + LastUpdateID int64 `json:"last_update_id,omitempty"` +} + +// ConfigPath returns the path to bot.json. +func ConfigPath(zotHome string) string { + return filepath.Join(zotHome, "bot.json") +} + +// LoadConfig reads bot.json, returning a zero Config if it doesn't exist. +func LoadConfig(zotHome string) (Config, error) { + var c Config + b, err := os.ReadFile(ConfigPath(zotHome)) + if errors.Is(err, os.ErrNotExist) { + return c, nil + } + if err != nil { + return c, err + } + if err := json.Unmarshal(b, &c); err != nil { + return c, err + } + return c, nil +} + +// SaveConfig writes bot.json atomically. +func SaveConfig(zotHome string, c Config) error { + if err := os.MkdirAll(zotHome, 0o755); err != nil { + return err + } + b, err := json.MarshalIndent(c, "", " ") + if err != nil { + return err + } + path := ConfigPath(zotHome) + tmp := path + ".tmp" + if err := os.WriteFile(tmp, b, 0o600); err != nil { + return err + } + return os.Rename(tmp, path) +} diff --git a/internal/agent/modes/telegram/daemon.go b/internal/agent/modes/telegram/daemon.go new file mode 100644 index 0000000..35f1529 --- /dev/null +++ b/internal/agent/modes/telegram/daemon.go @@ -0,0 +1,109 @@ +package telegram + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" +) + +// PIDPath returns the location of the bot's pid file. +func PIDPath(zotHome string) string { + return filepath.Join(zotHome, "bot.pid") +} + +// LogPath returns the location of the bot's log file (stdout+stderr +// from a detached `zot bot start`). +func LogPath(zotHome string) string { + return filepath.Join(zotHome, "logs", "bot.log") +} + +// WritePID persists pid to bot.pid. Overwrites any existing file. +func WritePID(zotHome string, pid int) error { + p := PIDPath(zotHome) + if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil { + return err + } + return os.WriteFile(p, []byte(strconv.Itoa(pid)+"\n"), 0o644) +} + +// ReadPID returns the pid stored in bot.pid, or 0 if the file doesn't +// exist. Returns an error for any other read/parse failure. +func ReadPID(zotHome string) (int, error) { + b, err := os.ReadFile(PIDPath(zotHome)) + if errors.Is(err, os.ErrNotExist) { + return 0, nil + } + if err != nil { + return 0, err + } + pid, err := strconv.Atoi(strings.TrimSpace(string(b))) + if err != nil { + return 0, fmt.Errorf("parse pid: %w", err) + } + return pid, nil +} + +// RemovePID deletes the pid file if it exists. +func RemovePID(zotHome string) error { + err := os.Remove(PIDPath(zotHome)) + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err +} + +// IsRunning returns (pid, true) if a live process with the recorded +// pid exists, or (pid, false) if the pid file points to a dead process. +// Stale pid files are left in place; the caller may remove them. +func IsRunning(zotHome string) (int, bool, error) { + pid, err := ReadPID(zotHome) + if err != nil { + return 0, false, err + } + if pid <= 0 { + return 0, false, nil + } + proc, err := os.FindProcess(pid) + if err != nil { + return pid, false, nil + } + // signal 0 is POSIX's "does the process exist?" probe. On Windows + // os.Process is always usable and Signal(0) returns nil, so we'd + // miss stale pids; acceptable for the macos/linux-first audience. + if err := proc.Signal(syscall.Signal(0)); err != nil { + if errors.Is(err, os.ErrProcessDone) || errors.Is(err, syscall.ESRCH) { + return pid, false, nil + } + // Other errors (EPERM) mean the process exists but we can't + // inspect it; treat as running. + return pid, true, nil + } + return pid, true, nil +} + +// StopProcess sends SIGTERM to pid and waits up to graceful for it to +// exit, then escalates to SIGKILL. Returns nil if the process is gone. +func StopProcess(pid int, graceful time.Duration) error { + proc, err := os.FindProcess(pid) + if err != nil { + return err + } + _ = proc.Signal(syscall.SIGTERM) + + deadline := time.Now().Add(graceful) + for time.Now().Before(deadline) { + if err := proc.Signal(syscall.Signal(0)); err != nil { + if errors.Is(err, os.ErrProcessDone) || errors.Is(err, syscall.ESRCH) { + return nil + } + } + time.Sleep(100 * time.Millisecond) + } + _ = proc.Kill() + return nil +} diff --git a/internal/agent/modes/telegram/fs.go b/internal/agent/modes/telegram/fs.go new file mode 100644 index 0000000..4a34fdf --- /dev/null +++ b/internal/agent/modes/telegram/fs.go @@ -0,0 +1,10 @@ +package telegram + +import ( + "io" + "os" +) + +func defaultOpen(path string) (io.ReadCloser, error) { + return os.Open(path) +} diff --git a/internal/agent/modes/telegram/io.go b/internal/agent/modes/telegram/io.go new file mode 100644 index 0000000..9873a2a --- /dev/null +++ b/internal/agent/modes/telegram/io.go @@ -0,0 +1,9 @@ +package telegram + +import ( + "io" + "os" +) + +// stderr is a tiny hook so tests can redirect bot logging. +var stderr = func() io.Writer { return os.Stderr }