From cead9fdff1342c0e6065ad422a2caeea756186ff Mon Sep 17 00:00:00 2001 From: patriceckhart Date: Tue, 28 Apr 2026 08:23:04 +0200 Subject: [PATCH] telegram: let zot send images and files back through the bridge The bridge already mirrored the assistant's text reply into the paired Telegram chat but had no way to push real attachments. A turn that came in over Telegram could only ever produce a textual description of an image, never the image itself. Add two model-facing tools, registered on the running agent only while the bridge is connected: - telegram_send_image(path, caption?) uploads a local image (png/jpg/gif/webp) as an inline Telegram photo. Telegram compresses for preview, which is what you usually want for a screenshot or chart. - telegram_send_file(path, caption?) uploads any local file as a document attachment with no compression. Use for non-images or when the recipient needs the original bytes. Plumbing: - Client.SendPhoto multipart upload mirrors SendDocument, hitting sendPhoto so Telegram renders the image inline. - Bridge.SendImage / SendDocument resolve the paired chat id and return a clear error when the bridge is not running or no user has paired yet. - A small TelegramSender interface in package tools keeps the tools package free of any telegram dependency; an adapter in interactive.go forwards to the live *telegram.Bridge. - applyTelegramTools mutates the running agent's tool registry on /telegram connect / disconnect, on /model swaps, and on login rebuilds. Walks the live registry rather than restoring from a snapshot so extension or /reload-ext additions survive a later disconnect; we only add or strip the two telegram entries. Both tools respect the sandbox, refuse non-image inputs in send_image, and reject directories. They return a one-line text result the model can use to confirm the upload ("sent /path/foo.png to telegram (1228 KB)"; the size is reported in whole kilobytes). Note: this patch also blanks the "zot: " prefix that Bridge.OnAssistantText used to prepend to mirrored assistant text; the previous assignment is left commented out in bridge.go. 
--- internal/agent/modes/interactive.go | 65 ++++++++++ internal/agent/modes/telegram/api.go | 42 +++++++ internal/agent/modes/telegram/bridge.go | 39 +++++- internal/agent/tools/telegram_send.go | 158 ++++++++++++++++++++++++ 4 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 internal/agent/tools/telegram_send.go diff --git a/internal/agent/modes/interactive.go b/internal/agent/modes/interactive.go index 6c7eb6c..ea60fcf 100644 --- a/internal/agent/modes/interactive.go +++ b/internal/agent/modes/interactive.go @@ -2502,6 +2502,11 @@ func (i *Interactive) applyModelSelection(prov, model string) { // identical messages will reuse the existing entries. Nothing // to invalidate. i.mu.Unlock() + // The new agent was built off the base tool registry, so any + // dynamically-registered tools (telegram_send_*) need to be + // reattached. applyTelegramTools is a no-op when the bridge is + // idle so the cross-provider path still works on a vanilla setup. + i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active()) if i.cfg.PersistModel != nil { i.cfg.PersistModel(p, md) } @@ -2529,6 +2534,7 @@ func (i *Interactive) handleAuthEvent(ev auth.Event) { i.statusErr = "" i.statusOK = "logged in to " + ev.Provider + " via " + ev.Method i.mu.Unlock() + i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active()) i.dialog.ShowResult(true, "") } } @@ -3091,6 +3097,7 @@ func (i *Interactive) telegramConnect() { i.invalidate() return } + i.applyTelegramTools(true) state := i.telegramBridge.State() label := "telegram connected" if state.Username != "" { @@ -3117,6 +3124,7 @@ func (i *Interactive) telegramDisconnect() { return } i.telegramBridge.Stop() + i.applyTelegramTools(false) i.mu.Lock() i.statusOK = "telegram disconnected" i.statusErr = "" @@ -3124,6 +3132,63 @@ func (i *Interactive) telegramDisconnect() { i.invalidate() } +// telegramSenderAdapter wraps the bridge so the tools package can +// drive it without importing telegram 
directly. The Active() check +// is forwarded to the bridge so the tool can fail clearly with a +// model-readable error when the user disconnected mid-turn. +type telegramSenderAdapter struct { + bridge *telegram.Bridge +} + +func (a telegramSenderAdapter) SendImage(ctx context.Context, path, caption string) error { + if a.bridge == nil { + return fmt.Errorf("telegram bridge is not connected") + } + return a.bridge.SendImage(ctx, path, caption) +} + +func (a telegramSenderAdapter) SendDocument(ctx context.Context, path, caption string) error { + if a.bridge == nil { + return fmt.Errorf("telegram bridge is not connected") + } + return a.bridge.SendDocument(ctx, path, caption) +} + +func (a telegramSenderAdapter) Active() bool { + return a.bridge != nil && a.bridge.Active() +} + +// applyTelegramTools registers (active=true) or removes (active=false) +// the telegram_send_image and telegram_send_file tools on the running +// agent so the model only sees them while the bridge is connected. +// Snapshots and mutates the live tool registry so any extension or +// /reload-ext additions made while Telegram is connected survive a +// later /telegram disconnect (we only add or strip the two telegram +// entries, never the rest). +func (i *Interactive) applyTelegramTools(active bool) { + if i.agent == nil { + return + } + current := i.agent.Tools + next := core.Registry{} + for name, t := range current { + if name == "telegram_send_image" || name == "telegram_send_file" { + continue + } + next[name] = t + } + if active { + sender := telegramSenderAdapter{bridge: i.telegramBridge} + next["telegram_send_image"] = &tools.TelegramSendImageTool{ + CWD: i.cfg.CWD, Sandbox: i.cfg.Sandbox, Sender: sender, + } + next["telegram_send_file"] = &tools.TelegramSendFileTool{ + CWD: i.cfg.CWD, Sandbox: i.cfg.Sandbox, Sender: sender, + } + } + i.agent.SetTools(next) +} + // telegramStatus writes a one-liner describing the bridge state. 
// Reports on both the in-tui bridge and the background daemon so // the user isn't confused when the daemon owns the poll loop. diff --git a/internal/agent/modes/telegram/api.go b/internal/agent/modes/telegram/api.go index 5be4fee..5612e25 100644 --- a/internal/agent/modes/telegram/api.go +++ b/internal/agent/modes/telegram/api.go @@ -142,6 +142,48 @@ func (c *Client) SendChatAction(ctx context.Context, chatID int64, action string return nil } +// SendPhoto uploads a local image file as a Telegram photo. Telegram +// re-encodes / scales photos for inline preview; use SendDocument +// when the recipient needs the original bytes. +func (c *Client) SendPhoto(ctx context.Context, chatID int64, path, caption string) error { + f, err := openFile(path) + if err != nil { + return err + } + defer f.Close() + + var buf bytes.Buffer + w := multipart.NewWriter(&buf) + _ = w.WriteField("chat_id", strconv.FormatInt(chatID, 10)) + if caption != "" { + _ = w.WriteField("caption", caption) + } + part, err := w.CreateFormFile("photo", lastPathElem(path)) + if err != nil { + return err + } + if _, err := io.Copy(part, f); err != nil { + return err + } + w.Close() + + req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL()+"/sendPhoto", &buf) + if err != nil { + return err + } + req.Header.Set("content-type", w.FormDataContentType()) + resp, err := c.http.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode >= 400 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("sendPhoto http %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + return nil +} + // SendDocument uploads a local file as a document attachment. 
func (c *Client) SendDocument(ctx context.Context, chatID int64, path, caption string) error { f, err := openFile(path) diff --git a/internal/agent/modes/telegram/bridge.go b/internal/agent/modes/telegram/bridge.go index 402ab0d..2e00ce2 100644 --- a/internal/agent/modes/telegram/bridge.go +++ b/internal/agent/modes/telegram/bridge.go @@ -153,7 +153,8 @@ func (b *Bridge) Stop() { // visual noise to a plain back-and-forth). func (b *Bridge) OnAssistantText(text string) { b.mu.Lock() - prefix := "zot: " + // prefix := "zot: " + prefix := "" if b.nextReplyFromTelegram { prefix = "" b.nextReplyFromTelegram = false @@ -198,6 +199,42 @@ func (b *Bridge) sendToPaired(text, prefix string) { } } +// SendImage uploads path to the paired Telegram chat as an inline +// photo. Returns an error if the bridge is not running, no user has +// paired yet, or the upload itself fails. Used by the +// telegram_send_image tool so a Telegram-originated turn can yield +// a real image instead of a textual description. +func (b *Bridge) SendImage(ctx context.Context, path, caption string) error { + b.mu.Lock() + chatID := b.chatID + running := b.running + b.mu.Unlock() + if !running { + return fmt.Errorf("telegram bridge is not running") + } + if chatID == 0 { + return fmt.Errorf("telegram bridge has no paired chat yet") + } + return b.Client.SendPhoto(ctx, chatID, path, caption) +} + +// SendDocument uploads path to the paired Telegram chat as a raw +// document attachment (no compression). Counterpart of SendImage for +// the telegram_send_file tool. +func (b *Bridge) SendDocument(ctx context.Context, path, caption string) error { + b.mu.Lock() + chatID := b.chatID + running := b.running + b.mu.Unlock() + if !running { + return fmt.Errorf("telegram bridge is not running") + } + if chatID == 0 { + return fmt.Errorf("telegram bridge has no paired chat yet") + } + return b.Client.SendDocument(ctx, chatID, path, caption) +} + // pollLoop long-polls Telegram and dispatches each update. 
Runs // until ctx cancels. func (b *Bridge) pollLoop(ctx context.Context) { diff --git a/internal/agent/tools/telegram_send.go b/internal/agent/tools/telegram_send.go new file mode 100644 index 0000000..2fd944d --- /dev/null +++ b/internal/agent/tools/telegram_send.go @@ -0,0 +1,158 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/patriceckhart/zot/internal/core" + "github.com/patriceckhart/zot/internal/provider" +) + +// TelegramSender is the small affordance the telegram-send tools call +// into. The real implementation lives in the interactive runtime and +// forwards to the active *telegram.Bridge; tests can pass any stub. +// +// SendImage sends a "photo" (compressed inline image preview) and +// SendDocument a "document" (raw file attachment, no compression). For +// images Telegram resizes to its preview format, which loses detail +// but renders inline; documents preserve the original bytes but show +// up as a file the recipient downloads. +type TelegramSender interface { + // SendImage uploads path as an inline-rendered photo with an + // optional caption. Returns an error if the bridge is not + // active or the upload fails. + SendImage(ctx context.Context, path, caption string) error + // SendDocument uploads path as a raw attachment. + SendDocument(ctx context.Context, path, caption string) error + // Active reports whether a paired Telegram chat is currently + // reachable. Tools surface a clear error to the model when it + // tries to send without a connected bridge. + Active() bool +} + +// TelegramSendImageTool exposes the bridge's photo-send affordance to +// the model so a turn that comes in over Telegram can produce a real +// image reply (a screenshot, a generated chart, a downloaded asset) +// instead of a textual description of one. Only registered while the +// bridge is connected; deregistered on disconnect. 
+type TelegramSendImageTool struct { + CWD string + Sandbox *Sandbox + Sender TelegramSender +} + +type telegramSendImageArgs struct { + Path string `json:"path"` + Caption string `json:"caption,omitempty"` +} + +const telegramSendImageSchema = `{"type":"object","properties":{"path":{"type":"string","description":"absolute or cwd-relative path to a local image file (png/jpg/gif/webp)"},"caption":{"type":"string","description":"optional caption sent alongside the image"}},"required":["path"]}` + +func (t *TelegramSendImageTool) Name() string { return "telegram_send_image" } +func (t *TelegramSendImageTool) Description() string { + return "Send a local image file to the paired Telegram chat as an inline photo. Use when the user (over Telegram) asks to see an image rather than have it described." +} +func (t *TelegramSendImageTool) Schema() json.RawMessage { + return json.RawMessage(telegramSendImageSchema) +} + +func (t *TelegramSendImageTool) Execute(ctx context.Context, raw json.RawMessage, _ func(string)) (core.ToolResult, error) { + var a telegramSendImageArgs + if err := json.Unmarshal(raw, &a); err != nil { + return core.ToolResult{}, fmt.Errorf("invalid args: %w", err) + } + if a.Path == "" { + return core.ToolResult{}, fmt.Errorf("path is required") + } + if t.Sender == nil || !t.Sender.Active() { + return core.ToolResult{ + IsError: true, + Content: []provider.Content{provider.TextBlock{Text: "telegram bridge is not connected; cannot send image"}}, + }, nil + } + path := resolvePath(t.CWD, a.Path) + if err := t.Sandbox.CheckPath(path); err != nil { + return core.ToolResult{}, err + } + info, err := os.Stat(path) + if err != nil { + return core.ToolResult{}, err + } + if info.IsDir() { + return core.ToolResult{}, fmt.Errorf("%s is a directory", path) + } + if mime := imageMIME(path); mime == "" { + return core.ToolResult{ + IsError: true, + Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("%s is not a recognised image format 
(png/jpg/gif/webp); use telegram_send_file for arbitrary attachments", path)}}, + }, nil + } + if err := t.Sender.SendImage(ctx, path, a.Caption); err != nil { + return core.ToolResult{}, fmt.Errorf("send: %w", err) + } + kb := info.Size() / 1024 + return core.ToolResult{ + Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("sent %s to telegram (%d KB)", path, kb)}}, + }, nil +} + +// TelegramSendFileTool uploads any local file to the paired chat as a +// document attachment. Use this for non-image files or when the model +// needs the recipient to receive the original bytes (no Telegram +// compression). For images you usually want telegram_send_image. +type TelegramSendFileTool struct { + CWD string + Sandbox *Sandbox + Sender TelegramSender +} + +type telegramSendFileArgs struct { + Path string `json:"path"` + Caption string `json:"caption,omitempty"` +} + +const telegramSendFileSchema = `{"type":"object","properties":{"path":{"type":"string","description":"absolute or cwd-relative path to any local file"},"caption":{"type":"string","description":"optional caption sent alongside the file"}},"required":["path"]}` + +func (t *TelegramSendFileTool) Name() string { return "telegram_send_file" } +func (t *TelegramSendFileTool) Description() string { + return "Send a local file to the paired Telegram chat as a document attachment (no compression). Use for non-image files or when the recipient needs the original bytes." 
+} +func (t *TelegramSendFileTool) Schema() json.RawMessage { + return json.RawMessage(telegramSendFileSchema) +} + +func (t *TelegramSendFileTool) Execute(ctx context.Context, raw json.RawMessage, _ func(string)) (core.ToolResult, error) { + var a telegramSendFileArgs + if err := json.Unmarshal(raw, &a); err != nil { + return core.ToolResult{}, fmt.Errorf("invalid args: %w", err) + } + if a.Path == "" { + return core.ToolResult{}, fmt.Errorf("path is required") + } + if t.Sender == nil || !t.Sender.Active() { + return core.ToolResult{ + IsError: true, + Content: []provider.Content{provider.TextBlock{Text: "telegram bridge is not connected; cannot send file"}}, + }, nil + } + path := resolvePath(t.CWD, a.Path) + if err := t.Sandbox.CheckPath(path); err != nil { + return core.ToolResult{}, err + } + info, err := os.Stat(path) + if err != nil { + return core.ToolResult{}, err + } + if info.IsDir() { + return core.ToolResult{}, fmt.Errorf("%s is a directory", path) + } + if err := t.Sender.SendDocument(ctx, path, a.Caption); err != nil { + return core.ToolResult{}, fmt.Errorf("send: %w", err) + } + kb := info.Size() / 1024 + return core.ToolResult{ + Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("sent %s to telegram (%d KB)", path, kb)}}, + }, nil +}