diff --git a/internal/agent/modes/interactive.go b/internal/agent/modes/interactive.go index 6c7eb6c..ea60fcf 100644 --- a/internal/agent/modes/interactive.go +++ b/internal/agent/modes/interactive.go @@ -2502,6 +2502,11 @@ func (i *Interactive) applyModelSelection(prov, model string) { // identical messages will reuse the existing entries. Nothing // to invalidate. i.mu.Unlock() + // The new agent was built off the base tool registry, so any + // dynamically-registered tools (telegram_send_*) need to be + // reattached. applyTelegramTools is a no-op when the bridge is + // idle so the cross-provider path still works on a vanilla setup. + i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active()) if i.cfg.PersistModel != nil { i.cfg.PersistModel(p, md) } @@ -2529,6 +2534,7 @@ func (i *Interactive) handleAuthEvent(ev auth.Event) { i.statusErr = "" i.statusOK = "logged in to " + ev.Provider + " via " + ev.Method i.mu.Unlock() + i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active()) i.dialog.ShowResult(true, "") } } @@ -3091,6 +3097,7 @@ func (i *Interactive) telegramConnect() { i.invalidate() return } + i.applyTelegramTools(true) state := i.telegramBridge.State() label := "telegram connected" if state.Username != "" { @@ -3117,6 +3124,7 @@ func (i *Interactive) telegramDisconnect() { return } i.telegramBridge.Stop() + i.applyTelegramTools(false) i.mu.Lock() i.statusOK = "telegram disconnected" i.statusErr = "" @@ -3124,6 +3132,63 @@ func (i *Interactive) telegramDisconnect() { i.invalidate() } +// telegramSenderAdapter wraps the bridge so the tools package can +// drive it without importing telegram directly. The Active() check +// is forwarded to the bridge so the tool can fail clearly with a +// model-readable error when the user disconnected mid-turn. 
+type telegramSenderAdapter struct { + bridge *telegram.Bridge +} + +func (a telegramSenderAdapter) SendImage(ctx context.Context, path, caption string) error { + if a.bridge == nil { + return fmt.Errorf("telegram bridge is not connected") + } + return a.bridge.SendImage(ctx, path, caption) +} + +func (a telegramSenderAdapter) SendDocument(ctx context.Context, path, caption string) error { + if a.bridge == nil { + return fmt.Errorf("telegram bridge is not connected") + } + return a.bridge.SendDocument(ctx, path, caption) +} + +func (a telegramSenderAdapter) Active() bool { + return a.bridge != nil && a.bridge.Active() +} + +// applyTelegramTools registers (active=true) or removes (active=false) +// the telegram_send_image and telegram_send_file tools on the running +// agent so the model only sees them while the bridge is connected. +// Snapshots and mutates the live tool registry so any extension or +// /reload-ext additions made while Telegram is connected survive a +// later /telegram disconnect (we only add or strip the two telegram +// entries, never the rest). +func (i *Interactive) applyTelegramTools(active bool) { + if i.agent == nil { + return + } + current := i.agent.Tools + next := core.Registry{} + for name, t := range current { + if name == "telegram_send_image" || name == "telegram_send_file" { + continue + } + next[name] = t + } + if active { + sender := telegramSenderAdapter{bridge: i.telegramBridge} + next["telegram_send_image"] = &tools.TelegramSendImageTool{ + CWD: i.cfg.CWD, Sandbox: i.cfg.Sandbox, Sender: sender, + } + next["telegram_send_file"] = &tools.TelegramSendFileTool{ + CWD: i.cfg.CWD, Sandbox: i.cfg.Sandbox, Sender: sender, + } + } + i.agent.SetTools(next) +} + // telegramStatus writes a one-liner describing the bridge state. // Reports on both the in-tui bridge and the background daemon so // the user isn't confused when the daemon owns the poll loop. 
diff --git a/internal/agent/modes/telegram/api.go b/internal/agent/modes/telegram/api.go index 5be4fee..5612e25 100644 --- a/internal/agent/modes/telegram/api.go +++ b/internal/agent/modes/telegram/api.go @@ -142,6 +142,48 @@ func (c *Client) SendChatAction(ctx context.Context, chatID int64, action string return nil }

// SendPhoto uploads a local image file as a Telegram photo. Telegram
// re-encodes / scales photos for inline preview; use SendDocument
// when the recipient needs the original bytes.
//
// The file at path is read fully into an in-memory multipart body
// with the fields chat_id, caption (only when non-empty) and photo,
// then POSTed to <baseURL>/sendPhoto. It returns the first error from
// opening the file, building the form, creating or performing the
// request, or an error carrying the status code and trimmed response
// body when the HTTP status is 400 or above.
func (c *Client) SendPhoto(ctx context.Context, chatID int64, path, caption string) error {
	f, err := openFile(path)
	if err != nil {
		return err
	}
	defer f.Close()

	var buf bytes.Buffer
	w := multipart.NewWriter(&buf)
	if err := w.WriteField("chat_id", strconv.FormatInt(chatID, 10)); err != nil {
		return err
	}
	if caption != "" {
		if err := w.WriteField("caption", caption); err != nil {
			return err
		}
	}
	part, err := w.CreateFormFile("photo", lastPathElem(path))
	if err != nil {
		return err
	}
	if _, err := io.Copy(part, f); err != nil {
		return err
	}
	// Close writes the terminating multipart boundary; a failure here
	// means the body is incomplete, so do not send it.
	if err := w.Close(); err != nil {
		return err
	}

	req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL()+"/sendPhoto", &buf)
	if err != nil {
		return err
	}
	req.Header.Set("content-type", w.FormDataContentType())
	resp, err := c.http.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		// Best effort: the body is only used to enrich the error text.
		body, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("sendPhoto http %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
	}
	return nil
}

// SendDocument uploads a local file as a document attachment. func (c *Client) SendDocument(ctx context.Context, chatID int64, path, caption string) error { f, err := openFile(path) diff --git a/internal/agent/modes/telegram/bridge.go b/internal/agent/modes/telegram/bridge.go index 402ab0d..2e00ce2 100644 --- a/internal/agent/modes/telegram/bridge.go +++ b/internal/agent/modes/telegram/bridge.go @@ -153,7 +153,8 @@ func (b *Bridge) Stop() { // visual noise to a plain back-and-forth).
func (b *Bridge) OnAssistantText(text string) { b.mu.Lock() - prefix := "zot: " + // prefix := "zot: " + prefix := "" if b.nextReplyFromTelegram { prefix = "" b.nextReplyFromTelegram = false @@ -198,6 +199,42 @@ func (b *Bridge) sendToPaired(text, prefix string) { } } +// SendImage uploads path to the paired Telegram chat as an inline +// photo. Returns an error if the bridge is not running, no user has +// paired yet, or the upload itself fails. Used by the +// telegram_send_image tool so a Telegram-originated turn can yield +// a real image instead of a textual description. +func (b *Bridge) SendImage(ctx context.Context, path, caption string) error { + b.mu.Lock() + chatID := b.chatID + running := b.running + b.mu.Unlock() + if !running { + return fmt.Errorf("telegram bridge is not running") + } + if chatID == 0 { + return fmt.Errorf("telegram bridge has no paired chat yet") + } + return b.Client.SendPhoto(ctx, chatID, path, caption) +} + +// SendDocument uploads path to the paired Telegram chat as a raw +// document attachment (no compression). Counterpart of SendImage for +// the telegram_send_file tool. +func (b *Bridge) SendDocument(ctx context.Context, path, caption string) error { + b.mu.Lock() + chatID := b.chatID + running := b.running + b.mu.Unlock() + if !running { + return fmt.Errorf("telegram bridge is not running") + } + if chatID == 0 { + return fmt.Errorf("telegram bridge has no paired chat yet") + } + return b.Client.SendDocument(ctx, chatID, path, caption) +} + // pollLoop long-polls Telegram and dispatches each update. Runs // until ctx cancels. 
func (b *Bridge) pollLoop(ctx context.Context) { diff --git a/internal/agent/tools/telegram_send.go b/internal/agent/tools/telegram_send.go new file mode 100644 index 0000000..2fd944d --- /dev/null +++ b/internal/agent/tools/telegram_send.go @@ -0,0 +1,158 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/patriceckhart/zot/internal/core" + "github.com/patriceckhart/zot/internal/provider" +) + +// TelegramSender is the small affordance the telegram-send tools call +// into. The real implementation lives in the interactive runtime and +// forwards to the active *telegram.Bridge; tests can pass any stub. +// +// SendImage delivers a "photo" (compressed inline image preview); +// SendDocument delivers a "document" (raw file attachment, no +// compression). Telegram resizes photos to its preview format, which +// loses detail but renders inline; documents preserve the original +// bytes but show up as a file the recipient downloads. +type TelegramSender interface { + // SendImage uploads path as an inline-rendered photo with an + // optional caption. Returns an error if the bridge is not + // active or the upload fails. + SendImage(ctx context.Context, path, caption string) error + // SendDocument uploads path as a raw attachment. + SendDocument(ctx context.Context, path, caption string) error + // Active reports whether a paired Telegram chat is currently + // reachable. Tools surface a clear error to the model when it + // tries to send without a connected bridge. + Active() bool +} + +// TelegramSendImageTool exposes the bridge's photo-send affordance to +// the model so a turn that comes in over Telegram can produce a real +// image reply (a screenshot, a generated chart, a downloaded asset) +// instead of a textual description of one. Only registered while the +// bridge is connected; deregistered on disconnect. 
+type TelegramSendImageTool struct { + CWD string + Sandbox *Sandbox + Sender TelegramSender +} + +type telegramSendImageArgs struct { + Path string `json:"path"` + Caption string `json:"caption,omitempty"` +} + +const telegramSendImageSchema = `{"type":"object","properties":{"path":{"type":"string","description":"absolute or cwd-relative path to a local image file (png/jpg/gif/webp)"},"caption":{"type":"string","description":"optional caption sent alongside the image"}},"required":["path"]}` + +func (t *TelegramSendImageTool) Name() string { return "telegram_send_image" } +func (t *TelegramSendImageTool) Description() string { + return "Send a local image file to the paired Telegram chat as an inline photo. Use when the user (over Telegram) asks to see an image rather than have it described." +} +func (t *TelegramSendImageTool) Schema() json.RawMessage { + return json.RawMessage(telegramSendImageSchema) +} + +func (t *TelegramSendImageTool) Execute(ctx context.Context, raw json.RawMessage, _ func(string)) (core.ToolResult, error) { + var a telegramSendImageArgs + if err := json.Unmarshal(raw, &a); err != nil { + return core.ToolResult{}, fmt.Errorf("invalid args: %w", err) + } + if a.Path == "" { + return core.ToolResult{}, fmt.Errorf("path is required") + } + if t.Sender == nil || !t.Sender.Active() { + return core.ToolResult{ + IsError: true, + Content: []provider.Content{provider.TextBlock{Text: "telegram bridge is not connected; cannot send image"}}, + }, nil + } + path := resolvePath(t.CWD, a.Path) + if err := t.Sandbox.CheckPath(path); err != nil { + return core.ToolResult{}, err + } + info, err := os.Stat(path) + if err != nil { + return core.ToolResult{}, err + } + if info.IsDir() { + return core.ToolResult{}, fmt.Errorf("%s is a directory", path) + } + if mime := imageMIME(path); mime == "" { + return core.ToolResult{ + IsError: true, + Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("%s is not a recognised image format 
(png/jpg/gif/webp); use telegram_send_file for arbitrary attachments", path)}}, + }, nil + } + if err := t.Sender.SendImage(ctx, path, a.Caption); err != nil { + return core.ToolResult{}, fmt.Errorf("send: %w", err) + } + kb := info.Size() / 1024 + return core.ToolResult{ + Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("sent %s to telegram (%d KB)", path, kb)}}, + }, nil +} + +// TelegramSendFileTool uploads any local file to the paired chat as a +// document attachment. Use this for non-image files or when the model +// needs the recipient to receive the original bytes (no Telegram +// compression). For images you usually want telegram_send_image. +type TelegramSendFileTool struct { + CWD string + Sandbox *Sandbox + Sender TelegramSender +} + +type telegramSendFileArgs struct { + Path string `json:"path"` + Caption string `json:"caption,omitempty"` +} + +const telegramSendFileSchema = `{"type":"object","properties":{"path":{"type":"string","description":"absolute or cwd-relative path to any local file"},"caption":{"type":"string","description":"optional caption sent alongside the file"}},"required":["path"]}` + +func (t *TelegramSendFileTool) Name() string { return "telegram_send_file" } +func (t *TelegramSendFileTool) Description() string { + return "Send a local file to the paired Telegram chat as a document attachment (no compression). Use for non-image files or when the recipient needs the original bytes." 
+} +func (t *TelegramSendFileTool) Schema() json.RawMessage { + return json.RawMessage(telegramSendFileSchema) +} + +func (t *TelegramSendFileTool) Execute(ctx context.Context, raw json.RawMessage, _ func(string)) (core.ToolResult, error) { + var a telegramSendFileArgs + if err := json.Unmarshal(raw, &a); err != nil { + return core.ToolResult{}, fmt.Errorf("invalid args: %w", err) + } + if a.Path == "" { + return core.ToolResult{}, fmt.Errorf("path is required") + } + if t.Sender == nil || !t.Sender.Active() { + return core.ToolResult{ + IsError: true, + Content: []provider.Content{provider.TextBlock{Text: "telegram bridge is not connected; cannot send file"}}, + }, nil + } + path := resolvePath(t.CWD, a.Path) + if err := t.Sandbox.CheckPath(path); err != nil { + return core.ToolResult{}, err + } + info, err := os.Stat(path) + if err != nil { + return core.ToolResult{}, err + } + if info.IsDir() { + return core.ToolResult{}, fmt.Errorf("%s is a directory", path) + } + if err := t.Sender.SendDocument(ctx, path, a.Caption); err != nil { + return core.ToolResult{}, fmt.Errorf("send: %w", err) + } + kb := info.Size() / 1024 + return core.ToolResult{ + Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("sent %s to telegram (%d KB)", path, kb)}}, + }, nil +}