mirror of
https://github.com/patriceckhart/zot.git
synced 2026-06-27 05:46:34 +02:00
telegram: let zot send images and files back through the bridge
The bridge already mirrored the assistant's text reply into the
paired Telegram chat but had no way to push real attachments. A
turn that came in over Telegram could only ever produce a textual
description of an image, never the image itself.
Add two model-facing tools, registered on the running agent only
while the bridge is connected:
- telegram_send_image(path, caption?) uploads a local image
(png/jpg/gif/webp) as an inline Telegram photo. Telegram
compresses for preview, which is what you usually want for a
screenshot or chart.
- telegram_send_file(path, caption?) uploads any local file as a
document attachment with no compression. Use for non-images or
when the recipient needs the original bytes.
Plumbing:
- Client.SendPhoto multipart upload mirrors SendDocument, hitting
sendPhoto so Telegram renders the image inline.
- Bridge.SendImage / SendDocument resolve the paired chat id and
return a clear error when the bridge is not running or no user
has paired yet.
- A small TelegramSender interface in package tools keeps the
tools package free of any telegram dependency; an adapter in
interactive.go forwards to the live *telegram.Bridge.
- applyTelegramTools mutates the running agent's tool registry on
/telegram connect / disconnect, on /model swaps, and on login
rebuilds. Walks the live registry rather than restoring from a
snapshot so extension or /reload-ext additions survive a later
disconnect; we only add or strip the two telegram entries.
Both tools respect the sandbox and reject directories;
telegram_send_image additionally refuses non-image inputs. They
return a one-line text result the model can use to confirm the
upload ("sent /path/foo.png to telegram (1234 KB)").
This commit is contained in:
parent
998a062a20
commit
cead9fdff1
4 changed files with 303 additions and 1 deletions
|
|
@ -2502,6 +2502,11 @@ func (i *Interactive) applyModelSelection(prov, model string) {
|
|||
// identical messages will reuse the existing entries. Nothing
|
||||
// to invalidate.
|
||||
i.mu.Unlock()
|
||||
// The new agent was built off the base tool registry, so any
|
||||
// dynamically-registered tools (telegram_send_*) need to be
|
||||
// reattached. applyTelegramTools is a no-op when the bridge is
|
||||
// idle so the cross-provider path still works on a vanilla setup.
|
||||
i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active())
|
||||
if i.cfg.PersistModel != nil {
|
||||
i.cfg.PersistModel(p, md)
|
||||
}
|
||||
|
|
@ -2529,6 +2534,7 @@ func (i *Interactive) handleAuthEvent(ev auth.Event) {
|
|||
i.statusErr = ""
|
||||
i.statusOK = "logged in to " + ev.Provider + " via " + ev.Method
|
||||
i.mu.Unlock()
|
||||
i.applyTelegramTools(i.telegramBridge != nil && i.telegramBridge.Active())
|
||||
i.dialog.ShowResult(true, "")
|
||||
}
|
||||
}
|
||||
|
|
@ -3091,6 +3097,7 @@ func (i *Interactive) telegramConnect() {
|
|||
i.invalidate()
|
||||
return
|
||||
}
|
||||
i.applyTelegramTools(true)
|
||||
state := i.telegramBridge.State()
|
||||
label := "telegram connected"
|
||||
if state.Username != "" {
|
||||
|
|
@ -3117,6 +3124,7 @@ func (i *Interactive) telegramDisconnect() {
|
|||
return
|
||||
}
|
||||
i.telegramBridge.Stop()
|
||||
i.applyTelegramTools(false)
|
||||
i.mu.Lock()
|
||||
i.statusOK = "telegram disconnected"
|
||||
i.statusErr = ""
|
||||
|
|
@ -3124,6 +3132,63 @@ func (i *Interactive) telegramDisconnect() {
|
|||
i.invalidate()
|
||||
}
|
||||
|
||||
// telegramSenderAdapter wraps the bridge so the tools package can
|
||||
// drive it without importing telegram directly. The Active() check
|
||||
// is forwarded to the bridge so the tool can fail clearly with a
|
||||
// model-readable error when the user disconnected mid-turn.
|
||||
type telegramSenderAdapter struct {
|
||||
bridge *telegram.Bridge
|
||||
}
|
||||
|
||||
func (a telegramSenderAdapter) SendImage(ctx context.Context, path, caption string) error {
|
||||
if a.bridge == nil {
|
||||
return fmt.Errorf("telegram bridge is not connected")
|
||||
}
|
||||
return a.bridge.SendImage(ctx, path, caption)
|
||||
}
|
||||
|
||||
func (a telegramSenderAdapter) SendDocument(ctx context.Context, path, caption string) error {
|
||||
if a.bridge == nil {
|
||||
return fmt.Errorf("telegram bridge is not connected")
|
||||
}
|
||||
return a.bridge.SendDocument(ctx, path, caption)
|
||||
}
|
||||
|
||||
func (a telegramSenderAdapter) Active() bool {
|
||||
return a.bridge != nil && a.bridge.Active()
|
||||
}
|
||||
|
||||
// applyTelegramTools registers (active=true) or removes (active=false)
|
||||
// the telegram_send_image and telegram_send_file tools on the running
|
||||
// agent so the model only sees them while the bridge is connected.
|
||||
// Snapshots and mutates the live tool registry so any extension or
|
||||
// /reload-ext additions made while Telegram is connected survive a
|
||||
// later /telegram disconnect (we only add or strip the two telegram
|
||||
// entries, never the rest).
|
||||
func (i *Interactive) applyTelegramTools(active bool) {
|
||||
if i.agent == nil {
|
||||
return
|
||||
}
|
||||
current := i.agent.Tools
|
||||
next := core.Registry{}
|
||||
for name, t := range current {
|
||||
if name == "telegram_send_image" || name == "telegram_send_file" {
|
||||
continue
|
||||
}
|
||||
next[name] = t
|
||||
}
|
||||
if active {
|
||||
sender := telegramSenderAdapter{bridge: i.telegramBridge}
|
||||
next["telegram_send_image"] = &tools.TelegramSendImageTool{
|
||||
CWD: i.cfg.CWD, Sandbox: i.cfg.Sandbox, Sender: sender,
|
||||
}
|
||||
next["telegram_send_file"] = &tools.TelegramSendFileTool{
|
||||
CWD: i.cfg.CWD, Sandbox: i.cfg.Sandbox, Sender: sender,
|
||||
}
|
||||
}
|
||||
i.agent.SetTools(next)
|
||||
}
|
||||
|
||||
// telegramStatus writes a one-liner describing the bridge state.
|
||||
// Reports on both the in-tui bridge and the background daemon so
|
||||
// the user isn't confused when the daemon owns the poll loop.
|
||||
|
|
|
|||
|
|
@ -142,6 +142,48 @@ func (c *Client) SendChatAction(ctx context.Context, chatID int64, action string
|
|||
return nil
|
||||
}
|
||||
|
||||
// SendPhoto uploads a local image file as a Telegram photo. Telegram
|
||||
// re-encodes / scales photos for inline preview; use SendDocument
|
||||
// when the recipient needs the original bytes.
|
||||
func (c *Client) SendPhoto(ctx context.Context, chatID int64, path, caption string) error {
|
||||
f, err := openFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
_ = w.WriteField("chat_id", strconv.FormatInt(chatID, 10))
|
||||
if caption != "" {
|
||||
_ = w.WriteField("caption", caption)
|
||||
}
|
||||
part, err := w.CreateFormFile("photo", lastPathElem(path))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := io.Copy(part, f); err != nil {
|
||||
return err
|
||||
}
|
||||
w.Close()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL()+"/sendPhoto", &buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("content-type", w.FormDataContentType())
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 400 {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("sendPhoto http %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SendDocument uploads a local file as a document attachment.
|
||||
func (c *Client) SendDocument(ctx context.Context, chatID int64, path, caption string) error {
|
||||
f, err := openFile(path)
|
||||
|
|
|
|||
|
|
@ -153,7 +153,8 @@ func (b *Bridge) Stop() {
|
|||
// visual noise to a plain back-and-forth).
|
||||
func (b *Bridge) OnAssistantText(text string) {
|
||||
b.mu.Lock()
|
||||
prefix := "zot: "
|
||||
// prefix := "zot: "
|
||||
prefix := ""
|
||||
if b.nextReplyFromTelegram {
|
||||
prefix = ""
|
||||
b.nextReplyFromTelegram = false
|
||||
|
|
@ -198,6 +199,42 @@ func (b *Bridge) sendToPaired(text, prefix string) {
|
|||
}
|
||||
}
|
||||
|
||||
// SendImage uploads path to the paired Telegram chat as an inline
|
||||
// photo. Returns an error if the bridge is not running, no user has
|
||||
// paired yet, or the upload itself fails. Used by the
|
||||
// telegram_send_image tool so a Telegram-originated turn can yield
|
||||
// a real image instead of a textual description.
|
||||
func (b *Bridge) SendImage(ctx context.Context, path, caption string) error {
|
||||
b.mu.Lock()
|
||||
chatID := b.chatID
|
||||
running := b.running
|
||||
b.mu.Unlock()
|
||||
if !running {
|
||||
return fmt.Errorf("telegram bridge is not running")
|
||||
}
|
||||
if chatID == 0 {
|
||||
return fmt.Errorf("telegram bridge has no paired chat yet")
|
||||
}
|
||||
return b.Client.SendPhoto(ctx, chatID, path, caption)
|
||||
}
|
||||
|
||||
// SendDocument uploads path to the paired Telegram chat as a raw
|
||||
// document attachment (no compression). Counterpart of SendImage for
|
||||
// the telegram_send_file tool.
|
||||
func (b *Bridge) SendDocument(ctx context.Context, path, caption string) error {
|
||||
b.mu.Lock()
|
||||
chatID := b.chatID
|
||||
running := b.running
|
||||
b.mu.Unlock()
|
||||
if !running {
|
||||
return fmt.Errorf("telegram bridge is not running")
|
||||
}
|
||||
if chatID == 0 {
|
||||
return fmt.Errorf("telegram bridge has no paired chat yet")
|
||||
}
|
||||
return b.Client.SendDocument(ctx, chatID, path, caption)
|
||||
}
|
||||
|
||||
// pollLoop long-polls Telegram and dispatches each update. Runs
|
||||
// until ctx cancels.
|
||||
func (b *Bridge) pollLoop(ctx context.Context) {
|
||||
|
|
|
|||
158
internal/agent/tools/telegram_send.go
Normal file
158
internal/agent/tools/telegram_send.go
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/patriceckhart/zot/internal/core"
|
||||
"github.com/patriceckhart/zot/internal/provider"
|
||||
)
|
||||
|
||||
// TelegramSender is the small affordance the telegram-send tools call
// into. The real implementation lives in the interactive runtime and
// forwards to the active *telegram.Bridge; tests can pass any stub.
//
// The two send methods differ in how Telegram treats the upload:
// SendImage sends a "photo" (compressed inline image preview), while
// SendDocument sends a "document" (raw file attachment, no
// compression). For images Telegram resizes to its preview format,
// which loses detail but renders inline; documents preserve the
// original bytes but show up as a file the recipient downloads.
type TelegramSender interface {
	// SendImage uploads path as an inline-rendered photo with an
	// optional caption. Returns an error if the bridge is not
	// active or the upload fails.
	SendImage(ctx context.Context, path, caption string) error
	// SendDocument uploads path as a raw attachment with an optional
	// caption. Returns an error if the bridge is not active or the
	// upload fails.
	SendDocument(ctx context.Context, path, caption string) error
	// Active reports whether a paired Telegram chat is currently
	// reachable. Tools surface a clear error to the model when it
	// tries to send without a connected bridge.
	Active() bool
}
|
||||
|
||||
// TelegramSendImageTool exposes the bridge's photo-send affordance to
|
||||
// the model so a turn that comes in over Telegram can produce a real
|
||||
// image reply (a screenshot, a generated chart, a downloaded asset)
|
||||
// instead of a textual description of one. Only registered while the
|
||||
// bridge is connected; deregistered on disconnect.
|
||||
type TelegramSendImageTool struct {
|
||||
CWD string
|
||||
Sandbox *Sandbox
|
||||
Sender TelegramSender
|
||||
}
|
||||
|
||||
// telegramSendImageArgs is the JSON argument object decoded by
// TelegramSendImageTool.Execute.
type telegramSendImageArgs struct {
	// Path is the image to send; Execute rejects an empty value.
	Path string `json:"path"`
	// Caption is optional text sent alongside the image.
	Caption string `json:"caption,omitempty"`
}
|
||||
|
||||
// telegramSendImageSchema is the JSON Schema for the arguments of
// telegram_send_image: a required "path" and an optional "caption".
const telegramSendImageSchema = `{"type":"object","properties":{"path":{"type":"string","description":"absolute or cwd-relative path to a local image file (png/jpg/gif/webp)"},"caption":{"type":"string","description":"optional caption sent alongside the image"}},"required":["path"]}`
|
||||
|
||||
// Name returns the tool's fixed name, "telegram_send_image".
func (t *TelegramSendImageTool) Name() string {
	return "telegram_send_image"
}
|
||||
func (t *TelegramSendImageTool) Description() string {
|
||||
return "Send a local image file to the paired Telegram chat as an inline photo. Use when the user (over Telegram) asks to see an image rather than have it described."
|
||||
}
|
||||
func (t *TelegramSendImageTool) Schema() json.RawMessage {
|
||||
return json.RawMessage(telegramSendImageSchema)
|
||||
}
|
||||
|
||||
func (t *TelegramSendImageTool) Execute(ctx context.Context, raw json.RawMessage, _ func(string)) (core.ToolResult, error) {
|
||||
var a telegramSendImageArgs
|
||||
if err := json.Unmarshal(raw, &a); err != nil {
|
||||
return core.ToolResult{}, fmt.Errorf("invalid args: %w", err)
|
||||
}
|
||||
if a.Path == "" {
|
||||
return core.ToolResult{}, fmt.Errorf("path is required")
|
||||
}
|
||||
if t.Sender == nil || !t.Sender.Active() {
|
||||
return core.ToolResult{
|
||||
IsError: true,
|
||||
Content: []provider.Content{provider.TextBlock{Text: "telegram bridge is not connected; cannot send image"}},
|
||||
}, nil
|
||||
}
|
||||
path := resolvePath(t.CWD, a.Path)
|
||||
if err := t.Sandbox.CheckPath(path); err != nil {
|
||||
return core.ToolResult{}, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return core.ToolResult{}, err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return core.ToolResult{}, fmt.Errorf("%s is a directory", path)
|
||||
}
|
||||
if mime := imageMIME(path); mime == "" {
|
||||
return core.ToolResult{
|
||||
IsError: true,
|
||||
Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("%s is not a recognised image format (png/jpg/gif/webp); use telegram_send_file for arbitrary attachments", path)}},
|
||||
}, nil
|
||||
}
|
||||
if err := t.Sender.SendImage(ctx, path, a.Caption); err != nil {
|
||||
return core.ToolResult{}, fmt.Errorf("send: %w", err)
|
||||
}
|
||||
kb := info.Size() / 1024
|
||||
return core.ToolResult{
|
||||
Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("sent %s to telegram (%d KB)", path, kb)}},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// TelegramSendFileTool uploads any local file to the paired chat as a
|
||||
// document attachment. Use this for non-image files or when the model
|
||||
// needs the recipient to receive the original bytes (no Telegram
|
||||
// compression). For images you usually want telegram_send_image.
|
||||
type TelegramSendFileTool struct {
|
||||
CWD string
|
||||
Sandbox *Sandbox
|
||||
Sender TelegramSender
|
||||
}
|
||||
|
||||
// telegramSendFileArgs is the JSON argument object decoded by
// TelegramSendFileTool.Execute.
type telegramSendFileArgs struct {
	// Path is the file to send; Execute rejects an empty value.
	Path string `json:"path"`
	// Caption is optional text sent alongside the file.
	Caption string `json:"caption,omitempty"`
}
|
||||
|
||||
// telegramSendFileSchema is the JSON Schema for the arguments of
// telegram_send_file: a required "path" and an optional "caption".
const telegramSendFileSchema = `{"type":"object","properties":{"path":{"type":"string","description":"absolute or cwd-relative path to any local file"},"caption":{"type":"string","description":"optional caption sent alongside the file"}},"required":["path"]}`
|
||||
|
||||
// Name returns the tool's fixed name, "telegram_send_file".
func (t *TelegramSendFileTool) Name() string {
	return "telegram_send_file"
}
|
||||
func (t *TelegramSendFileTool) Description() string {
|
||||
return "Send a local file to the paired Telegram chat as a document attachment (no compression). Use for non-image files or when the recipient needs the original bytes."
|
||||
}
|
||||
func (t *TelegramSendFileTool) Schema() json.RawMessage {
|
||||
return json.RawMessage(telegramSendFileSchema)
|
||||
}
|
||||
|
||||
func (t *TelegramSendFileTool) Execute(ctx context.Context, raw json.RawMessage, _ func(string)) (core.ToolResult, error) {
|
||||
var a telegramSendFileArgs
|
||||
if err := json.Unmarshal(raw, &a); err != nil {
|
||||
return core.ToolResult{}, fmt.Errorf("invalid args: %w", err)
|
||||
}
|
||||
if a.Path == "" {
|
||||
return core.ToolResult{}, fmt.Errorf("path is required")
|
||||
}
|
||||
if t.Sender == nil || !t.Sender.Active() {
|
||||
return core.ToolResult{
|
||||
IsError: true,
|
||||
Content: []provider.Content{provider.TextBlock{Text: "telegram bridge is not connected; cannot send file"}},
|
||||
}, nil
|
||||
}
|
||||
path := resolvePath(t.CWD, a.Path)
|
||||
if err := t.Sandbox.CheckPath(path); err != nil {
|
||||
return core.ToolResult{}, err
|
||||
}
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return core.ToolResult{}, err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return core.ToolResult{}, fmt.Errorf("%s is a directory", path)
|
||||
}
|
||||
if err := t.Sender.SendDocument(ctx, path, a.Caption); err != nil {
|
||||
return core.ToolResult{}, fmt.Errorf("send: %w", err)
|
||||
}
|
||||
kb := info.Size() / 1024
|
||||
return core.ToolResult{
|
||||
Content: []provider.Content{provider.TextBlock{Text: fmt.Sprintf("sent %s to telegram (%d KB)", path, kb)}},
|
||||
}, nil
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue