diff --git a/internal/agent/build.go b/internal/agent/build.go
index 5d5aa6b..368d776 100644
--- a/internal/agent/build.go
+++ b/internal/agent/build.go
@@ -405,6 +405,9 @@ func Resolve(args Args, requireCred bool) (Resolved, error) {
 	if skillAddendum != "" {
 		append_ = append(append_, skillAddendum)
 	}
+	if AutoSwarmEnabled() {
+		append_ = append(append_, AutoSwarmSystemAddendum)
+	}
 
 	// Custom system prompt resolution order:
 	// 1. --system-prompt flag (highest priority; ad-hoc per run)
diff --git a/internal/agent/cli.go b/internal/agent/cli.go
index 98183d9..b409a9d 100644
--- a/internal/agent/cli.go
+++ b/internal/agent/cli.go
@@ -17,6 +17,7 @@ import (
 
 	"github.com/patriceckhart/zot/internal/agent/extensions"
 	"github.com/patriceckhart/zot/internal/agent/modes"
+	"github.com/patriceckhart/zot/internal/agent/tools"
 	"github.com/patriceckhart/zot/internal/auth"
 	"github.com/patriceckhart/zot/internal/core"
 	"github.com/patriceckhart/zot/internal/extproto"
@@ -422,6 +423,45 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 	extToolAdapter := &extToolAdapter{mgr: extMgr}
 	r.MergeExtensionTools(extToolAdapter)
 
+	// Build the swarm supervisor BEFORE the agent so the auto-swarm
+	// tool can reference it during tool-registry construction. State
+	// lives under ZotHome/swarm so per-agent meta/events survive
+	// restarts; the user can hunt orphaned agents down with
+	// `git worktree list` if anything misbehaves.
+	//
+	// swarmMgr is also captured by loadSession / changeCWD closures
+	// further down the function, which is why we keep the variable
+	// in this outer scope rather than scoping it tighter.
+	var swarmMgr *swarm.Swarm
+	swarmMgr = swarm.New(swarm.Config{
+		Root:     filepath.Join(ZotHome(), "swarm"),
+		RepoRoot: r.CWD,
+	})
+	// Pull previously-spawned agents off disk so the user can see, resume,
+	// or remove them. Best-effort: a failed reload still allows new agents.
+	_, _ = swarmMgr.Reload()
+
+	// Inject the swarm_spawn auto-swarm tool only when /settings ->
+	// auto-swarm is currently enabled. Registering it unconditionally
+	// leaves the model trying to call it (and getting a polite error)
+	// even when the user has switched the feature off. The /settings
+	// toggle live-mutates the running agent's registry separately so
+	// flipping the flag mid-session takes effect on the next turn.
+	injectSwarmSpawn := func(reg core.Registry) core.Registry {
+		if reg == nil {
+			return reg
+		}
+		if !AutoSwarmEnabled() {
+			return reg
+		}
+		reg["swarm_spawn"] = &tools.SwarmSpawnTool{
+			Swarm:   swarmMgr,
+			Enabled: AutoSwarmEnabled,
+		}
+		return reg
+	}
+	injectSwarmSpawn(r.ToolRegistry)
+
 	// Confirmation gate: when --no-yolo is on, the agent must ask
 	// the user before every tool call. In interactive mode the TUI
 	// provides the Confirmer; in print/json/rpc modes there's no
@@ -475,6 +515,7 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 		}
 		resolved.UseSandbox(sharedSandbox)
 		resolved.MergeExtensionTools(extToolAdapter)
+		injectSwarmSpawn(resolved.ToolRegistry)
 		return wireAgentExt(resolved.NewAgent()), resolved.Provider, resolved.Model, nil
 	}
 
@@ -493,6 +534,7 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 		}
 		resolved.UseSandbox(sharedSandbox)
 		resolved.MergeExtensionTools(extToolAdapter)
+		injectSwarmSpawn(resolved.ToolRegistry)
 		return wireAgentExt(resolved.NewAgent()), resolved.Provider, resolved.Model, nil
 	}
 
@@ -519,6 +561,7 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 		}
 		resolved.UseSandbox(sharedSandbox)
 		resolved.MergeExtensionTools(extToolAdapter)
+		injectSwarmSpawn(resolved.ToolRegistry)
 		return wireAgentExt(resolved.NewAgent()), resolved.Provider, resolved.Model, nil
 	}
 
@@ -544,6 +587,7 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 		}
 		resolved.UseSandbox(sharedSandbox)
 		resolved.MergeExtensionTools(extToolAdapter)
+		injectSwarmSpawn(resolved.ToolRegistry)
 		current.SetTools(resolved.ToolRegistry)
 	})
 
@@ -552,11 +596,6 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 
 	var sess *core.Session
 	var sessBaselineMsgs int // messages already on disk when current session opened
-	// swarmMgr is constructed below, but loadSession (defined before
-	// the construction site) needs to re-scope it whenever the user
-	// swaps sessions. Forward-declare here so the closure can
-	// reference it; the assignment happens at the construction line.
-	var swarmMgr *swarm.Swarm
 	// persistMu guards sess + sessBaselineMsgs against concurrent access
 	// from the agent loop's per-message persistence hook (runs on the
 	// agent goroutine) and the TUI's session swap / flush callbacks
@@ -861,18 +900,9 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 
 	initialCfg, _ := LoadConfig()
 
-	// Build the swarm supervisor. Root lives under ZotHome/swarm so
-	// worktrees survive across zot sessions and the user can hunt
-	// them down with `git worktree list` if anything misbehaves.
-	swarmMgr = swarm.New(swarm.Config{
-		Root:     filepath.Join(ZotHome(), "swarm"),
-		RepoRoot: r.CWD,
-	})
-	// Pull any previously-spawned agents off disk so the user can see,
-	// resume, or remove them from the dashboard. Failures here aren't
-	// fatal — the supervisor still works for new agents.
-	_, _ = swarmMgr.Reload()
-	// Scope the dashboard to the active host session so /swarm only
+	// swarmMgr was constructed and reloaded earlier (before the agent
+	// build, so the auto-swarm tool could capture it). Here we just
+	// scope the dashboard to the active host session so /swarm only
 	// shows agents this session spawned (and any pre-upgrade unscoped
 	// agents — see SnapshotAll docs). Updated again whenever the
 	// user swaps sessions via loadSession below.
@@ -887,6 +917,8 @@ func runInteractive(ctx context.Context, args Args, version string) error {
 		Terminal: term,
 		Theme: tui.DetectThemeFromBackground(80 * time.Millisecond),
 		InlineImagesEnabled: initialCfg.InlineImagesEnabled,
+		AutoSwarmEnabled: initialCfg.AutoSwarmEnabled,
+		AutoSwarmSystemAddendum: AutoSwarmSystemAddendum,
 		SettingsStore: configSettingsStore{},
 		Model: r.Model,
 		Provider: r.Provider,
diff --git a/internal/agent/config.go b/internal/agent/config.go
index 1fc0f66..47623a3 100644
--- a/internal/agent/config.go
+++ b/internal/agent/config.go
@@ -27,6 +27,11 @@ type Config struct {
 	// detected protocol when available.
 	InlineImagesEnabled *bool `json:"inline_images_enabled,omitempty"`
 
+	// AutoSwarmEnabled lets the main agent spawn background sub-agents
+	// for parallel sub-tasks via a built-in swarm_spawn tool. Off by
+	// default; nil/missing means disabled. Toggle from /settings.
+	AutoSwarmEnabled *bool `json:"auto_swarm_enabled,omitempty"`
+
 	// LastChangelogShown is the version whose release-notes
 	// dialog the user has already seen. When the running binary's
 	// version differs, the next interactive run shows the
diff --git a/internal/agent/modes/interactive.go b/internal/agent/modes/interactive.go
index 21771ca..b962b3f 100644
--- a/internal/agent/modes/interactive.go
+++ b/internal/agent/modes/interactive.go
@@ -39,7 +39,18 @@ type InteractiveConfig struct {
 	// auto-detect and render when supported; false disables; true uses
 	// the detected protocol when available.
 	InlineImagesEnabled *bool
-	SettingsStore       SettingsStore
+
+	// AutoSwarmEnabled mirrors the persisted config flag at startup so
+	// the /settings dialog can render the current state without
+	// re-reading config.json on every open.
+	AutoSwarmEnabled *bool
+
+	// AutoSwarmSystemAddendum is the system-prompt block that gets
+	// appended/stripped when the user toggles auto-swarm at runtime.
+	// Plumbed in from the cli so this package doesn't have to import
+	// agent (cycle).
+	AutoSwarmSystemAddendum string
+	SettingsStore           SettingsStore
 
 	// Agent is optional. If nil, zot opens without credentials; the
 	// user must /login before they can prompt.
@@ -209,6 +220,7 @@ type chatCacheKey struct {
 // SettingsStore persists user-toggleable settings surfaced by /settings.
 type SettingsStore interface {
 	SetInlineImages(enabled bool) error
+	SetAutoSwarm(enabled bool) error
 }
 
 type Interactive struct {
@@ -2470,17 +2482,49 @@ func (i *Interactive) openSettingsDialog() {
 	} else {
 		imgHint = "terminal supports " + imageProtocolName(detected)
 	}
-	i.settingsDialog.Open([]settingsItem{{
-		key:      "inline_images_enabled",
-		label:    "render images when supported",
-		desc:     "draw screenshots inline instead of showing a text placeholder",
-		value:    imgEnabled,
-		disabled: imgDisabled,
-		hint:     imgHint,
-	}})
+
+	autoSwarm := false
+	if i.cfg.AutoSwarmEnabled != nil {
+		autoSwarm = *i.cfg.AutoSwarmEnabled
+	}
+	autoSwarmDisabled := i.cfg.Swarm == nil
+	autoSwarmHint := ""
+	if autoSwarmDisabled {
+		autoSwarm = false
+		autoSwarmHint = "swarm supervisor not available in this mode"
+	}
+
+	i.settingsDialog.Open([]settingsItem{
+		{
+			key:      "inline_images_enabled",
+			label:    "render images when supported",
+			desc:     "draw screenshots inline instead of showing a text placeholder",
+			value:    imgEnabled,
+			disabled: imgDisabled,
+			hint:     imgHint,
+		},
+		{
+			key:      "auto_swarm_enabled",
+			label:    "auto-swarm",
+			desc:     "let the agent spawn background sub-agents in parallel via the swarm_spawn tool",
+			value:    autoSwarm,
+			disabled: autoSwarmDisabled,
+			hint:     autoSwarmHint,
+		},
+	})
 }
 
 func (i *Interactive) applySettingToggle(key string, value bool) {
+	// Every setting toggle forces a full repaint at the end — same
+	// effect as the user pressing Ctrl+L — so any per-setting visual
+	// change (image rendering, status copy, future toggles) lands
+	// immediately instead of waiting for the next diff frame.
+	defer func() {
+		if i.rend != nil {
+			i.rend.Clear()
+		}
+		i.invalidate()
+	}()
 	switch key {
 	case "inline_images_enabled":
 		val := value
@@ -2499,6 +2543,33 @@ func (i *Interactive) applySettingToggle(key string, value bool) {
 		i.statusOK = "inline image rendering " + onOff(value)
 		i.statusErr = ""
 		i.mu.Unlock()
+	case "auto_swarm_enabled":
+		// Persist first: if the write fails we bail out with the
+		// in-memory flag, tool registry and prompt all untouched, so
+		// /settings never shows a state that was not actually applied.
+		if i.cfg.SettingsStore != nil {
+			if err := i.cfg.SettingsStore.SetAutoSwarm(value); err != nil {
+				i.mu.Lock()
+				i.statusErr = "settings: " + err.Error()
+				i.mu.Unlock()
+				return
+			}
+		}
+		val := value
+		i.cfg.AutoSwarmEnabled = &val
+		// Add/remove the swarm_spawn tool on the live agent so the
+		// model's tools[] list reflects the toggle on the next turn.
+		// Without this the tool stays advertised after a disable and
+		// the model keeps trying to call it.
+		i.applyAutoSwarmTool(value)
+		// Also swap the system-prompt addendum in/out so the model
+		// knows to use the tool proactively (or stops referencing it
+		// after a disable).
+		i.applyAutoSwarmSystemPrompt(value)
+		i.mu.Lock()
+		i.statusOK = "auto-swarm " + onOff(value)
+		i.statusErr = ""
+		i.mu.Unlock()
 	}
 }
 
@@ -2790,10 +2861,7 @@ func (i *Interactive) runSlash(ctx context.Context, cmd string) (done bool) {
 			i.modelDialog.Open(i.cfg.Model, loggedIn)
 		}
 	case "/settings":
-		i.mu.Lock()
-		i.statusErr = "/settings is temporarily disabled"
-		i.statusOK = ""
-		i.mu.Unlock()
+		i.openSettingsDialog()
 	case "/sessions":
 		i.sessionDialog.Open(i.cfg.ZotHome, i.cfg.CWD)
 	case "/jump":
@@ -4353,6 +4421,65 @@ func (a telegramSenderAdapter) Active() bool {
 	return a.bridge != nil && a.bridge.Active()
 }
 
+// applyAutoSwarmSystemPrompt appends (active=true) or strips
+// (active=false) the auto-swarm system-prompt block on the running
+// agent so the model proactively considers swarm_spawn when the user
+// flips the toggle. Enabling appends the block at the tail of
+// agent.System behind a blank-line separator; disabling removes every
+// occurrence of the block together with the separator in front of it,
+// so an on->off round trip restores the prompt (unless it already ended
+// in a blank line) and an otherwise empty prompt stays empty instead of
+// becoming "\n". Idempotent in both directions.
+func (i *Interactive) applyAutoSwarmSystemPrompt(active bool) {
+	if i.agent == nil {
+		return
+	}
+	addendum := i.cfg.AutoSwarmSystemAddendum
+	if addendum == "" {
+		return
+	}
+	sys := i.agent.System
+	has := strings.Contains(sys, addendum)
+	switch {
+	case active && !has:
+		if sys != "" && !strings.HasSuffix(sys, "\n\n") {
+			sys += "\n\n"
+		}
+		i.agent.System = sys + addendum
+	case !active && has:
+		// Strip the separator-prefixed form first, then any bare copy
+		// (e.g. when the block is the whole prompt).
+		sys = strings.ReplaceAll(sys, "\n\n"+addendum, "")
+		i.agent.System = strings.ReplaceAll(sys, addendum, "")
+	}
+}
+
+// applyAutoSwarmTool registers (active=true) or removes (active=false)
+// the swarm_spawn tool on the running agent so the model only sees it
+// when /settings -> auto-swarm is enabled. Mirrors applyTelegramTools'
+// snapshot+mutate pattern so extension tools and /reload-ext additions
+// survive a toggle.
+func (i *Interactive) applyAutoSwarmTool(active bool) {
+	if i.agent == nil {
+		return
+	}
+	current := i.agent.Tools
+	next := core.Registry{}
+	for name, t := range current {
+		if name == "swarm_spawn" {
+			continue
+		}
+		next[name] = t
+	}
+	if active && i.cfg.Swarm != nil {
+		next["swarm_spawn"] = &tools.SwarmSpawnTool{
+			Swarm:   i.cfg.Swarm,
+			Enabled: func() bool { return true },
+		}
+	}
+	i.agent.SetTools(next)
+}
+
 // applyTelegramTools registers (active=true) or removes (active=false)
 // the telegram_send_image and telegram_send_file tools on the running
 // agent so the model only sees them while the bridge is connected.
diff --git a/internal/agent/modes/slash_suggest.go b/internal/agent/modes/slash_suggest.go
index 69a3830..6acb7f6 100644
--- a/internal/agent/modes/slash_suggest.go
+++ b/internal/agent/modes/slash_suggest.go
@@ -50,6 +50,7 @@ var slashCatalog = []slashCommand{
 	{Name: "/swarm", Desc: "supervise background agents that share this working directory"},
 	{Name: "/reload-ext", Desc: "hot-reload all extensions (re-read manifests and respawn)"},
 	{Name: "/telegram", Desc: "connect, disconnect, or show status of the telegram bridge"},
+	{Name: "/settings", Desc: "open settings"},
 	{Name: "/clear", Desc: "clear the chat transcript"},
 	{Name: "/exit", Desc: "exit zot"},
 }
diff --git a/internal/agent/settings_store.go b/internal/agent/settings_store.go
index af3e9c4..b0ce31d 100644
--- a/internal/agent/settings_store.go
+++ b/internal/agent/settings_store.go
@@ -10,3 +10,33 @@ func (configSettingsStore) SetInlineImages(enabled bool) error {
 	cfg.InlineImagesEnabled = &enabled
 	return SaveConfig(cfg)
 }
+
+// SetAutoSwarm persists the auto-swarm flag: load config, set it, save.
+func (configSettingsStore) SetAutoSwarm(enabled bool) error {
+	cfg, err := LoadConfig()
+	if err != nil {
+		return err
+	}
+	cfg.AutoSwarmEnabled = &enabled
+	return SaveConfig(cfg)
+}
+
+// AutoSwarmEnabled reads the current auto-swarm flag from config.
+// Used by the swarm_spawn tool at call time to gate execution.
+func AutoSwarmEnabled() bool {
+	cfg, err := LoadConfig()
+	if err != nil {
+		return false
+	}
+	return cfg.AutoSwarmEnabled != nil && *cfg.AutoSwarmEnabled
+}
+
+// AutoSwarmSystemAddendum is appended to the system prompt when
+// auto-swarm is enabled, so the model knows it may delegate to
+// background sub-agents without the user having to mention the tool
+// by name. Kept short so it doesn't bloat the cached prompt prefix.
+const AutoSwarmSystemAddendum = `Auto-swarm is enabled. You have a swarm_spawn tool that forks background sub-agents working in parallel in this same working directory.
+
+Use it proactively when the user's request naturally splits into independent sub-tasks that can run concurrently (e.g. "refactor module A and module B", "write the implementation and the tests", "investigate three separate files"). Spawn one sub-agent per independent sub-task with a self-contained task description (sub-agents start with no context from this conversation). Continue working on the remaining or coordinating work yourself in parallel; do not wait for sub-agents to finish before responding. Briefly tell the user which sub-agents you spawned and what each is doing.
+
+Do NOT use swarm_spawn for trivial single-step work, for tasks that depend on each other sequentially, or when the user explicitly asked you to do the work yourself.`
diff --git a/internal/agent/tools/swarm_spawn.go b/internal/agent/tools/swarm_spawn.go
new file mode 100644
index 0000000..8a31026
--- /dev/null
+++ b/internal/agent/tools/swarm_spawn.go
@@ -0,0 +1,156 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/patriceckhart/zot/internal/core"
+	"github.com/patriceckhart/zot/internal/provider"
+	"github.com/patriceckhart/zot/internal/swarm"
+)
+
+// SwarmSpawnTool lets the main agent fork a background sub-agent
+// against the host's cwd via swarm.Swarm.SpawnReq. The sub-agent runs
+// in parallel: the tool returns the agent id immediately and the main
+// turn continues uninterrupted. The user can monitor / chat with the
+// spawned agent via /swarm.
+//
+// Gated by the auto_swarm_enabled config flag at call time so a user
+// can flip it off mid-session and the next call refuses cleanly
+// without re-registering the tool.
+type SwarmSpawnTool struct {
+	// Swarm is the supervisor used to spawn agents. Nil means
+	// "auto-swarm not available in this mode" and the tool always
+	// errors.
+	Swarm *swarm.Swarm
+
+	// Enabled reads the live config flag. Lets users toggle from
+	// /settings without rebuilding the agent. When nil, the tool
+	// is treated as disabled.
+	Enabled func() bool
+}
+
+// swarmSpawnArgs is the JSON argument payload accepted by swarm_spawn.
+type swarmSpawnArgs struct {
+	Task     string `json:"task"`
+	Model    string `json:"model,omitempty"`
+	Provider string `json:"provider,omitempty"`
+}
+
+// swarmSpawnSchema is the JSON Schema that Schema returns for the arguments.
+const swarmSpawnSchema = `{
+  "type": "object",
+  "properties": {
+    "task": {
+      "type": "string",
+      "description": "The full task description for the sub-agent. Be specific: the sub-agent has the same tools (read/write/edit/bash) and shares this working directory, but starts with NO context from this conversation."
+    },
+    "model": {
+      "type": "string",
+      "description": "Optional model id to pin the sub-agent to (e.g. \"claude-sonnet-4-5\", \"gpt-5\"). Defaults to the host's current model."
+    },
+    "provider": {
+      "type": "string",
+      "description": "Optional provider id (e.g. \"anthropic\", \"openai\"). Usually paired with model."
+    }
+  },
+  "required": ["task"]
+}`
+
+// Name returns the identifier the tool is registered and invoked under.
+func (t *SwarmSpawnTool) Name() string { return "swarm_spawn" }
+
+// Description returns the model-facing summary of what the tool does.
+func (t *SwarmSpawnTool) Description() string {
+	return "Spawn a background sub-agent to work on a parallel sub-task. Returns the sub-agent id immediately; the sub-agent keeps running while this conversation continues. Useful for splitting independent work (write tests while implementing a feature, refactor module A while drafting module B). The sub-agent shares this working directory and has the same tools."
+}
+
+// Schema returns the JSON Schema for the tool's arguments.
+func (t *SwarmSpawnTool) Schema() json.RawMessage { return json.RawMessage(swarmSpawnSchema) }
+
+// Execute validates the arguments and asks the supervisor to spawn one
+// sub-agent. A nil Swarm, a nil or false Enabled, and a blank task come
+// back as IsError tool results; undecodable JSON and SpawnReq failures
+// are returned as Go errors. Model and provider are whitespace-trimmed
+// once and the trimmed values feed the spawn request, the text summary
+// and Details alike, so the report matches what was actually requested.
+// The progress callback is not used.
+func (t *SwarmSpawnTool) Execute(ctx context.Context, raw json.RawMessage, progress func(string)) (core.ToolResult, error) {
+	if t.Swarm == nil {
+		return toolErr("swarm_spawn: swarm supervisor not available in this mode"), nil
+	}
+	if t.Enabled == nil || !t.Enabled() {
+		return toolErr("swarm_spawn: auto-swarm is disabled. Ask the user to enable it from /settings before delegating sub-tasks."), nil
+	}
+	var a swarmSpawnArgs
+	if err := json.Unmarshal(raw, &a); err != nil {
+		return core.ToolResult{}, fmt.Errorf("invalid args: %w", err)
+	}
+	task := strings.TrimSpace(a.Task)
+	if task == "" {
+		return toolErr("swarm_spawn: task is required"), nil
+	}
+	modelID := strings.TrimSpace(a.Model)
+	providerID := strings.TrimSpace(a.Provider)
+
+	agent, err := t.Swarm.SpawnReq(ctx, swarm.SpawnRequest{
+		Task:     task,
+		Model:    modelID,
+		Provider: providerID,
+	})
+	if err != nil {
+		return core.ToolResult{}, fmt.Errorf("swarm_spawn: %w", err)
+	}
+
+	var sb strings.Builder
+	fmt.Fprintf(&sb, "spawned sub-agent %s\n", agent.ID)
+	fmt.Fprintf(&sb, "task: %s\n", truncateTask(task, 200))
+	if modelID != "" {
+		fmt.Fprintf(&sb, "model: %s\n", modelID)
+	}
+	if providerID != "" {
+		fmt.Fprintf(&sb, "provider: %s\n", providerID)
+	}
+	sb.WriteString("\nThe sub-agent is running in the background. Use /swarm in the TUI to monitor it. ")
+	sb.WriteString("This conversation continues immediately; do not wait for the sub-agent to finish before working on the next thing.")
+	return core.ToolResult{
+		Content: []provider.Content{provider.TextBlock{Text: sb.String()}},
+		Details: map[string]any{
+			"agent_id": agent.ID,
+			"task":     task,
+			"model":    modelID,
+			"provider": providerID,
+		},
+	}, nil
+}
+
+// toolErr wraps msg in a text-only ToolResult with IsError set.
+func toolErr(msg string) core.ToolResult {
+	return core.ToolResult{
+		Content: []provider.Content{provider.TextBlock{Text: msg}},
+		IsError: true,
+	}
+}
+
+// truncateTask shortens s to at most n runes, replacing the tail with
+// "..." when it has to cut. It slices on rune boundaries so a
+// multi-byte character is never split into invalid UTF-8, and it
+// tolerates n < 3 (no room for the ellipsis) instead of panicking.
+func truncateTask(s string, n int) string {
+	if len(s) <= n {
+		return s // byte length within the limit implies the rune count is too
+	}
+	runes := []rune(s)
+	if len(runes) <= n {
+		return s
+	}
+	if n < 3 {
+		if n < 0 {
+			n = 0
+		}
+		return string(runes[:n])
+	}
+	return string(runes[:n-3]) + "..."
+}