Skip to content

Commit 68dfb3b

Browse files
authored
Merge pull request #318 from andrewyng/feat/code-agent-upgrades
Code agent: tool ergonomics, parallel execution, plan/discuss modes, explorer subagent
2 parents 042a5b0 + b4a94b7 commit 68dfb3b

29 files changed

Lines changed: 1813 additions & 77 deletions

platform/coworker/agent.py

Lines changed: 75 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
make_send_message_tool,
2020
)
2121
from .engine import Approver, TurnEngine
22+
from .environment import environment_context
2223
from .memory import MemoryStore, Scope, format_memories, memory_tools
2324
from .permissions import Mode, PermissionEngine
2425
from .project import load_agents_md
@@ -28,10 +29,43 @@
2829
from .skills import SkillLoader, skill_catalog_text, skill_tools
2930
from .tools import ToolRegistry
3031
from .tools.directories import request_directory_tool
32+
from .tools.plan import propose_plan_tool
33+
from .tools.subagent import explorer_tools
3134
from .web import make_web_fetch_tool, make_web_search_tool
3235
from .tools.shell import LocalExecutor
3336
from .tools.todo import TodoList
3437

38+
# Appended each turn while discuss mode is active: read-only is enforced, but there is no
39+
# pressure toward a plan proposal (that's what distinguishes it from plan mode).
40+
_DISCUSS_MODE_CONTEXT = """\
41+
Discuss mode is active: write and shell tools are disabled. Explore and answer freely; if
42+
the user asks for a change, describe it in chat instead of attempting it (they can switch
43+
to plan or approval mode to have you make it)."""
44+
45+
# Appended to the latest user message every turn while plan mode is active. The mode can
46+
# flip mid-session (plan approval), so this can't live in the static instructions.
47+
_PLAN_MODE_CONTEXT = """\
48+
Plan mode is active: write and shell tools are blocked. Explore read-only and design an
49+
approach. When you've committed to one, present it with `propose_plan` (what you'll change,
50+
in which files, how you'll verify) — don't describe edits as if you were making them. If
51+
the plan is approved, this same session switches to execution and you implement it; if
52+
rejected, revise the plan using the feedback."""
53+
54+
# When-to-remember rules, injected only when a memory store is wired. Without these,
55+
# models either never call `remember` or save noise the repo already records.
56+
_MEMORY_GUIDANCE = """\
57+
Memory:
58+
- You have persistent memory across sessions. Use `remember` for durable facts: the user's \
59+
corrections and stated preferences (include the why), and project context you couldn't \
60+
rederive from the code. Don't save what the repo already records (code structure, git \
61+
history, AGENTS.md) or details that only matter to the current task. Use absolute dates, \
62+
never "yesterday".
63+
- Before saving, check the known-memories list: if an entry already covers it, revise that \
64+
entry with `memory_update` instead of adding a near-duplicate; retire wrong or obsolete \
65+
entries with `memory_forget`.
66+
- Memories reflect when they were written. If one names a file, flag, or URL, verify it \
67+
still exists before relying on it."""
68+
3569

3670
def _enabled_connector_tools(secrets: SecretStore) -> tuple[set[str], set[str]]:
3771
connectors = {c["name"]: c for c in connector_list(secrets)}
@@ -77,6 +111,7 @@ def build_engine(
77111
audit_sink: Optional[Any] = None,
78112
roots: Optional[list] = None,
79113
directory_requester: Optional[Any] = None,
114+
plan_approver: Optional[Any] = None,
80115
) -> TurnEngine:
81116
ws = Path(workspace).expanduser().resolve() if workspace else None
82117
if agent.needs_workspace and ws is None:
@@ -129,6 +164,21 @@ def build_engine(
129164
# Web search + fetch: research tools for every agent (keyless DuckDuckGo default).
130165
registry.register(make_web_search_tool(secrets))
131166
registry.register(make_web_fetch_tool())
167+
# Route by the model's `provider:` prefix (OpenAI default, Ollama, …). The manager normally
168+
# passes its shared router; this fallback covers the TUI / direct build_engine() callers.
169+
# Resolved here (not at engine construction) because the explorer subagent captures it.
170+
provider = provider or ProviderRouter(secrets, default_provider="openai")
171+
# The Code agent can fan broad research out to read-only explorer subagents, keeping its
172+
# own context for the actual change.
173+
if agent.name == "code" and ws is not None:
174+
registry.register_all(
175+
explorer_tools(
176+
workspace=ws,
177+
provider=provider,
178+
model=model,
179+
model_settings=model_settings,
180+
)
181+
)
132182
# Scheduling: Cowork + MyHelper can set up scheduled tasks (origin = this session).
133183
if (
134184
task_store is not None
@@ -147,6 +197,7 @@ def build_engine(
147197

148198
instructions = agent.system_prompt
149199
if ws is not None:
200+
instructions = f"{instructions}\n\n{environment_context(ws)}"
150201
conventions = load_agents_md(ws)
151202
if conventions:
152203
instructions = f"{instructions}\n\n{conventions}"
@@ -155,6 +206,7 @@ def build_engine(
155206
registry.register_all(
156207
memory_tools(memory_store, workspace=str(ws) if ws else None)
157208
)
209+
instructions = f"{instructions}\n\n{_MEMORY_GUIDANCE}"
158210
remembered = memory_store.list(scope=Scope.GLOBAL)
159211
if ws is not None:
160212
remembered += memory_store.list(scope=Scope.WORKSPACE, workspace=str(ws))
@@ -175,17 +227,32 @@ def build_engine(
175227
auto_allow_tools=set(config.auto_allow),
176228
roots=root_list or None,
177229
)
178-
# Tell the agent, each turn, which directories it has and their access (orphan Cowork can gain
179-
# folders mid-session) — appended to the latest user message since mid-thread system messages
180-
# aren't reliable across providers. Multi-dir surfaces (Cowork/MyHelper) only.
181-
context_provider = (
230+
# The plan-mode exit door. Always registered (surfaces can flip a live session into
231+
# plan mode via set_mode, and the registry is fixed at build); the engine rejects the
232+
# call whenever the session isn't actually in plan mode.
233+
registry.register(propose_plan_tool())
234+
235+
# Per-turn ephemeral context, appended to the latest user message since mid-thread system
236+
# messages aren't reliable across providers. Two producers: the plan/discuss reminder (mode can
237+
# flip mid-session, so it's checked each turn, not baked into the instructions) and the live
238+
# directory list (orphan Cowork can gain folders mid-session; Cowork/MyHelper only).
239+
roots_context = (
182240
(lambda: render_context(root_list))
183241
if root_list and agent.name in ("cowork", "myhelper")
184242
else None
185243
)
186-
# Route by the model's `provider:` prefix (OpenAI default, Ollama, …). The manager normally
187-
# passes its shared router; this fallback covers the TUI / direct build_engine() callers.
188-
provider = provider or ProviderRouter(secrets, default_provider="openai")
244+
245+
def context_provider() -> str:
246+
parts = []
247+
if permissions.mode is Mode.PLAN:
248+
parts.append(_PLAN_MODE_CONTEXT)
249+
elif permissions.mode is Mode.DISCUSS:
250+
parts.append(_DISCUSS_MODE_CONTEXT)
251+
if roots_context is not None:
252+
ctx = roots_context()
253+
if ctx:
254+
parts.append(ctx)
255+
return "\n\n".join(parts)
189256

190257
engine = TurnEngine(
191258
provider=provider,
@@ -202,6 +269,7 @@ def build_engine(
202269
audit_sink=audit_sink,
203270
context_provider=context_provider,
204271
directory_requester=directory_requester,
272+
plan_approver=plan_approver,
205273
)
206274
engine.executor = executor # type: ignore[attr-defined]
207275
engine.todo = todo # type: ignore[attr-defined]

platform/coworker/agents/code.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import aisuite as ai
66

7+
from ..tools.files import file_tools
78
from ..tools.git import git_tools
89
from ..tools.search import search_tools
910
from ..tools.shell import shell_tools
@@ -17,6 +18,12 @@
1718
- Explore first. Use `grep` and `read_file` to find the relevant code and learn how it works \
1819
before editing. Don't guess at APIs, signatures, or layout — read them. `git_log` shows how a \
1920
file evolved. Read meaningful chunks, not a line at a time.
21+
- Independent lookups run in parallel: when you need several reads/greps and none depends on \
22+
another's result, request them together in one batch instead of one per turn.
23+
- For broad questions spanning many files ("where is X handled?", "how does the Y flow \
24+
work?"), delegate to `explore` — a read-only subagent that searches in its own context and \
25+
returns only a report, keeping your context for the actual change. Independent explores can \
26+
run in parallel. For a single known file, just read it yourself.
2027
2128
Match the codebase:
2229
- Write code that reads like the surrounding code: match its style, naming, structure, and \
@@ -35,10 +42,13 @@
3542
`apply_unified_diff` for standard unified diffs; `write_file` for new files or full rewrites.
3643
3744
Verify:
38-
- `run_shell` is a persistent bash (cd and env persist). After changes, run the narrowest \
45+
- `run_shell` is a persistent shell (cd and env persist). After changes, run the narrowest \
3946
relevant test/build/lint to confirm your work. Don't report something done without verifying \
4047
it; if you can't verify, say so plainly. Don't repeat a failing command — if stuck after 2–3 \
4148
attempts, step back, reconsider, and surface the blocker.
49+
- Pass a short `description` with each command (shown in approval prompts), and raise \
50+
`timeout_seconds` for slow builds/tests. For long-running processes (dev servers, watchers), \
51+
set `run_in_background` and poll `shell_task_output`; stop them with `shell_task_kill`.
4252
4353
Plan multi-step work:
4454
- For anything beyond a few steps, maintain a task list with `todo_write`: keep exactly one \
@@ -59,14 +69,17 @@
5969
def code_agent() -> Agent:
6070
def factory(context: AgentContext) -> list:
6171
ws = str(context.workspace)
62-
# Our `grep` (ripgrep, .gitignore-aware) replaces the toolkit's slower search_files.
72+
# Our `grep` (ripgrep, .gitignore-aware) replaces the toolkit's slower search_files;
73+
# our line-numbered, windowing `read_file` replaces its read_file/read_file_lines.
74+
replaced = {"search_files", "read_file", "read_file_lines"}
6375
files = [
6476
t
6577
for t in ai.toolkits.files(root=ws, allow_write=True)
66-
if getattr(t, "__name__", "") != "search_files"
78+
if getattr(t, "__name__", "") not in replaced
6779
]
6880
tools = [
6981
*files,
82+
*file_tools(ws), # read_file (numbered lines, windowed)
7083
*ai.toolkits.git(root=ws), # git_status, git_diff
7184
*git_tools(ws), # git_log
7285
*search_tools(ws), # grep

0 commit comments

Comments
 (0)