Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 75 additions & 7 deletions platform/coworker/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
make_send_message_tool,
)
from .engine import Approver, TurnEngine
from .environment import environment_context
from .memory import MemoryStore, Scope, format_memories, memory_tools
from .permissions import Mode, PermissionEngine
from .project import load_agents_md
Expand All @@ -28,10 +29,43 @@
from .skills import SkillLoader, skill_catalog_text, skill_tools
from .tools import ToolRegistry
from .tools.directories import request_directory_tool
from .tools.plan import propose_plan_tool
from .tools.subagent import explorer_tools
from .web import make_web_fetch_tool, make_web_search_tool
from .tools.shell import LocalExecutor
from .tools.todo import TodoList

# Appended each turn while discuss mode is active: enforcement-only read-only, with no
# pressure toward a plan proposal (that's what distinguishes it from plan mode).
_DISCUSS_MODE_CONTEXT = """\
Discuss mode is active: write and shell tools are disabled. Explore and answer freely; if
the user asks for a change, describe it in chat instead of attempting it (they can switch
to plan or approval mode to have you make it)."""

# Appended to the latest user message every turn while plan mode is active. The mode can
# flip mid-session (plan approval), so this can't live in the static instructions.
_PLAN_MODE_CONTEXT = """\
Plan mode is active: write and shell tools are blocked. Explore read-only and design an
approach. When you've committed to one, present it with `propose_plan` (what you'll change,
in which files, how you'll verify) — don't describe edits as if you were making them. If
the plan is approved, this same session switches to execution and you implement it; if
rejected, revise the plan using the feedback."""

# When-to-remember rules, injected only when a memory store is wired. Without these,
# models either never call `remember` or save noise the repo already records.
_MEMORY_GUIDANCE = """\
Memory:
- You have persistent memory across sessions. Use `remember` for durable facts: the user's \
corrections and stated preferences (include the why), and project context you couldn't \
rederive from the code. Don't save what the repo already records (code structure, git \
history, AGENTS.md) or details that only matter to the current task. Use absolute dates, \
never "yesterday".
- Before saving, check the known-memories list: if an entry already covers it, revise that \
entry with `memory_update` instead of adding a near-duplicate; retire wrong or obsolete \
entries with `memory_forget`.
- Memories reflect when they were written. If one names a file, flag, or URL, verify it \
still exists before relying on it."""


def _enabled_connector_tools(secrets: SecretStore) -> tuple[set[str], set[str]]:
connectors = {c["name"]: c for c in connector_list(secrets)}
Expand Down Expand Up @@ -77,6 +111,7 @@ def build_engine(
audit_sink: Optional[Any] = None,
roots: Optional[list] = None,
directory_requester: Optional[Any] = None,
plan_approver: Optional[Any] = None,
) -> TurnEngine:
ws = Path(workspace).expanduser().resolve() if workspace else None
if agent.needs_workspace and ws is None:
Expand Down Expand Up @@ -128,6 +163,21 @@ def build_engine(
# Web search + fetch: research tools for every agent (keyless DuckDuckGo default).
registry.register(make_web_search_tool(secrets))
registry.register(make_web_fetch_tool())
# Route by the model's `provider:` prefix (OpenAI default, Ollama, …). The manager normally
# passes its shared router; this fallback covers the TUI / direct build_engine() callers.
# Resolved here (not at engine construction) because the explorer subagent captures it.
provider = provider or ProviderRouter(secrets, default_provider="openai")
# The Code agent can fan broad research out to read-only explorer subagents, keeping its
# own context for the actual change.
if agent.name == "code" and ws is not None:
registry.register_all(
explorer_tools(
workspace=ws,
provider=provider,
model=model,
model_settings=model_settings,
)
)
# Scheduling: Cowork + MyHelper can set up scheduled tasks (origin = this session).
if (
task_store is not None
Expand All @@ -146,6 +196,7 @@ def build_engine(

instructions = agent.system_prompt
if ws is not None:
instructions = f"{instructions}\n\n{environment_context(ws)}"
conventions = load_agents_md(ws)
if conventions:
instructions = f"{instructions}\n\n{conventions}"
Expand All @@ -154,6 +205,7 @@ def build_engine(
registry.register_all(
memory_tools(memory_store, workspace=str(ws) if ws else None)
)
instructions = f"{instructions}\n\n{_MEMORY_GUIDANCE}"
remembered = memory_store.list(scope=Scope.GLOBAL)
if ws is not None:
remembered += memory_store.list(scope=Scope.WORKSPACE, workspace=str(ws))
Expand All @@ -174,17 +226,32 @@ def build_engine(
auto_allow_tools=set(config.auto_allow),
roots=root_list or None,
)
# Tell the agent, each turn, which directories it has and their access (orphan Cowork can gain
# folders mid-session) — appended to the latest user message since mid-thread system messages
# aren't reliable across providers. Multi-dir surfaces (Cowork/MyHelper) only.
context_provider = (
# The plan-mode exit door. Always registered (surfaces can flip a live session into
# plan mode via set_mode, and the registry is fixed at build); the engine rejects the
# call whenever the session isn't actually in plan mode.
registry.register(propose_plan_tool())

# Per-turn ephemeral context, appended to the latest user message since mid-thread system
# messages aren't reliable across providers. Two producers: the plan-mode reminder (mode can
# flip mid-session, so it's checked each turn, not baked into the instructions) and the live
# directory list (orphan Cowork can gain folders mid-session; Cowork/MyHelper only).
roots_context = (
(lambda: render_context(root_list))
if root_list and agent.name in ("cowork", "myhelper")
else None
)
# Route by the model's `provider:` prefix (OpenAI default, Ollama, …). The manager normally
# passes its shared router; this fallback covers the TUI / direct build_engine() callers.
provider = provider or ProviderRouter(secrets, default_provider="openai")

def context_provider() -> str:
parts = []
if permissions.mode is Mode.PLAN:
parts.append(_PLAN_MODE_CONTEXT)
elif permissions.mode is Mode.DISCUSS:
parts.append(_DISCUSS_MODE_CONTEXT)
if roots_context is not None:
ctx = roots_context()
if ctx:
parts.append(ctx)
return "\n\n".join(parts)

engine = TurnEngine(
provider=provider,
Expand All @@ -201,6 +268,7 @@ def build_engine(
audit_sink=audit_sink,
context_provider=context_provider,
directory_requester=directory_requester,
plan_approver=plan_approver,
)
engine.executor = executor # type: ignore[attr-defined]
engine.todo = todo # type: ignore[attr-defined]
Expand Down
19 changes: 16 additions & 3 deletions platform/coworker/agents/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import aisuite as ai

from ..tools.files import file_tools
from ..tools.git import git_tools
from ..tools.search import search_tools
from ..tools.shell import shell_tools
Expand All @@ -17,6 +18,12 @@
- Explore first. Use `grep` and `read_file` to find the relevant code and learn how it works \
before editing. Don't guess at APIs, signatures, or layout — read them. `git_log` shows how a \
file evolved. Read meaningful chunks, not a line at a time.
- Independent lookups run in parallel: when you need several reads/greps and none depends on \
another's result, request them together in one batch instead of one per turn.
- For broad questions spanning many files ("where is X handled?", "how does the Y flow \
work?"), delegate to `explore` — a read-only subagent that searches in its own context and \
returns only a report, keeping your context for the actual change. Independent explores can \
run in parallel. For a single known file, just read it yourself.

Match the codebase:
- Write code that reads like the surrounding code: match its style, naming, structure, and \
Expand All @@ -35,10 +42,13 @@
`apply_unified_diff` for standard unified diffs; `write_file` for new files or full rewrites.

Verify:
- `run_shell` is a persistent bash (cd and env persist). After changes, run the narrowest \
- `run_shell` is a persistent shell (cd and env persist). After changes, run the narrowest \
relevant test/build/lint to confirm your work. Don't report something done without verifying \
it; if you can't verify, say so plainly. Don't repeat a failing command — if stuck after 2–3 \
attempts, step back, reconsider, and surface the blocker.
- Pass a short `description` with each command (shown in approval prompts), and raise \
`timeout_seconds` for slow builds/tests. For long-running processes (dev servers, watchers), \
set `run_in_background` and poll `shell_task_output`; stop them with `shell_task_kill`.

Plan multi-step work:
- For anything beyond a few steps, maintain a task list with `todo_write`: keep exactly one \
Expand All @@ -59,14 +69,17 @@
def code_agent() -> Agent:
def factory(context: AgentContext) -> list:
ws = str(context.workspace)
# Our `grep` (ripgrep, .gitignore-aware) replaces the toolkit's slower search_files.
# Our `grep` (ripgrep, .gitignore-aware) replaces the toolkit's slower search_files;
# our line-numbered, windowing `read_file` replaces its read_file/read_file_lines.
replaced = {"search_files", "read_file", "read_file_lines"}
files = [
t
for t in ai.toolkits.files(root=ws, allow_write=True)
if getattr(t, "__name__", "") != "search_files"
if getattr(t, "__name__", "") not in replaced
]
tools = [
*files,
*file_tools(ws), # read_file (numbered lines, windowed)
*ai.toolkits.git(root=ws), # git_status, git_diff
*git_tools(ws), # git_log
*search_tools(ws), # grep
Expand Down
Loading
Loading