Merge pull request #22608 from dannon/agent-typed-orchestration-state

mvdbeek · web-flow · commit c180663872cd · 2026-05-01T19:38:16.000+02:00
Pass orchestration state between agents as a typed object
diff --git a/lib/galaxy/agents/base.py b/lib/galaxy/agents/base.py
@@ -13,7 +13,10 @@
     Callable,
     Sequence,
 )
-from dataclasses import dataclass
+from dataclasses import (
+    dataclass,
+    field,
+)
 from typing import (
     Any,
     Literal,
@@ -96,6 +99,7 @@
     "ActionSuggestion",
     "ActionType",
     "AgentResponse",
+    "AgentRunState",
     "AgentType",
     "BaseGalaxyAgent",
     "ConfidenceLevel",
@@ -258,6 +262,24 @@ def __init__(
         self.reasoning = reasoning
 
 
+@dataclass
+class AgentRunState:
+    """Per-invocation state shared across sequential multi-agent flows.
+
+    The orchestrator creates a fresh instance per user query and attaches it
+    to each agent's context. Sequential agents read prior agents' responses
+    from here instead of parsing them out of a text-concatenated prompt.
+    """
+
+    prior_responses: dict[str, "AgentResponse"] = field(default_factory=dict)
+
+    def get_prior(self, agent_type: str) -> Optional["AgentResponse"]:
+        return self.prior_responses.get(agent_type)
+
+    def record(self, agent_type: str, response: "AgentResponse") -> None:
+        self.prior_responses[agent_type] = response
+
+
 @dataclass
 class GalaxyAgentDependencies:
     """Dependencies passed to Galaxy agents via dependency injection."""
@@ -280,6 +302,7 @@ class BaseGalaxyAgent(ABC):
 
     agent_type: str
     agent: Agent[GalaxyAgentDependencies, Any]
+    _INTERNAL_CONTEXT_KEYS = frozenset({"run_state"})
 
     def __init__(self, deps: GalaxyAgentDependencies):
         self.deps = deps
@@ -446,7 +469,9 @@ def _prepare_prompt(self, query: str, context: dict[str, Any]) -> str:
         prompt_parts = [query]
 
         if context:
-            context_str = "\n".join([f"{k}: {v}" for k, v in context.items() if v])
+            context_str = "\n".join(
+                [f"{k}: {v}" for k, v in context.items() if v and k not in self._INTERNAL_CONTEXT_KEYS]
+            )
             if context_str:
                 prompt_parts.insert(0, f"Context:\n{context_str}\n")
 
diff --git a/lib/galaxy/agents/error_analysis.py b/lib/galaxy/agents/error_analysis.py
@@ -20,6 +20,7 @@
     ActionSuggestion,
     ActionType,
     AgentResponse,
+    AgentRunState,
     AgentType,
     BaseGalaxyAgent,
     ConfidenceLiteral,
@@ -108,10 +109,15 @@ async def process(self, query: str, context: Optional[dict[str, Any]] = None) ->
         try:
             log.info(f"ErrorAnalysis: Received query (length={len(query)})")
             log.info(f"ErrorAnalysis: Query preview: {query[:800]}...")
-            if "Previous analysis" in query:
-                log.info("ErrorAnalysis: Query contains previous analysis context")
 
             enhanced_query = query
+            run_state = context.get("run_state") if context else None
+            if isinstance(run_state, AgentRunState):
+                prior = run_state.get_prior(AgentType.HISTORY)
+                if prior is not None:
+                    log.info("ErrorAnalysis: Found prior history analysis in run_state")
+                    enhanced_query += f"\n\nContext from history analysis:\n{prior.content}"
+
             if context and context.get("job_id"):
                 job_details = await self.get_job_details(context["job_id"])
                 if "error" not in job_details:
diff --git a/lib/galaxy/agents/orchestrator.py b/lib/galaxy/agents/orchestrator.py
@@ -17,6 +17,7 @@
 from galaxy.schema.agents import ConfidenceLevel
 from .base import (
     AgentResponse,
+    AgentRunState,
     AgentType,
     BaseGalaxyAgent,
     extract_result_content,
@@ -190,26 +191,23 @@ async def _execute_sequential(
         self, agents: list[str], query: str, context: Optional[dict[str, Any]] = None
     ) -> dict[str, AgentResponse]:
         """Execute agents sequentially with timeout protection."""
-        responses = {}
-        current_query = query
+        responses: dict[str, AgentResponse] = {}
         timeout = self._get_agent_timeout()
+        run_state = AgentRunState()
+        ctx: dict[str, Any] = {**(context or {}), "run_state": run_state}
 
         log.info(f"Orchestrator: Running agents in SEQUENTIAL mode: {agents}")
         for agent_name in agents:
             try:
-                log.info(f"Orchestrator: Starting agent '{agent_name}' with query length {len(current_query)}")
+                log.info(f"Orchestrator: Starting agent '{agent_name}' with query length {len(query)}")
                 agent = self.deps.get_agent(agent_name, self.deps)
-                response = await asyncio.wait_for(agent.process(current_query, context or {}), timeout=timeout)
+                response = await asyncio.wait_for(agent.process(query, ctx), timeout=timeout)
                 responses[agent_name] = response
+                run_state.record(agent_name, response)
 
                 log.debug(f"Orchestrator: Agent '{agent_name}' completed. Response length: {len(response.content)}")
                 log.debug(f"Orchestrator: Agent '{agent_name}' response preview: {response.content[:500]}...")
 
-                # Cap previous response to avoid unbounded query growth
-                prev_content = response.content[:2000]
-                current_query = f"{query}\n\nPrevious analysis from {agent_name}: {prev_content}"
-                log.debug(f"Orchestrator: Updated query for next agent, total length: {len(current_query)}")
-
             except asyncio.TimeoutError:
                 log.error(f"Agent {agent_name} timed out after {timeout}s")
                 responses[agent_name] = _create_error_response(
diff --git a/test/unit/app/test_agents.py b/test/unit/app/test_agents.py
@@ -45,12 +45,18 @@
     CustomToolAgent,
     ErrorAnalysisAgent,
     GalaxyAgentDependencies,
+    HistoryAgent,
     QueryRouterAgent,
 )
 from galaxy.agents.base import truncate_message_history
 from galaxy.agents.registry import build_default_registry
 
 agent_registry = build_default_registry()
+from galaxy.agents.base import (
+    AgentResponse,
+    AgentRunState,
+    AgentType,
+)
 from galaxy.agents.error_analysis import ErrorAnalysisResult
 from galaxy.agents.orchestrator import (
     AgentPlan,
@@ -642,6 +648,181 @@ async def test_workflow_orchestrator_generic_fallback_behavior(self):
             assert response.agent_type == "orchestrator"
             assert "having trouble" in response.content
 
+    def test_agent_run_state_record_and_get_prior(self):
+        run_state = AgentRunState()
+        assert run_state.get_prior("history") is None
+
+        history_response = AgentResponse(
+            content="Found a failed job in the BRC history.",
+            confidence=ConfidenceLevel.HIGH,
+            agent_type="history",
+        )
+        run_state.record("history", history_response)
+
+        retrieved = run_state.get_prior("history")
+        assert retrieved is history_response
+        assert retrieved.content == "Found a failed job in the BRC history."
+        assert run_state.get_prior("error_analysis") is None
+
+    @pytest.mark.asyncio
+    async def test_orchestrator_sequential_attaches_run_state_to_context(self):
+        agent = WorkflowOrchestratorAgent(self.deps)
+
+        captured_contexts: list[dict[str, Any]] = []
+
+        async def capture_history(query, context):
+            captured_contexts.append(dict(context))
+            return MagicMock(
+                content="History summary content",
+                agent_type="history",
+                confidence=ConfidenceLevel.HIGH,
+            )
+
+        async def capture_error(query, context):
+            captured_contexts.append(dict(context))
+            return MagicMock(
+                content="Error analysis content",
+                agent_type="error_analysis",
+                confidence=ConfidenceLevel.HIGH,
+            )
+
+        mock_history_agent = MagicMock()
+        mock_history_agent.process = AsyncMock(side_effect=capture_history)
+        mock_error_agent = MagicMock()
+        mock_error_agent.process = AsyncMock(side_effect=capture_error)
+
+        def get_agent_side_effect(agent_type, deps):
+            if agent_type == "history":
+                return mock_history_agent
+            if agent_type == "error_analysis":
+                return mock_error_agent
+            raise ValueError(f"Unexpected agent type: {agent_type}")
+
+        self.deps.get_agent = MagicMock(side_effect=get_agent_side_effect)
+
+        with patch.object(agent, "_get_agent_plan") as mock_get_plan:
+            mock_get_plan.return_value = AgentPlan(
+                agents=["history", "error_analysis"],
+                sequential=True,
+                reasoning="Find failed job, then diagnose it",
+            )
+
+            await agent.process("Why did my job fail?")
+
+        assert len(captured_contexts) == 2
+
+        first_run_state = captured_contexts[0].get("run_state")
+        second_run_state = captured_contexts[1].get("run_state")
+        assert isinstance(first_run_state, AgentRunState)
+        assert isinstance(second_run_state, AgentRunState)
+        # Same run_state instance is reused across the sequential flow
+        assert first_run_state is second_run_state
+
+        # The shared run_state now holds the history response recorded after the first agent ran
+        history_prior = second_run_state.get_prior("history")
+        assert history_prior is not None
+        assert history_prior.content == "History summary content"
+
+    @pytest.mark.asyncio
+    async def test_orchestrator_sequential_passes_original_query(self):
+        agent = WorkflowOrchestratorAgent(self.deps)
+        original_query = "Why did my job fail?"
+        captured_queries: list[str] = []
+
+        async def capture_query(query, context):
+            captured_queries.append(query)
+            return MagicMock(
+                content="some response",
+                agent_type="history",
+                confidence=ConfidenceLevel.HIGH,
+            )
+
+        mock_history_agent = MagicMock()
+        mock_history_agent.process = AsyncMock(side_effect=capture_query)
+        mock_error_agent = MagicMock()
+        mock_error_agent.process = AsyncMock(side_effect=capture_query)
+
+        def get_agent_side_effect(agent_type, deps):
+            if agent_type == "history":
+                return mock_history_agent
+            if agent_type == "error_analysis":
+                return mock_error_agent
+            raise ValueError(f"Unexpected agent type: {agent_type}")
+
+        self.deps.get_agent = MagicMock(side_effect=get_agent_side_effect)
+
+        with patch.object(agent, "_get_agent_plan") as mock_get_plan:
+            mock_get_plan.return_value = AgentPlan(
+                agents=["history", "error_analysis"],
+                sequential=True,
+                reasoning="Find failed job, then diagnose it",
+            )
+
+            await agent.process(original_query)
+
+        assert len(captured_queries) == 2
+        for q in captured_queries:
+            assert q == original_query
+            assert "Previous analysis from" not in q
+
+    @pytest.mark.asyncio
+    async def test_error_analysis_reads_history_from_run_state(self):
+        self.mock_config.ai_model = "gpt-4o"
+        agent = ErrorAnalysisAgent(self.deps)
+
+        run_state = AgentRunState()
+        history_response = AgentResponse(
+            content="Found failing job 'select_first1' in BRC history; stderr says 'AssertionError'.",
+            confidence=ConfidenceLevel.HIGH,
+            agent_type=AgentType.HISTORY,
+        )
+        run_state.record(AgentType.HISTORY, history_response)
+
+        captured_prompts: list[str] = []
+
+        async def fake_run_with_retry(prompt, *args, **kwargs):
+            captured_prompts.append(prompt)
+            mock_result = mock.Mock()
+            mock_result.output = ErrorAnalysisResult(
+                error_category="tool_failure",
+                error_severity="medium",
+                likely_cause="Bad input",
+                solution_steps=["Re-run"],
+                confidence="high",
+                requires_admin=False,
+            )
+            return mock_result
+
+        with mock.patch.object(agent, "_run_with_retry", side_effect=fake_run_with_retry):
+            await agent.process("Why did my job fail?", context={"run_state": run_state})
+
+        assert len(captured_prompts) == 1
+        prompt = captured_prompts[0]
+        assert "Context from history analysis:" in prompt
+        assert "select_first1" in prompt
+        assert "AssertionError" in prompt
+
+    @pytest.mark.asyncio
+    async def test_internal_run_state_is_not_rendered_in_default_prompt(self):
+        agent = HistoryAgent(self.deps)
+        run_state = AgentRunState()
+        captured_prompts: list[str] = []
+
+        async def fake_run_with_retry(prompt, *args, **kwargs):
+            captured_prompts.append(prompt)
+            mock_result = mock.Mock()
+            mock_result.output = "History summary"
+            return mock_result
+
+        with mock.patch.object(agent, "_run_with_retry", side_effect=fake_run_with_retry):
+            await agent.process("Summarize my history", context={"run_state": run_state, "history_id": "abc123"})
+
+        assert len(captured_prompts) == 1
+        prompt = captured_prompts[0]
+        assert "history_id: abc123" in prompt
+        assert "run_state" not in prompt
+        assert "AgentRunState" not in prompt
+
     def _orchestrator_agent(self):
         agent = WorkflowOrchestratorAgent(self.deps)
         return agent