feat(workflows): improve nested workflow pause/resume handling

ibacalu · ibacalu · commit cdc973b8eb00 · 2025-12-07T09:29:19.000+02:00
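This update fixes a critical issue in the handling of nested workflows, particularly `Workflow` blocks run with `for_each` in sequential mode. On resume, the child workflow now receives its own child execution context (created the same way as in `execute()`) instead of reusing the parent's, so child workflows correctly maintain their execution context and responses intended for a paused child are no longer misrouted back to the parent. This fix is crucial for workflows that include pause points, such as prompts for user approval. In addition, `resume_for_each` now reconstructs a Workflow block's serialized output as an `Execution` object rather than through the executor's `output_type`, the workflow-creator `decompose` template now generates sub-workflows by recursively calling `workflow-creator` after their requirements files have been prepared, and new tests have been introduced to verify the correct pause/resume behavior of nested workflows in these scenarios.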
diff --git a/src/workflows_mcp/engine/executors_workflow.py b/src/workflows_mcp/engine/executors_workflow.py
@@ -326,24 +326,36 @@ async def resume(
                 "ExecutionContext not found - workflow composition not supported in this context"
             )
 
-        # 4. Inject ExecutionContext into deserialized child state
+        # 4. Get child workflow name (from checkpoint or inputs)
+        workflow_name = pause_metadata.get("child_workflow") or inputs.workflow
+
+        # 5. Create child context (SAME AS execute() method)
+        # This is critical for proper variable resolution and context isolation.
+        # Without this, the child workflow would use parent's execution context,
+        # causing variable resolution to fail or return incorrect values.
+        child_context = exec_context.create_child_context(
+            parent_execution=context,
+            workflow_name=workflow_name,
+        )
+
+        # 6. Inject child ExecutionContext into deserialized child state
         # Required because _execution_context is a PrivateAttr that isn't serialized.
         # After deserialization, child_execution_state.context._execution_context is None.
         # This must be set before resume_from_state() to ensure proper context propagation.
-        child_execution_state.context.set_execution_context(exec_context)
+        child_execution_state.context.set_execution_context(child_context)
 
-        # 5. Create WorkflowRunner and resume child workflow
+        # 7. Create WorkflowRunner and resume child workflow
         from .workflow_runner import WorkflowRunner
 
         # No checkpointing for nested workflows - parent handles all checkpointing
         runner = WorkflowRunner()
 
         try:
-            # Resume child workflow from ExecutionState
+            # Resume child workflow from ExecutionState with CHILD context
             child_execution_result = await runner.resume_from_state(
                 execution_state=child_execution_state,
                 response=response,
-                context=exec_context,
+                context=child_context,
             )
         except Exception as e:
             raise ValueError(f"Failed to resume child workflow: {e}") from e
diff --git a/src/workflows_mcp/engine/orchestrator.py b/src/workflows_mcp/engine/orchestrator.py
@@ -424,9 +424,16 @@ async def resume_for_each(
                 metadata = Metadata(**serialized["metadata"])
 
                 # Reconstruct output from serialized data
-                output = None
+                # Use Any type because output can be Execution (Workflow) or BlockOutput
+                output: Any = None
                 if serialized["output"] is not None:
-                    output = executor.output_type(**serialized["output"])
+                    # Special case: WorkflowExecutor returns Execution, not BlockOutput
+                    # Check by executor type name (Workflow blocks have output_type=type(None))
+                    if executor_type == "Workflow":
+                        # Workflow block - output is serialized Execution object
+                        output = Execution.model_validate(serialized["output"])
+                    else:
+                        output = executor.output_type(**serialized["output"])
 
                 iteration_results[key] = BlockExecution(
                     inputs=serialized["inputs"],
diff --git a/src/workflows_mcp/templates/agents/workflow-creator/layer2/decompose.yaml b/src/workflows_mcp/templates/agents/workflow-creator/layer2/decompose.yaml
@@ -52,26 +52,13 @@ blocks:
         fi
         echo "Depth {{inputs.current_depth}}/{{inputs.max_depth}} - OK to proceed"
 
-  - id: generate_sub_workflows
-    description: "Generate each sub-workflow by calling workflow-creator-generate-workflow"
-    type: Workflow
-    for_each: "{{inputs.sub_workflows}}"
-    for_each_mode: sequential
-    depends_on: [check_depth]
-    condition: "{{inputs.current_depth < inputs.max_depth}}"
-    inputs:
-      workflow: workflow-creator-generate-workflow
-      inputs:
-        user_requests: "{{inputs.workspace}}/{{each.value.name}}-requirements.json"
-        executor_docs_path: "{{inputs.executor_docs_path}}"
-        examples: "{{inputs.examples}}"
-        workspace: "{{inputs.workspace}}/{{each.value.name}}"
-
   - id: prepare_sub_requirements
     description: "Create requirements files for each sub-workflow"
     type: CreateFile
     for_each: "{{inputs.sub_workflows}}"
     for_each_mode: parallel
+    depends_on: [check_depth]
+    condition: "{{inputs.current_depth < inputs.max_depth}}"
     inputs:
       create_parents: true
       overwrite: true
@@ -84,9 +71,33 @@ blocks:
           "outputs": {{each.value.outputs | default([]) | tojson}},
           "steps": ["Generated as sub-workflow of parent requirements"],
           "is_sub_workflow": true,
+          "is_recursive": {{each.value.is_recursive | default(false) | tojson}},
           "parent_context": "{{inputs.parent_requirements.name | default('parent')}}"
         }
 
+  - id: generate_sub_workflows
+    description: "Recursively call workflow-creator for each sub-workflow (includes design, validation, and save)"
+    type: Workflow
+    for_each: "{{inputs.sub_workflows}}"
+    for_each_mode: sequential
+    depends_on: [prepare_sub_requirements]
+    condition: "{{inputs.current_depth < inputs.max_depth}}"
+    inputs:
+      workflow: workflow-creator
+      inputs:
+        # Pass the sub-workflow requirements as prefilled (skips interactive prompts)
+        prefilled_requirements:
+          name: "{{each.value.name}}"
+          description: "{{each.value.purpose}}"
+          inputs: "{{each.value.inputs | default([])}}"
+          outputs: "{{each.value.outputs | default([])}}"
+          steps: ["Generated as sub-workflow"]
+          tags: ["sub-workflow", "{{inputs.parent_requirements.name | default('parent')}}"]
+        workspace: "{{inputs.workspace}}/{{each.value.name}}"
+        recursion_depth: "{{inputs.current_depth + 1}}"
+        max_recursion_depth: "{{inputs.max_depth}}"
+        max_validation_attempts: 5
+
   - id: collect_results
     description: "Summarize generated sub-workflows"
     type: Shell
@@ -101,6 +112,8 @@ blocks:
         echo "  Depth: {{inputs.current_depth}}/{{inputs.max_depth}}"
         echo "  Sub-workflows requested: {{inputs.sub_workflows | length}}"
         echo "  Workspace: {{inputs.workspace}}"
+        echo ""
+        echo "Generated sub-workflows saved to ~/.workflows/"
 
 outputs:
   sub_workflow_count:
diff --git a/tests/test_nested_workflow_pause_resume.py b/tests/test_nested_workflow_pause_resume.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python3
+"""Test nested Workflow blocks in for_each with pause/resume.
+
+This test reproduces a bug where:
+1. Parent workflow has for_each: sequential with Workflow blocks
+2. Child workflow pauses (Prompt block)
+3. User provides feedback/response
+4. Response is incorrectly routed to parent instead of child
+
+The bug causes:
+- Child workflow never completes its approval flow
+- Remaining for_each iterations are skipped
+- Sub-workflows are never saved
+
+Test Scenario:
+1. Parent workflow with for_each: sequential calling child workflows
+2. Child workflows have Prompt blocks that pause for approval
+3. Resume first child with "yes"
+4. Verify first child completes with approval
+5. Second child should pause for its approval
+6. Resume second child with "yes"
+7. Verify both children completed successfully
+"""
+
+import json
+from typing import Any
+
+import pytest
+from mcp.types import TextContent
+from test_mcp_client import get_mcp_client
+
+
+class TestNestedWorkflowPauseResume:
+    """Test pause/resume for nested Workflow blocks in for_each loops."""
+
+    @pytest.mark.asyncio
+    async def test_nested_workflow_in_foreach_sequential_pause_resume(self) -> None:
+        """Test that nested workflow pause/resume works correctly in for_each sequential.
+
+        This is the core bug reproduction test:
+        1. Start parent workflow with 2 items
+        2. First child pauses for approval
+        3. Resume with "yes" -> first child should complete
+        4. Second child should pause for approval (not parent!)
+        5. Resume with "yes" -> second child should complete
+        6. Parent workflow should complete with all items processed
+        """
+        async with get_mcp_client() as client:
+            # Step 1: Start parent workflow
+            exec_result = await client.call_tool(
+                "execute_workflow",
+                arguments={
+                    "workflow": "nested-workflow-in-foreach-parent",
+                    "inputs": {
+                        "work_items": [
+                            {"name": "item1", "value": "value1"},
+                            {"name": "item2", "value": "value2"},
+                        ]
+                    },
+                    "debug": True,
+                },
+            )
+
+            exec_content = exec_result.content[0]
+            assert isinstance(exec_content, TextContent)
+            exec_response: dict[str, Any] = json.loads(exec_content.text)
+
+            # Verify workflow paused at first child
+            assert exec_response["status"] == "paused", (
+                f"Expected workflow to pause, got status: {exec_response.get('status')}"
+            )
+            assert "job_id" in exec_response, "Expected job_id in paused response"
+            job_id = exec_response["job_id"]
+
+            # The prompt should be from the CHILD workflow (item1)
+            prompt = exec_response.get("prompt", "")
+            assert "item1" in prompt, (
+                f"Expected prompt to mention 'item1' (first child), got: {prompt}"
+            )
+
+            # Step 2: Resume first child with "yes"
+            resume1_result = await client.call_tool(
+                "resume_workflow",
+                arguments={
+                    "job_id": job_id,
+                    "response": "yes",
+                    "debug": True,
+                },
+            )
+
+            resume1_content = resume1_result.content[0]
+            assert isinstance(resume1_content, TextContent)
+            resume1_response: dict[str, Any] = json.loads(resume1_content.text)
+
+            # Step 3: Should pause again for SECOND child (item2)
+            # THIS IS WHERE THE BUG MANIFESTS:
+            # - Bug behavior: status == "success" (parent completes, skipping item2)
+            # - Correct behavior: status == "paused" with prompt for item2
+            assert resume1_response["status"] == "paused", (
+                f"Expected workflow to pause for item2, got: {resume1_response.get('status')}. "
+                f"Resume response was incorrectly routed to parent workflow. "
+                f"Full response: {resume1_response}"
+            )
+
+            # Verify prompt is for second child (item2)
+            prompt2 = resume1_response.get("prompt", "")
+            assert "item2" in prompt2, (
+                f"Expected prompt to mention 'item2' (second child), got: {prompt2}. "
+                f"This indicates for_each iteration was not correctly resumed."
+            )
+
+            job_id_2 = resume1_response.get("job_id", job_id)
+
+            # Step 4: Resume second child with "yes"
+            resume2_result = await client.call_tool(
+                "resume_workflow",
+                arguments={
+                    "job_id": job_id_2,
+                    "response": "yes",
+                    "debug": True,
+                },
+            )
+
+            resume2_content = resume2_result.content[0]
+            assert isinstance(resume2_content, TextContent)
+            resume2_response: dict[str, Any] = json.loads(resume2_content.text)
+
+            # Step 5: Now workflow should complete successfully
+            assert resume2_response["status"] == "success", (
+                f"Expected workflow to complete after both children approved, "
+                f"got status: {resume2_response.get('status')}. "
+                f"Error: {resume2_response.get('error')}"
+            )
+
+            # Verify outputs
+            outputs = resume2_response.get("outputs", {})
+            assert outputs.get("setup_completed") is True
+            assert outputs.get("all_items_processed") is True
+            assert outputs.get("finalize_completed") is True
+
+    @pytest.mark.asyncio
+    async def test_nested_workflow_single_item_pause_resume(self) -> None:
+        """Test simpler case: single item for_each with nested workflow pause.
+
+        This isolates the core pause/resume logic without multiple iterations.
+        """
+        async with get_mcp_client() as client:
+            # Start with single item
+            exec_result = await client.call_tool(
+                "execute_workflow",
+                arguments={
+                    "workflow": "nested-workflow-in-foreach-parent",
+                    "inputs": {
+                        "work_items": [
+                            {"name": "single_item", "value": "single_value"},
+                        ]
+                    },
+                    "debug": True,
+                },
+            )
+
+            exec_content = exec_result.content[0]
+            assert isinstance(exec_content, TextContent)
+            exec_response: dict[str, Any] = json.loads(exec_content.text)
+
+            # Should pause for the single child
+            assert exec_response["status"] == "paused"
+            job_id = exec_response["job_id"]
+
+            prompt = exec_response.get("prompt", "")
+            assert "single_item" in prompt
+
+            # Resume with approval
+            resume_result = await client.call_tool(
+                "resume_workflow",
+                arguments={
+                    "job_id": job_id,
+                    "response": "yes",
+                    "debug": True,
+                },
+            )
+
+            resume_content = resume_result.content[0]
+            assert isinstance(resume_content, TextContent)
+            resume_response: dict[str, Any] = json.loads(resume_content.text)
+
+            # Should complete (only one item)
+            assert resume_response["status"] == "success", (
+                f"Expected success after single item approval, got: {resume_response}"
+            )
+
+            outputs = resume_response.get("outputs", {})
+            assert outputs.get("all_items_processed") is True
+
+    @pytest.mark.asyncio
+    async def test_nested_workflow_feedback_iteration(self) -> None:
+        """Test that a denial ("no") response completes the nested workflow.
+
+        A single-item for_each pauses at the child workflow's prompt. Resuming
+        with "no" is a valid response, so the workflow is expected to finish
+        with status "success" (denied branch) rather than fail or pause again.
+        """
+        async with get_mcp_client() as client:
+            # Start workflow
+            exec_result = await client.call_tool(
+                "execute_workflow",
+                arguments={
+                    "workflow": "nested-workflow-in-foreach-parent",
+                    "inputs": {
+                        "work_items": [
+                            {"name": "test_item", "value": "test_value"},
+                        ]
+                    },
+                    "debug": True,
+                },
+            )
+
+            exec_content = exec_result.content[0]
+            assert isinstance(exec_content, TextContent)
+            exec_response: dict[str, Any] = json.loads(exec_content.text)
+
+            assert exec_response["status"] == "paused"
+            job_id = exec_response["job_id"]
+
+            # Send denial (should complete with denied branch)
+            resume_result = await client.call_tool(
+                "resume_workflow",
+                arguments={
+                    "job_id": job_id,
+                    "response": "no",
+                    "debug": True,
+                },
+            )
+
+            resume_content = resume_result.content[0]
+            assert isinstance(resume_content, TextContent)
+            resume_response: dict[str, Any] = json.loads(resume_content.text)
+
+            # Should complete (denial is a valid response that completes the workflow)
+            assert resume_response["status"] == "success", (
+                f"Expected success after denial, got: {resume_response}"
+            )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/workflows/integration/nested-workflow-in-foreach-child.yaml b/tests/workflows/integration/nested-workflow-in-foreach-child.yaml
diff --git a/tests/workflows/integration/nested-workflow-in-foreach-parent.yaml b/tests/workflows/integration/nested-workflow-in-foreach-parent.yaml