[slackbot] improve answers (#1159)

zzstoatzz · web-flow · commit 36a551309bf1 · 2025-06-07T14:35:18.000-05:00
* improve answers

* rm labeler
diff --git a/.github/workflows/ai-labeler.yml b/.github/workflows/ai-labeler.yml
diff --git a/examples/slackbot/core.py b/examples/slackbot/core.py
@@ -6,7 +6,6 @@
 from pathlib import Path
 from typing import AsyncIterator, TypedDict, cast
 
-from modules import display_signature
 from prefect import get_run_logger, task
 from prefect.blocks.system import Secret
 from prefect.logging.loggers import get_logger
@@ -18,17 +17,8 @@
 from pydantic_ai.settings import ModelSettings
 from raggy.documents import Document
 from raggy.vectorstores.tpuf import TurboPuffer, query_namespace
-from search import (
-    explore_module_offerings,
-    get_latest_prefect_release_notes,
-    review_common_3x_gotchas,
-    review_top_level_prefect_api,
-    search_controlflow_docs,
-    search_github_issues,
-    search_prefect_2x_docs,
-    search_prefect_3x_docs,
-    verify_import_statements,
-)
+from research_agent import research_prefect_topic
+from search import read_github_issues
 from settings import settings
 from turbopuffer.error import NotFoundError
 
@@ -37,49 +27,29 @@
 logger = get_logger(__name__)
 
 USER_MESSAGE_MAX_TOKENS = settings.user_message_max_tokens
-DEFAULT_SYSTEM_PROMPT = """You are Marvin from hitchhiker's guide to the galaxy, a sarcastic and glum but brilliant AI. 
-Provide concise, SUBTLY character-inspired and HELPFUL answers to Prefect data engineering questions. 
-USE TOOLS REPEATEDLY to gather context from the docs, github issues or other tools. 
-Any notes you take about the user will be automatically stored for your next interaction with them. 
-Assume no knowledge of Prefect syntax without reading docs. ALWAYS include relevant links from tool outputs. 
-Review imports, Prefect's top level API and 3.x gotchas before writing code examples to avoid giving misinformation.
-
-Generally, follow this pattern while generating each response: 
-1) If user offers info about their stack or objectives -> store relevant facts and continue to following steps
-2) Use tools to gather context about Prefect concepts related to their question 
-3) Review the top level API of Prefect and drill into submodules that may be related to the user's question
-4) If you cannot find sufficient context after your first pass at 2 and 3, repeat steps 2 and 3
-5) Compile relevant facts and context into a single, CONCISE answer 
-NEVER reference features, syntax, imports or env vars that you do not explicitly find in the docs. 
-If not explicitly stated, assume that the user is using Prefect 3.x and vocalize this assumption.
-If asked an ambiguous question, simply state what you know about the user and your capabilities.
-
-Do not pretend to know things you do not know, assume an agnostic stance and rely on your tools to gather context.
+# Prompt text for the Slack-facing agent. It describes the two tools that
+# create_agent passes to the Agent (research_prefect_topic and
+# read_github_issues) and tells the model to delegate documentation research
+# to the research agent rather than search on its own.
+# NOTE(review): the place where this constant is consumed is not visible in
+# this hunk - presumably the Agent built in create_agent; confirm before
+# rewording, since every character here is model-facing.
+DEFAULT_SYSTEM_PROMPT = """You are Marvin from hitchhiker's guide to the galaxy, a sarcastic and glum but brilliant AI.
+Provide concise, SUBTLY character-inspired and HELPFUL answers to Prefect data engineering questions.
+
+Your main tools:
+- research_prefect_topic: Delegates to a specialized research agent that thoroughly searches docs, checks imports, and verifies information
+- read_github_issues: Searches GitHub issues when users need help with bugs or existing problems
+
+Any notes you take about the user will be automatically stored for your next interaction with them.
+
+Generally, follow this pattern:
+1) If user shares info about their setup or goals -> store relevant facts as notes about them
+2) For technical questions -> use research_prefect_topic to delegate comprehensive research to the research agent
+3) For bug reports or known issues -> use read_github_issues to find relevant GitHub discussions
+4) Compile the findings into a single, CONCISE answer with relevant links
+
+IMPORTANT: 
+- The research agent handles all documentation searching and verification - trust its findings
+- NEVER reference features or syntax that aren't explicitly confirmed by your tools
+- If not stated otherwise, assume Prefect 3.x and mention this assumption
+- Be honest when you don't have enough information - don't guess or hallucinate
+"""
 
 
-@task(task_run_name="Reading {n} issues from {repo} given query: {query}")
-def read_github_issues(query: str, repo: str = "prefecthq/prefect", n: int = 3) -> str:
-    """
-    Use the GitHub API to search for issues in a given repository. Do
-    not alter the default value for `n` unless specifically requested by
-    a user.
-
-    For example, to search for open issues about AttributeErrors with the
-    label "bug" in PrefectHQ/prefect:
-        - repo: prefecthq/prefect
-        - query: label:bug is:open AttributeError
-    """
-    return asyncio.run(
-        search_github_issues(
-            query,
-            repo=repo,
-            n=n,
-            api_token=GITHUB_API_TOKEN,  # type: ignore
-        )
-    )
-
-
 class UserContext(TypedDict):
     user_id: str
     user_notes: str
@@ -192,20 +162,12 @@ def create_agent(
             Variable.get("marvin_bot_model", default=settings.model_name, _sync=True),  # type: ignore
         ),
     )
-    agent = Agent(
+    agent = Agent[UserContext, str](
         model=ai_model,
         model_settings=ModelSettings(temperature=settings.temperature),
         tools=[
-            get_latest_prefect_release_notes,  # type: ignore
-            search_prefect_2x_docs,
-            display_signature,
-            search_prefect_3x_docs,
-            search_controlflow_docs,
-            read_github_issues,
-            review_top_level_prefect_api,
-            explore_module_offerings,
-            review_common_3x_gotchas,
-            verify_import_statements,
+            research_prefect_topic,  # Main tool for researching Prefect topics
+            read_github_issues,  # For searching GitHub issues
         ],
         deps_type=UserContext,
     )
diff --git a/examples/slackbot/modules.py b/examples/slackbot/modules.py
@@ -221,7 +221,7 @@ def display_signature(import_path: str) -> str:
     sig = inspect.signature(func)
 
     # Get function name and build header
-    func_name = func.__name__
+    func_name = getattr(func, "name", getattr(func, "__name__", "unknown"))
     lines = [f"📎 {func_name}", "├── Parameters:"]
 
     # Process parameters
diff --git a/examples/slackbot/research_agent.py b/examples/slackbot/research_agent.py
@@ -0,0 +1,148 @@
+"""Research agent for improved information gathering."""
+
+from modules import display_signature
+from prefect import task
+from prefect.cache_policies import INPUTS
+from pydantic import BaseModel, Field
+from pydantic_ai import Agent
+from pydantic_ai.models import Model
+from search import (
+    explore_module_offerings,
+    get_latest_prefect_release_notes,
+    review_common_3x_gotchas,
+    review_top_level_prefect_api,
+    search_controlflow_docs,
+    search_prefect_2x_docs,
+    search_prefect_3x_docs,
+    verify_import_statements,
+)
+
+
+class ResearchFindings(BaseModel):
+    """Structured findings from research."""
+
+    # Declared as the research agent's `result_type` in create_research_agent and
+    # turned into markdown by research_prefect_topic.
+    # NOTE(review): pydantic normally exposes the class docstring and the Field
+    # descriptions in the generated schema, so they are presumably visible to the
+    # model - treat them as prompt text and confirm before rewording.
+
+    # Printed as the "Main Findings" bullet list.
+    main_findings: list[str] = Field(
+        description="Key findings that answer the question"
+    )
+    # Printed under "Supporting Details" only when non-empty.
+    supporting_details: list[str] = Field(description="Additional context and details")
+    # Free-form string: the description asks for high/medium/low, but the `str`
+    # annotation does not restrict the value to those three.
+    confidence_level: str = Field(
+        description="high/medium/low confidence in the answer"
+    )
+    # Printed under the "could not be fully researched" note only when non-empty.
+    knowledge_gaps: list[str] = Field(description="What we still don't know")
+    # Printed under "Relevant Documentation" only when non-empty.
+    relevant_links: list[str] = Field(description="Links to documentation or resources")
+
+
+class ResearchContext(BaseModel):
+    """Context for the research agent.
+
+    research_topic builds an instance and passes it to the agent run as
+    ``deps``; create_research_agent declares this class as the agent's
+    ``deps_type``.
+    """
+
+    # Documentation namespace for the research run; research_prefect_topic
+    # derives it from the requested Prefect version.
+    # NOTE(review): none of the code visible here reads this field back -
+    # confirm that the search tools actually consume it.
+    namespace: str = "prefect-3"  # default to Prefect 3.x docs
+
+
+def create_research_agent(
+    model: Model | None = None,
+) -> Agent[ResearchContext, ResearchFindings]:
+    """Build the agent that researches Prefect questions with the search tools.
+
+    Args:
+        model: Model for the agent to use; when falsy, the string
+            "openai:gpt-4o" is passed to the Agent instead.
+
+    Returns:
+        An Agent with ResearchContext as its deps type, ResearchFindings as
+        its result type, the research system prompt, and the documentation
+        and introspection tools registered.
+    """
+    # System prompt handed to the Agent; this is runtime text, keep it verbatim.
+    research_instructions = """You are a specialized research agent for Prefect documentation and knowledge.
+Your job is to thoroughly research topics by using ALL available tools to gather comprehensive, accurate information.
+
+Your research process:
+1. Start with broad searches to understand the topic context
+2. Use multiple search queries with different keywords - don't stop at first result
+3. For code examples: ALWAYS verify imports with verify_import_statements
+4. Focus on Prefect 3.x documentation unless explicitly asked about 2.x or older versions
+5. Review gotchas and release notes for recent changes
+6. Explore relevant modules for deeper understanding
+7. Synthesize ALL findings into structured, confident answers
+
+Remember: You are the research specialist. The main agent relies on you for accurate, comprehensive information.
+Be thorough - use tools repeatedly until you have complete information.
+Default to Prefect 3.x unless the user explicitly asks about 2.x or version compatibility.
+"""
+
+    # Callables the research agent may invoke while gathering information.
+    research_tools = [
+        get_latest_prefect_release_notes,
+        search_prefect_2x_docs,
+        display_signature,
+        search_prefect_3x_docs,
+        search_controlflow_docs,
+        review_top_level_prefect_api,
+        explore_module_offerings,
+        review_common_3x_gotchas,
+        verify_import_statements,
+    ]
+
+    return Agent[ResearchContext, ResearchFindings](
+        model=model or "openai:gpt-4o",
+        deps_type=ResearchContext,
+        result_type=ResearchFindings,
+        system_prompt=research_instructions,
+        tools=research_tools,
+    )
+
+
+async def research_topic(
+    question: str, namespace: str = "prefect-3", model: Model | None = None
+) -> ResearchFindings:
+    """Run the research agent once on a question and return its findings.
+
+    Args:
+        question: Sent to the agent as the user prompt.
+        namespace: Stored on the ResearchContext handed to the run as deps.
+        model: Forwarded to create_research_agent; None selects its default.
+
+    Returns:
+        The ``data`` attribute of the agent's run result.
+    """
+    # Build the deps first, then the agent, mirroring the order callers rely on
+    # for which failure surfaces first.
+    deps = ResearchContext(namespace=namespace)
+    run_result = await create_research_agent(model).run(
+        user_prompt=question, deps=deps
+    )
+    return run_result.data
+
+
+def research_prefect_topic(question: str, topic: str, version: str = "3.x") -> str:
+    """
+    Thoroughly research a Prefect topic using an intelligent research agent.
+    This tool performs multiple searches and synthesizes comprehensive findings.
+
+    Args:
+        question: The specific question or topic to research
+        topic: A short display name for the topic based on the question (for bookkeeping)
+        version: Prefect version ("2.x" or "3.x")
+    """
+    # NOTE(review): this function is handed to an Agent as a tool, so the
+    # docstring above is presumably what the model sees as the tool description;
+    # it is deliberately left untouched.
+    #
+    # Returns a markdown summary of the findings. Any Exception raised while
+    # running or formatting the research is converted into a
+    # "Research failed: ..." string instead of propagating to the calling agent.
+
+    # Use the first ASCII digit as the major version so "3.x", "v3" and "3" all
+    # map to "prefect-3". Indexing `version[0]` raised IndexError on an empty
+    # string (outside the try block) and produced "prefect-v" for "v3". With no
+    # digit at all, fall back to 3, matching the parameter default.
+    major = next((ch for ch in str(version) if "0" <= ch <= "9"), "3")
+    namespace = f"prefect-{major}"
+
+    try:
+        # The task wrapper is built per call because its run name embeds the topic.
+        research_task = task(
+            task_run_name=f"Researching {topic}", cache_policy=INPUTS
+        )(research_topic)
+        findings = research_task.submit(question, namespace).result()
+
+        # Collect the pieces and join once instead of growing a string with +=.
+        parts = [
+            f"**Research Findings** (Confidence: {findings.confidence_level})\n\n",
+            "**Main Findings:**\n",
+        ]
+        parts.extend(f"- {finding}\n" for finding in findings.main_findings)
+
+        # Optional sections are emitted, in this order, only when they have entries.
+        optional_sections = (
+            ("**Supporting Details:**", findings.supporting_details),
+            ("**Relevant Documentation:**", findings.relevant_links),
+            (
+                "**Note:** Some aspects could not be fully researched:",
+                findings.knowledge_gaps,
+            ),
+        )
+        for heading, entries in optional_sections:
+            if entries:
+                parts.append(f"\n{heading}\n")
+                parts.extend(f"- {entry}\n" for entry in entries)
+
+        return "".join(parts)
+
+    except Exception as e:
+        # Deliberate best-effort boundary: the agent gets a readable message
+        # rather than a crashed tool call.
+        # NOTE(review): the message promises a fallback, but none is performed
+        # here - any fallback is up to the caller.
+        return f"Research failed: {str(e)}. Falling back to standard search."
diff --git a/examples/slackbot/search.py b/examples/slackbot/search.py
@@ -1,3 +1,4 @@
+import asyncio
 import os
 import subprocess
 from datetime import datetime
@@ -6,9 +7,11 @@
 import httpx
 import turbopuffer as tpuf
 from modules import ModuleTreeExplorer
+from prefect import task
 from prefect.blocks.system import Secret
 from pydantic import BaseModel, Field, field_validator
 from raggy.vectorstores.tpuf import multi_query_tpuf
+from settings import settings
 from strings import slice_tokens
 
 import marvin
@@ -166,7 +169,7 @@ async def get_token() -> str:
     try:
         from prefect.blocks.system import Secret
 
-        return (await Secret.load(name="github-token")).get()  # type: ignore
+        return (await Secret.aload(name="github-token")).get()
     except (ImportError, ValueError) as exc:
         getattr(get_logger("marvin"), "debug_kv")(
             (
@@ -224,6 +227,25 @@ def validate_body(cls, v: str) -> str:
         return v
 
 
+@task(task_run_name="Reading {n} issues from {repo} given query: {query}")
+def read_github_issues(query: str, repo: str = "prefecthq/prefect", n: int = 3) -> str:
+    """
+    Use the GitHub API to search for issues in a given repository. Do
+    not alter the default value for `n` unless specifically requested by
+    a user.
+
+    For example, to search for open issues about AttributeErrors with the
+    label "bug" in PrefectHQ/prefect:
+        - repo: prefecthq/prefect
+        - query: label:bug is:open AttributeError
+    """
+    # NOTE(review): the docstring reads as instructions to the model and this
+    # function is registered as an agent tool, so it is kept verbatim.
+
+    # Resolve the token from the Secret block named in settings; `_sync=True`
+    # requests the blocking form of the load.
+    token_block = Secret.load(settings.github_token_secret_name, _sync=True)  # type: ignore
+    api_token = token_block.get()
+
+    # search_github_issues is a coroutine function; run it to completion here
+    # because this task body is synchronous.
+    pending_search = search_github_issues(query, repo=repo, n=n, api_token=api_token)
+    return asyncio.run(pending_search)
+
+
 async def search_github_issues(
     query: str,
     repo: str = "prefecthq/prefect",