diff --git a/.github/workflows/pr-quality-check.yml b/.github/workflows/pr-quality-check.yml
new file mode 100644
index 000000000..2deda4c0b
--- /dev/null
+++ b/.github/workflows/pr-quality-check.yml
@@ -0,0 +1,38 @@
+name: PR Quality Check
+on:
+  # pull_request_target runs in the context of the base branch, so secrets are
+  # available even for PRs from forks. Never check out or execute PR head code here.
+  pull_request_target:
+    types: [opened, reopened]
+
+jobs:
+  pr_quality_check:
+    runs-on: ubuntu-latest
+    permissions:
+      # Declaring any permission sets all unlisted ones to "none"; checkout and
+      # reading CONTRIBUTING.md through the API both need read access to contents.
+      contents: read
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - run: pip install litellm PyGithub
+      - name: Run PR quality check agent
+        env:
+          # e.g: "claude-sonnet-4-6", "gpt-4o", etc.
+          MODEL: ${{ secrets.MODEL }}
+          # Only API key for the chosen model is required
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          # Obtained automatically by GH Actions
+          AUTHOR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
+          AUTHOR_USERNAME: ${{ github.event.pull_request.user.login }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_BODY: ${{ github.event.pull_request.body }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_TITLE: ${{ github.event.pull_request.title }}
+          REPO_NAME: ${{ github.repository }}
+        run: python scripts/agents/pr_checker_agent.py
diff --git a/.github/workflows/security-review.yml b/.github/workflows/security-review.yml
new file mode 100644
index 000000000..47dfb548c
--- /dev/null
+++ b/.github/workflows/security-review.yml
@@ -0,0 +1,49 @@
+name: Security Review
+on:
+  pull_request_target:
+    types: [opened, reopened]
+  issue_comment:
+    types: [created]
+
+jobs:
+  security-review:
+    runs-on: ubuntu-latest
+    # Always runs on PR creation
+    # Also runs if comment on PR contains "/security-review"
+    # The event name must match the trigger declared above (pull_request_target),
+    # otherwise the automatic run on PR creation is always skipped.
+    if: >
+      github.event_name == 'pull_request_target' ||
+      (
+        github.event_name == 'issue_comment' &&
+        github.event.issue.pull_request != null &&
+        contains(github.event.comment.body, '/security-review')
+      )
+    permissions:
+      # Unlisted permissions default to "none" once this block is present.
+      contents: read
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - run: pip install litellm PyGithub
+      - name: Run security review agent
+        env:
+          IGNORED_EXTENSIONS: .lock,.sum
+          IGNORED_FILENAMES: package-lock.json,yarn.lock,poetry.lock,Gemfile.lock,Cargo.lock,composer.lock,pnpm-lock.yaml,pip.lock
+          MAX_PATCH_CHARS_PER_FILE: 3000
+          # e.g: "claude-sonnet-4-6", "gpt-4o", etc.
+          MODEL: ${{ secrets.MODEL }}
+          # Only API key for the chosen model is required
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          # Obtained automatically by GH Actions
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
+          REPO_NAME: ${{ github.repository }}
+          TRIGGER: ${{ github.event_name }}
+        run: python scripts/agents/security_review_agent.py
diff --git a/.github/workflows/triage.yml b/.github/workflows/triage.yml
new file mode 100644
index 000000000..6188ba3ca
--- /dev/null
+++ b/.github/workflows/triage.yml
@@ -0,0 +1,35 @@
+name: Issue Triage
+on:
+  issues:
+    types: [opened, reopened]
+
+jobs:
+  triage:
+    runs-on: ubuntu-latest
+    permissions:
+      # Unlisted permissions default to "none" once this block is present.
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - run: pip install litellm PyGithub
+      - name: Run triage agent
+        env:
+          AVAILABLE_LABELS: automation,bug,dependencies,documentation,enhancement,good-first-issue,meeting,needs-info,plugins,protocol,question,security,tech-debt,testing
+          LATEST_ISSUES_LIMIT: 100
+          # e.g: "claude-sonnet-4-6", "gpt-4o", etc.
+          MODEL: ${{ secrets.MODEL }}
+          # Only API key for the chosen model is required
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          # Obtained automatically by GH Actions
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          ISSUE_BODY: ${{ github.event.issue.body }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+          ISSUE_TITLE: ${{ github.event.issue.title }}
+          REPO_NAME: ${{ github.repository }}
+        run: python scripts/agents/triage_agent.py
diff --git a/scripts/agents/helpers.py b/scripts/agents/helpers.py
new file mode 100644
index 000000000..0cda26e17
--- /dev/null
+++ b/scripts/agents/helpers.py
@@ -0,0 +1,67 @@
+"""Shared helpers for the GitHub automation agent scripts."""
+
+import json
+import os
+
+import litellm
+
+# Environment variables that may carry a provider API key; one is enough.
+API_KEY_ENV_VARS = ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GEMINI_API_KEY")
+
+
+def validate_api_keys():
+    """Raise ValueError unless at least one provider API key is set and non-empty."""
+    if not any(os.environ.get(key) for key in API_KEY_ENV_VARS):
+        raise ValueError("No API key is set")
+
+
+def validate_env_vars(env_vars: list[str]):
+    """Raise ValueError naming the first listed variable that is unset or empty."""
+    for env_var in env_vars:
+        if not os.environ.get(env_var):
+            raise ValueError(f"{env_var} is not set")
+
+
+def run_agent(messages: list, tools: list, handle_tool_call, model: str, max_turns: int = 10):
+    """Run a tool-calling conversation until the model stops requesting tools.
+
+    Args:
+        messages: Chat history to start from; extended in place with each
+            assistant message and tool result.
+        tools: Tool schemas forwarded to litellm.completion.
+        handle_tool_call: Callable (name, inputs) -> str that executes one tool.
+        model: Model identifier forwarded to litellm.completion.
+        max_turns: Upper bound on model round-trips, so a model that never stops
+            calling tools cannot keep the job running indefinitely.
+
+    Raises:
+        RuntimeError: If the model is still requesting tools after max_turns.
+    """
+    for _ in range(max_turns):
+        response = litellm.completion(
+            model=model, messages=messages, tools=tools, temperature=0
+        )
+        choice = response.choices[0]
+        message = choice.message
+        if message.content:
+            print(f"[agent] {message.content}")
+        messages.append(message.model_dump(exclude_none=True))
+        if choice.finish_reason == "stop" or not message.tool_calls:
+            return
+        for tool_call in message.tool_calls:
+            name = tool_call.function.name
+            try:
+                # Treat missing/empty arguments as "no arguments".
+                inputs = json.loads(tool_call.function.arguments or "{}")
+            except json.JSONDecodeError as err:
+                # Report malformed arguments back to the model instead of
+                # crashing, so it can retry on the next turn.
+                result = f"Invalid JSON arguments for {name}: {err}"
+            else:
+                result = handle_tool_call(name, inputs)
+            messages.append({
+                "role": "tool",
+                "tool_call_id": tool_call.id,
+                "content": str(result),
+            })
+    raise RuntimeError(f"Agent did not finish within {max_turns} turns")
diff --git a/scripts/agents/pr_checker_agent.py b/scripts/agents/pr_checker_agent.py
new file mode 100644
index 000000000..ac2742e75
--- /dev/null
+++
b/scripts/agents/pr_checker_agent.py
@@ -0,0 +1,144 @@
+"""Agent that reviews a newly opened PR and posts at most one comment on it."""
+
+import os
+
+from github import Github, Auth
+
+from helpers import validate_env_vars, validate_api_keys, run_agent
+
+# Setup
+
+# Validate before the first os.environ[...] lookup or GitHub call, so a missing
+# variable fails with a clear "<NAME> is not set" instead of a bare KeyError.
+validate_env_vars([
+    "GITHUB_TOKEN", "REPO_NAME", "PR_NUMBER", "PR_TITLE",
+    "AUTHOR_USERNAME", "AUTHOR_ASSOCIATION", "MODEL",
+])
+validate_api_keys()
+
+gh = Github(auth=Auth.Token(os.environ["GITHUB_TOKEN"]))
+repo = gh.get_repo(os.environ["REPO_NAME"])
+pr = repo.get_pull(int(os.environ["PR_NUMBER"]))
+author = os.environ["AUTHOR_USERNAME"]
+
+MODEL = os.environ["MODEL"]
+
+# author_association values treated as "no prior contribution to this repo".
+FIRST_CONTRIBUTION_ASSOCIATIONS = ("FIRST_TIMER", "FIRST_TIME_CONTRIBUTOR", "NONE")
+
+# Tools
+
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "post_comment",
+            "description": (
+                "Post a comment on the PR. Use this to welcome a first-time contributor, "
+                "ask for a clearer description, request an issue link, or flag non-compliance "
+                "with CONTRIBUTING.md. Combine multiple concerns into a single comment where "
+                "possible rather than posting several separate ones."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "body": {"type": "string", "description": "The comment text (markdown supported)."}
+                },
+                "required": ["body"],
+            },
+        },
+    },
+]
+
+# System prompt
+
+SYSTEM_PROMPT = """You are a PR review assistant for an open-source GitHub repository.
+Check the following in order, then post at most one comment combining all concerns. If nothing needs flagging, stay silent.
+
+Checks:
+1. FIRST CONTRIBUTION: Welcome first-time contributors and link any getting-started resources from CONTRIBUTING.md.
+2. DESCRIPTION: If missing or too vague to explain what changed and why, ask for clarification.
+3. LINKED ISSUE: If no "Fixes/Closes/Resolves/Related to #N" link exists, ask the author to add one.
+4. CONTRIBUTING.md: If the PR doesn't follow the required structure, quote the specific rule that is violated.
+
+Rules:
+- One comment maximum. Combine all concerns.
+- Silence if everything is fine.
+- Be constructive, not demanding.
+- No emojis.
+
+When posting a comment, always use this exact structure (omit sections that don't apply):
+
+Thanks for the contribution!
+
+
+
+
+
+
+... (repeat for each rule that is violated)"""
+
+# GitHub helpers
+
+def get_contributing_md() -> str:
+    """Return CONTRIBUTING.md from the repo root, or a notice if it cannot be fetched."""
+    try:
+        contents = repo.get_contents("CONTRIBUTING.md")
+        return contents.decoded_content.decode("utf-8")
+    except Exception as err:
+        # Best effort: the review still runs without the file, but say why.
+        print(f"[github] Could not read CONTRIBUTING.md: {err}")
+        return "(No CONTRIBUTING.md found in this repository.)"
+
+
+def is_first_contribution() -> bool:
+    """Return True if AUTHOR_ASSOCIATION marks the author as new to this repo."""
+    return os.environ["AUTHOR_ASSOCIATION"] in FIRST_CONTRIBUTION_ASSOCIATIONS
+
+
+def post_comment(body: str) -> str:
+    """Post body as a comment on the PR and return a status string for the model."""
+    pr.create_issue_comment(body)
+    return "Comment posted."
+
+# Tool dispatch
+
+def handle_tool_call(name: str, inputs: dict) -> str:
+    """Execute the tool the model requested and return its textual result."""
+    if name == "post_comment":
+        result = post_comment(inputs["body"])
+    else:
+        result = f"Unknown tool: {name}"
+
+    print(f"[tool] {name}: {result}")
+    return result
+
+# Agentic loop
+
+def build_initial_message() -> str:
+    """Build the user message: PR title, author, description and CONTRIBUTING.md."""
+    first_contribution = is_first_contribution()
+    contributing_md = get_contributing_md()
+
+    return (
+        f"Please review this newly opened PR:\n\n"
+        f"Title: {os.environ['PR_TITLE']}\n"
+        f"Author: {author} ({'first-time contributor' if first_contribution else 'returning contributor'})\n"
+        f"Description:\n{os.environ.get('PR_BODY') or '(no description provided)'}\n\n"
+        f"---\n"
+        f"CONTRIBUTING.md contents:\n\n"
+        f"{contributing_md}"
+    )
+
+
+def run_pr_review_agent():
+    """Run the agent on the PR identified by the environment."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": build_initial_message()},
+    ]
+    run_agent(messages, TOOLS, handle_tool_call, MODEL)
+
+
+if __name__ == "__main__":
+    run_pr_review_agent()
diff --git a/scripts/agents/security_review_agent.py b/scripts/agents/security_review_agent.py
new file mode
100644
index 000000000..9a6c73723
--- /dev/null
+++ b/scripts/agents/security_review_agent.py
@@ -0,0 +1,197 @@
+"""Agent that reviews a PR diff for security issues and posts the findings."""
+
+import os
+
+from github import Github, Auth
+
+from helpers import validate_env_vars, validate_api_keys, run_agent
+
+# Setup
+
+# Validate before the first os.environ[...] lookup or GitHub call, so a missing
+# variable fails with a clear "<NAME> is not set" instead of a bare KeyError.
+validate_env_vars(["GITHUB_TOKEN", "REPO_NAME", "PR_NUMBER", "MODEL"])
+validate_api_keys()
+
+gh = Github(auth=Auth.Token(os.environ["GITHUB_TOKEN"]))
+repo = gh.get_repo(os.environ["REPO_NAME"])
+pr = repo.get_pull(int(os.environ["PR_NUMBER"]))
+
+MODEL = os.environ["MODEL"]
+
+
+def _csv_env(name: str, default: str) -> set[str]:
+    """Parse a comma-separated env var into a set, ignoring blanks and stray spaces."""
+    return {item.strip() for item in os.environ.get(name, default).split(",") if item.strip()}
+
+
+IGNORED_FILENAMES = _csv_env(
+    "IGNORED_FILENAMES",
+    "package-lock.json,yarn.lock,poetry.lock,Gemfile.lock,Cargo.lock,composer.lock,pnpm-lock.yaml,pip.lock",
+)
+
+# Extensions must include a leading dot
+IGNORED_EXTENSIONS = _csv_env("IGNORED_EXTENSIONS", ".lock,.sum")
+
+# Truncate very large diffs like generated files to prevent bloating the prompt
+MAX_PATCH_CHARS_PER_FILE = int(os.environ.get("MAX_PATCH_CHARS_PER_FILE", 3000))
+
+# Header that marks a comment as ours, so later runs can find and update it
+REVIEW_HEADER = "Automated Security Review"
+
+# System prompt
+
+SYSTEM_PROMPT = """You are a security analysis assistant for a GitHub repository.
+You are given a pull request diff and must identify potential security issues.
+
+Flag only: hardcoded secrets or credentials, injection vulnerabilities (SQL, shell, template), insecure cryptography or hashing, unsafe deserialization, path traversal, missing input validation on user-controlled data, known-vulnerable dependency versions, overly permissive file or network access.
+
+Do not comment on style, performance, test coverage, or best practices unless directly tied to a security risk.
+
+Always call post_security_review once when done, even if there are no findings.
+No emojis.
+
+Use this exact format:
+
+### Summary
+
+
+### Findings (omit section if none)
+
+****
+
+
+
+
+
+... (repeat for each finding)
+
+Disclaimer: This review is AI-generated. Please validate the findings before fixing.
+"""
+
+# GitHub helpers
+
+def get_pr_diff() -> str:
+    """
+    Fetches changed files and their patches, filtering out lockfiles and
+    other noise. Returns a formatted string ready to be included in the prompt.
+    """
+    sections = []
+    for f in pr.get_files():
+        filename = os.path.basename(f.filename)
+        _, ext = os.path.splitext(filename)
+
+        if filename in IGNORED_FILENAMES or ext in IGNORED_EXTENSIONS:
+            print(f"[diff] Skipping {f.filename} (ignored file type)")
+            continue
+
+        if not f.patch:
+            print(f"[diff] Skipping {f.filename} (no patch — binary or too large)")
+            continue
+
+        patch = f.patch[:MAX_PATCH_CHARS_PER_FILE]
+        truncated = len(f.patch) > MAX_PATCH_CHARS_PER_FILE
+        sections.append(
+            f"### {f.filename}\n```diff\n{patch}"
+            + ("\n... (truncated)" if truncated else "")
+            + "\n```"
+        )
+
+    return "\n\n".join(sections) if sections else "(no reviewable changes found)"
+
+
+def find_previous_security_comment() -> object | None:
+    """
+    Looks for an existing security review comment posted by github-actions[bot]
+    so we can replace it rather than stacking multiple comments on updated reviews.
+    """
+    for comment in pr.get_issue_comments():
+        if (
+            comment.user.login == "github-actions[bot]"
+            and REVIEW_HEADER in comment.body
+        ):
+            return comment
+    return None
+
+
+def post_or_update_comment(body: str):
+    """
+    If a previous security review comment exists, edit it in place.
+    Otherwise post a new one to keep the PR timeline clean.
+    """
+    existing = find_previous_security_comment()
+    if existing:
+        existing.edit(body)
+        print("[comment] Updated existing security review comment.")
+    else:
+        pr.create_issue_comment(body)
+        print("[comment] Posted new security review comment.")
+
+# Tools
+
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "post_security_review",
+            "description": (
+                "Post the security review findings as a comment on the PR. "
+                "Call this once when your analysis is complete. "
+                "If there are no findings, still call this to confirm the review ran."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "body": {
+                        "type": "string",
+                        "description": "The full markdown comment body to post on the PR.",
+                    }
+                },
+                "required": ["body"],
+            },
+        },
+    }
+]
+
+# Tool dispatch
+
+def handle_tool_call(name: str, inputs: dict) -> str:
+    """Execute the tool the model requested and return its textual result."""
+    if name == "post_security_review":
+        # Prepend a header to identify review comments across runs
+        body = f"## {REVIEW_HEADER}\n\n{inputs['body']}"
+        post_or_update_comment(body)
+        return "Security review comment posted."
+    return f"Unknown tool: {name}"
+
+# Agentic loop
+
+def build_initial_message() -> str:
+    """Build the user message: PR number and title, trigger note and filtered diff."""
+    trigger = os.environ.get("TRIGGER", "pull_request")
+    trigger_note = (
+        "This review was requested manually via `/security-review`."
+        if trigger == "issue_comment"
+        else "This review was triggered automatically on PR creation."
+    )
+
+    return (
+        f"Please perform a security review of this pull request.\n\n"
+        f"**PR #{pr.number}:** {pr.title}\n"
+        f"_{trigger_note}_\n\n"
+        f"---\n\n"
+        f"{get_pr_diff()}"
+    )
+
+
+def run_security_review_agent():
+    """Run the agent on the PR identified by the environment."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": build_initial_message()},
+    ]
+    run_agent(messages, TOOLS, handle_tool_call, MODEL)
+
+
+if __name__ == "__main__":
+    run_security_review_agent()
diff --git a/scripts/agents/triage_agent.py b/scripts/agents/triage_agent.py
new file mode 100644
index 000000000..366a4dbb6
--- /dev/null
+++ b/scripts/agents/triage_agent.py
@@ -0,0 +1,244 @@
+"""Agent that triages a new issue: duplicate check, labels and a first reply."""
+
+import os
+
+from github import Github, Auth
+
+from helpers import validate_env_vars, validate_api_keys, run_agent
+
+# Setup
+
+# Validate before the first os.environ[...] lookup or GitHub call, so a missing
+# variable fails with a clear "<NAME> is not set" instead of a bare KeyError.
+# ISSUE_BODY is deliberately not required: an issue may be opened with an empty
+# body, and build_initial_message() already substitutes a placeholder for it.
+validate_env_vars(["GITHUB_TOKEN", "REPO_NAME", "ISSUE_NUMBER", "ISSUE_TITLE", "MODEL"])
+validate_api_keys()
+
+gh = Github(auth=Auth.Token(os.environ["GITHUB_TOKEN"]))
+repo = gh.get_repo(os.environ["REPO_NAME"])
+issue = repo.get_issue(int(os.environ["ISSUE_NUMBER"]))
+
+# int(value, 100) would pass 100 as the numeric base and raise ValueError;
+# the intent is a default of 100 when the variable is not set.
+LATEST_ISSUES_LIMIT = int(os.environ.get("LATEST_ISSUES_LIMIT", 100))
+AVAILABLE_LABELS = os.environ.get("AVAILABLE_LABELS", "bug,enhancement,question,documentation,needs-info")
+MODEL = os.environ["MODEL"]
+
+# Tools
+
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "apply_label",
+            "description": (
+                "Apply one or more labels to the issue. "
+                "Use labels like: " + AVAILABLE_LABELS
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "labels": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": "List of labels to apply.",
+                    }
+                },
+                "required": ["labels"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "post_comment",
+            "description": "Post a comment on the issue, e.g. to ask for clarification or acknowledge receipt.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "body": {"type": "string", "description": "The comment text (markdown supported)."}
+                },
+                "required": ["body"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "mark_duplicate",
+            "description": (
+                "Mark this issue as a duplicate of an existing one. "
+                "Use this when the issue is clearly asking about the same thing as an open issue. "
+                "Post a comment pointing to the original issue without closing anything."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "original_issue_number": {
+                        "type": "integer",
+                        "description": "The issue number this is a duplicate of.",
+                    },
+                    "reason": {
+                        "type": "string",
+                        "description": "Brief explanation of why these issues are duplicates.",
+                    },
+                },
+                "required": ["original_issue_number", "reason"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "suggest_possible_duplicate",
+            "description": (
+                "Use when an existing issue is related but not clearly the same thing. "
+                "Posts a comment pointing to the similar issue without closing anything. "
+                "Continue triage normally after posting the comment."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "related_issue_number": {
+                        "type": "integer",
+                        "description": "The issue number that might be related.",
+                    },
+                    "reason": {
+                        "type": "string",
+                        "description": "Brief explanation of why these issues seem related.",
+                    },
+                },
+                "required": ["related_issue_number", "reason"],
+            },
+        },
+    },
+]
+
+# System prompt
+
+SYSTEM_PROMPT = """You are an issue triage assistant for a GitHub repository.
+Given a new issue and a list of existing open issues, follow these steps in order.
+No emojis.
+
+1. DUPLICATE CHECK: If the issue clearly duplicates an existing one, call mark_duplicate and stop.
+   If it seems related but distinct, call suggest_possible_duplicate and continue triage.
+2. LABEL: Apply appropriate labels (bug, enhancement, question, documentation, needs-info, good-first-issue, etc.).
+3. NEEDS INFO: If the issue lacks key details (reproduction steps for bugs, use case for features), post a comment asking for them using this format:
+
+Thanks for opening this issue. To help us investigate, please provide:
+-
+... (repeat for each missing detail)
+
+4. ACKNOWLEDGE: If no duplicate was flagged and no needs-info comment was posted, acknowledge receipt with this format:
+
+Thanks for the report. We will take a look.
+
+Do not post acknowledgments on administrative issues such as meeting minutes or roadmaps."""
+
+# GitHub helpers
+
+def get_existing_issues(limit: int = LATEST_ISSUES_LIMIT) -> str:
+    """
+    Fetches the most recent open issues (excluding the current one)
+    and formats them into a string for the prompt.
+    """
+    lines = []
+    for existing in repo.get_issues(state="open"):
+        # Check the cap first so a limit of 0 yields no entries.
+        if len(lines) >= limit:
+            break
+        if existing.number == issue.number:
+            continue
+        lines.append(
+            f"- #{existing.number}: {existing.title}\n"
+            f"  {(existing.body or '').strip()[:200]}"  # truncate long bodies
+        )
+    return "\n".join(lines) if lines else "(no other open issues)"
+
+
+def apply_label(labels: list[str]) -> str:
+    """Apply labels to the issue, creating any that do not exist in the repo yet."""
+    if not labels:
+        return "No labels were given."
+    # Fetch the repo labels once into a set for O(1) membership checks.
+    existing_label_names = {label.name for label in repo.get_labels()}
+    for label in labels:
+        if label not in existing_label_names:
+            repo.create_label(label, "ededed")
+            existing_label_names.add(label)  # avoid re-creating a repeated name
+    issue.add_to_labels(*labels)
+    return f"Applied labels: {labels}"
+
+
+def post_comment(body: str) -> str:
+    """Post body as a comment on the issue and return a status string for the model."""
+    issue.create_comment(body)
+    return "Comment posted."
+
+
+def mark_duplicate(original_issue_number: int, reason: str) -> str:
+    """Comment that the issue duplicates another one and add the "duplicate" label."""
+    original = repo.get_issue(original_issue_number)
+    issue.create_comment(
+        f"This looks like a duplicate of #{original_issue_number} "
+        f"({original.html_url}).\n\n> {reason}\n\n"
+        f"If you believe it is distinct, please edit this issue with any additional details."
+    )
+    issue.add_to_labels("duplicate")
+    return f"Marked as duplicate of #{original_issue_number}."
+
+
+def suggest_possible_duplicate(related_issue_number: int, reason: str) -> str:
+    """Comment that the issue may be related to another one; adds no label."""
+    related = repo.get_issue(related_issue_number)
+    issue.create_comment(
+        f"This may be related to #{related_issue_number} "
+        f"({related.html_url}): {reason}\n\n"
+        f"Please check if that issue already covers what you are reporting."
+    )
+    return f"Flagged as possibly related to #{related_issue_number}."
+
+
+# Tool dispatch
+
+def handle_tool_call(name: str, inputs: dict) -> str:
+    """Execute the tool the model requested and return its textual result."""
+    if name == "apply_label":
+        result = apply_label(inputs["labels"])
+    elif name == "post_comment":
+        result = post_comment(inputs["body"])
+    elif name == "mark_duplicate":
+        result = mark_duplicate(inputs["original_issue_number"], inputs["reason"])
+    elif name == "suggest_possible_duplicate":
+        result = suggest_possible_duplicate(inputs["related_issue_number"], inputs["reason"])
+    else:
+        result = f"Unknown tool: {name}"
+    print(f"Tool {name}: {result}")
+    return result
+
+# Agentic loop
+
+def build_initial_message() -> str:
+    """Build the user message: the new issue followed by the open-issue list."""
+    return (
+        f"Please triage this new GitHub issue:\n\n"
+        f"Title: {os.environ['ISSUE_TITLE']}\n"
+        f"Body:\n{os.environ.get('ISSUE_BODY') or '(no description provided)'}\n\n"
+        f"---\n"
+        f"Here are the currently open issues for duplicate detection:\n\n"
+        f"{get_existing_issues()}"
+    )
+
+
+def run_triage_agent():
+    """Run the agent on the issue identified by the environment."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": build_initial_message()},
+    ]
+    run_agent(messages, TOOLS, handle_tool_call, MODEL)
+
+
+if __name__ == "__main__":
+    run_triage_agent()