Add Claude-powered CVE triage to scheduled scan

dpark01 · dpark01 · commit 46173be71d20 · 2026-04-27T09:45:27.000-04:00
When the weekly Trivy scan finds new fixable HIGH/CRITICAL CVEs,
hand them off to Claude Sonnet 4.6 (on Vertex AI in viral-seq-ai
via Workload Identity Federation) for analysis, then file one
GitHub issue per CVE explaining the vuln, the dependency chain,
why the Rego policy didn't suppress it, and a recommended fix
informed by historical patterns in the repo.

"New" is determined by issue-existence (open OR closed) -- a CVE
with no issue whose title contains the CVE ID is treated as new.

workflow_dispatch inputs for testing without waiting for the
weekly schedule:
  - test_cve_id: bypass diff and force-analyze a specific CVE ID
  - dry_run:     run analysis but skip gh issue create
                 (artifact is still uploaded for inspection)

Required GH repo variables (already set):
  GCP_PROJECT_ID    - viral-seq-ai
  GCP_WIP_PROVIDER  - full WIF provider resource path
  GCP_SA_EMAIL      - viral-ngs-cve-triage@viral-seq-ai.iam.gserviceaccount.com

GCP-side: WIF pool github-actions-pool + provider broadinstitute-github
(gated by repository_owner == broadinstitute); SA has
roles/aiplatform.user and roles/serviceusage.serviceUsageConsumer.
diff --git a/.github/workflows/container-scan.yml b/.github/workflows/container-scan.yml
@@ -5,6 +5,17 @@ on:
     # Weekly scan of main branch mega image every Monday at 06:00 UTC
     - cron: '0 6 * * 1'
   workflow_dispatch:
+    # Manual-run knobs for exercising the Claude triage pipeline without waiting
+    # for the weekly schedule. Both are optional.
+    inputs:
+      # Force-analyze one specific ID instead of diffing the scan against issues.
+      test_cve_id:
+        type: string
+        required: false
+        default: ""
+        description: >-
+          Optional: bypass new-CVE detection and force-analyze this specific
+          CVE ID (for testing the Claude pipeline)
+      # Produce the analysis artifact but skip `gh issue create`.
+      dry_run:
+        type: boolean
+        required: false
+        default: false
+        description: >-
+          Run Claude analysis but do NOT file GitHub issues (artifact still
+          uploaded)
 
 permissions: {}
 
@@ -18,6 +29,8 @@ jobs:
       contents: read
       packages: read
       security-events: write
+      issues: write     # for filing CVE issues
+      id-token: write   # for OIDC token to GCP via WIF
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -49,7 +62,7 @@ jobs:
           format: 'json'
           output: 'trivy-results.json'
           severity: 'CRITICAL,HIGH'
-          exit-code: '1'
+          exit-code: '0'   # don't fail here — Claude pipeline + final-step gate handles signaling
           ignore-unfixed: true
           trivyignores: '.trivyignore'
           ignore-policy: '.trivy-ignore-policy.rego'
@@ -75,3 +88,237 @@ jobs:
         with:
           name: trivy-mega-scheduled
           path: trivy-results.json
+
+      # === Claude triage pipeline ===
+      # If new fixable HIGH/CRITICAL CVEs are found, hand them to Claude (Sonnet 4.6
+      # on Vertex AI) for analysis, then file GitHub issues. Source of truth for
+      # "new" is GH issues themselves: a CVE is new if no existing issue (open OR
+      # closed) has the CVE ID in its title.
+
+      # Decide which advisory IDs the Claude triage step should analyze.
+      # Step outputs:
+      #   cve_ids   - space-separated IDs to analyze ('' when nothing is new)
+      #   test_mode - 'true' when the ID came from the test_cve_id dispatch input
+      - name: Identify new fixable CVEs
+        id: triage
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TEST_CVE_ID: ${{ inputs.test_cve_id }}
+        run: |
+          set -euo pipefail
+
+          # Test mode: bypass scan-diff and use the provided CVE ID directly.
+          if [ -n "${TEST_CVE_ID:-}" ]; then
+            # The value is appended to $GITHUB_OUTPUT and later spliced into the
+            # Claude prompt, so accept only one ID-shaped token: no whitespace,
+            # newlines, or shell/markdown metacharacters.
+            if ! [[ "$TEST_CVE_ID" =~ ^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$ ]]; then
+              # Deliberately do not echo the rejected value back into the log.
+              echo "::error::test_cve_id must be a single advisory ID such as CVE-2024-12345"
+              exit 1
+            fi
+            echo "::notice::Test mode active — analyzing TEST_CVE_ID=$TEST_CVE_ID"
+            echo "cve_ids=$TEST_CVE_ID" >> "$GITHUB_OUTPUT"
+            echo "test_mode=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Production mode: parse trivy JSON for fixable HIGH/CRITICAL CVEs.
+          # `?` tolerates results without a Vulnerabilities array; `unique[]`
+          # emits each ID once, one per line.
+          all_cves=$(jq -r '
+            [.Results[]?.Vulnerabilities[]?
+              | select((.Severity == "HIGH" or .Severity == "CRITICAL")
+                       and (.FixedVersion // "") != "")
+              | .VulnerabilityID]
+            | unique[]
+          ' trivy-results.json)
+
+          if [ -z "$all_cves" ]; then
+            echo "::notice::No fixable HIGH/CRITICAL CVEs in scan."
+            echo "cve_ids=" >> "$GITHUB_OUTPUT"
+            echo "test_mode=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Dedup against existing GH issues (open + closed) by title-substring search.
+          # No --state flag: `gh search issues --state` only accepts open|closed,
+          # and leaving it off searches both states.
+          new_cves=()
+          for cve in $all_cves; do
+            count=$(gh search issues \
+              --repo "$GITHUB_REPOSITORY" \
+              "\"$cve\" in:title" \
+              --json url --jq 'length')
+            if [ "$count" = "0" ]; then
+              new_cves+=("$cve")
+              echo "  NEW: $cve"
+            else
+              echo "  existing issue for $cve, skipping"
+            fi
+          done
+
+          # Space-joined list; expands to '' when every CVE already has an issue.
+          new_list="${new_cves[*]:-}"
+          echo "::notice::Found ${#new_cves[@]} new fixable CVE(s)"
+          echo "cve_ids=$new_list" >> "$GITHUB_OUTPUT"
+          echo "test_mode=false" >> "$GITHUB_OUTPUT"
+
+      # Obtain GCP credentials for the Vertex AI call below. Authentication uses
+      # the job's OIDC token (hence the job-level `id-token: write` permission);
+      # the provider path and service account come from repository variables.
+      # Skipped when triage produced no CVE IDs to analyze.
+      - name: Authenticate to GCP via Workload Identity Federation
+        if: steps.triage.outputs.cve_ids != ''
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: ${{ vars.GCP_WIP_PROVIDER }}
+          service_account: ${{ vars.GCP_SA_EMAIL }}
+
+      # Make sure the labels attached by the issue-filing step exist in the repo.
+      # Skipped on dry runs, which file no issues.
+      - name: Ensure issue labels exist
+        if: steps.triage.outputs.cve_ids != '' && inputs.dry_run != true
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Best-effort and idempotent: `gh label create` exits non-zero when the
+          # label already exists, so its stderr and exit status are discarded.
+          ensure_label() {
+            # $1 = label name, $2 = hex color, $3 = description
+            gh label create "$1" --color "$2" --description "$3" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true
+          }
+
+          ensure_label security B60205 "Security-related issue"
+          ensure_label cve      B60205 "CVE tracked in container scans"
+          ensure_label test     FBCA04 "Test issue (filed by workflow_dispatch test_cve_id)"
+
+      # Run Claude (via Vertex AI, using the GCP credentials obtained earlier in
+      # this job) to write one triage report per CVE into /tmp/issues.
+      - name: Claude analysis on Vertex AI
+        if: steps.triage.outputs.cve_ids != ''
+        # `prompt` and `claude_args` are inputs of the v1 action; the legacy
+        # `beta` tag predates them, so this step pins the v1 line.
+        uses: anthropics/claude-code-action@v1
+        env:
+          CLAUDE_CODE_USE_VERTEX: '1'
+          CLOUD_ML_REGION: global
+          ANTHROPIC_VERTEX_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
+        with:
+          # Documented switch that routes the action through Vertex AI; the env
+          # vars above supply the project and region.
+          use_vertex: 'true'
+          claude_args: |
+            --model claude-sonnet-4-6
+            --max-turns 30
+          # Tool allow-list: file read/write plus read-only inspection commands
+          # (git history, grep/find/jq, ls/cat/head/tail) and mkdir for the
+          # report directory.
+          settings: |
+            {
+              "permissions": {
+                "allow": [
+                  "Read",
+                  "Write",
+                  "Bash(git log:*)",
+                  "Bash(git show:*)",
+                  "Bash(git rev-parse:*)",
+                  "Bash(grep:*)",
+                  "Bash(find:*)",
+                  "Bash(jq:*)",
+                  "Bash(ls:*)",
+                  "Bash(mkdir:*)",
+                  "Bash(cat:*)",
+                  "Bash(head:*)",
+                  "Bash(tail:*)"
+                ]
+              }
+            }
+          prompt: |
+            You are triaging container vulnerabilities for the broadinstitute/viral-ngs repo.
+
+            ## Your task
+
+            For each CVE ID listed below, write a triage report to `/tmp/issues/<CVE-ID>.md`.
+            The reports will be filed verbatim as GitHub issues by the next workflow step.
+
+            **CVE IDs to analyze:** ${{ steps.triage.outputs.cve_ids }}
+
+            **Test mode:** ${{ steps.triage.outputs.test_mode }}
+            (If `true`, the CVE was supplied manually via `test_cve_id` and may not appear in
+            the current scan's `trivy-results.json`. Use your training knowledge in that case
+            and add a `> _Test analysis_` blockquote at the top of the report so reviewers
+            know it was generated for pipeline validation, not from a real scan finding.)
+
+            ## Required reading (do this BEFORE writing reports)
+
+            1. `trivy-results.json` (in the workspace root) — authoritative metadata for every
+               CVE flagged in the current scan. ALWAYS check here first for CVE details
+               (severity, vector, package path, fix version, references). Use `jq` to query.
+            2. `.agents/skills/container-vulns/SKILL.md` — read fully. This is the repo's
+               container-vulnerability playbook and tells you what the maintainers consider
+               actionable vs. accepted risk.
+            3. `.trivyignore` — existing per-CVE exceptions with their justifications. Mirror
+               the writing style and depth of justification when you recommend `.trivyignore`
+               additions.
+            4. `.trivy-ignore-policy.rego` — Rego policy for class-level CVE filtering.
+               Understand what it filters and why.
+            5. `docker/Dockerfile.*` — container build files showing dep installs and inline
+               mitigations. Look for prior fixups (`find ... -exec rm`, `gem install`, etc.)
+               applied to similar packages.
+            6. `docker/requirements/*.txt` — conda dependency lists. Use `grep` to find
+               which file pulls in the affected package.
+            7. Recent git history — `git log --all --oneline --grep <package>`,
+               `git log --all --oneline --grep CVE-`, and `git show <sha>` to inspect prior
+               fix patterns. ALWAYS verify a commit SHA exists before citing it.
+
+            ## Required structure for each report
+
+            File path: `/tmp/issues/<CVE-ID>.md` (filename MUST match the CVE ID exactly).
+            **First line MUST be a single H1 used as the issue title:**
+            `# [CVE-YYYY-NNNN] <package>: <one-line description>`
+
+            Then sections (use H2 `##` headers):
+
+            1. **Summary** — 2–3 sentences: what it is, severity, where it came from.
+            2. **Vulnerability details** — CVSS score + vector + plain-English meaning;
+               2–4 sentences explaining the bug technically.
+            3. **Dependency chain** — name the direct conda package or Docker layer that
+               pulls this in. Trace transitive deps where you can. If you can't determine
+               this confidently, say so explicitly — do NOT guess.
+            4. **Why the Rego policy didn't suppress it** — explain in terms of the AV/PR/UI/S
+               vector classes the policy filters and why this CVE's vector doesn't match.
+            5. **Recommended fix** — concrete and actionable. Options:
+                - Version bump (which file, which floor)
+                - Inline Dockerfile mitigation (which Dockerfile, what RUN-block addition)
+                - `.trivyignore` entry (with justification matching the existing style)
+               Cite historical precedent when applicable: `(mirror the fix in commit <sha>)`.
+            6. **Practical exploitability** — in this deployment model (ephemeral batch
+               containers, no network-facing services, no untrusted user input at runtime),
+               is this actually reachable? Be honest and specific.
+            7. **References** — GHSA URL, NVD URL, vendor advisory.
+
+            ## Constraints
+
+            - `mkdir -p /tmp/issues` first.
+            - One file per CVE.
+            - Be concise. Each report should be readable in 1–2 minutes (target: 300–600 words).
+            - Do NOT hallucinate package versions, file paths, or commit SHAs. Verify with
+              tools when in doubt.
+            - If you finish all reports with budget remaining, do NOT pad — stop.
+
+      # Publish whatever reports exist under /tmp/issues for inspection.
+      # `always()` keeps this step running even if an earlier step failed, and the
+      # condition does not look at dry_run, so dry runs upload the artifact too.
+      - name: Upload Claude analysis as artifact
+        if: steps.triage.outputs.cve_ids != '' && always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: claude-cve-analysis
+          path: /tmp/issues/
+          # An empty or missing directory only produces a warning here.
+          if-no-files-found: warn
+
+      # File one GitHub issue per report in /tmp/issues/*.md. The first line of
+      # each report (minus its leading '#') becomes the title and the rest the
+      # body. Skipped on dry runs. Fails the step if no report exists or if any
+      # issue could not be created.
+      - name: File GitHub issues
+        if: steps.triage.outputs.cve_ids != '' && inputs.dry_run != true
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TEST_MODE: ${{ steps.triage.outputs.test_mode }}
+        run: |
+          # No -e on purpose: one failed `gh issue create` must not abort the
+          # loop; failures are counted and reported together at the end.
+          set -uo pipefail
+
+          # nullglob makes an unmatched pattern expand to nothing, so a directory
+          # holding only non-.md files is reported as "no analysis" instead of
+          # looping over the literal pattern string.
+          shopt -s nullglob
+          reports=(/tmp/issues/*.md)
+          if [ "${#reports[@]}" -eq 0 ]; then
+            echo "::error::No analysis files in /tmp/issues — Claude may have failed silently"
+            exit 1
+          fi
+
+          # Labels are the same for every report in a run.
+          labels="security,cve"
+          if [ "$TEST_MODE" = "true" ]; then
+            labels="$labels,test"
+          fi
+
+          failed=0
+          for f in "${reports[@]}"; do
+            cve=$(basename "$f" .md)
+            title=$(head -1 "$f" | sed 's/^# *//')
+            body=$(tail -n +2 "$f")
+
+            # New-CVE detection searches issue titles for the CVE ID, so a title
+            # without it would cause a duplicate issue on every later scan.
+            case "$title" in
+              *"$cve"*) ;;
+              *) title="[$cve] $title" ;;
+            esac
+
+            echo "Creating issue for $cve: $title"
+            if ! gh issue create \
+                --repo "$GITHUB_REPOSITORY" \
+                --title "$title" \
+                --body "$body" \
+                --label "$labels"; then
+              echo "::error::Failed to create issue for $cve"
+              failed=$((failed+1))
+            fi
+          done
+
+          if [ "$failed" -gt 0 ]; then
+            echo "::error::$failed issue(s) failed to create"
+            exit 1
+          fi
+
+      # Final gate: the Trivy step no longer fails the job (exit-code '0'), so this
+      # step fails the run whenever production-mode triage found CVEs that had no
+      # tracking issue. Test-mode runs never fail here.
+      - name: Fail job if new CVEs were found (production mode only)
+        if: steps.triage.outputs.cve_ids != '' && steps.triage.outputs.test_mode != 'true'
+        env:
+          DRY_RUN: ${{ inputs.dry_run }}
+        run: |
+          # On a dry run no issues were filed, so point at the artifact instead.
+          if [ "${DRY_RUN:-}" = "true" ]; then
+            echo "::error::Scan found new fixable HIGH/CRITICAL CVEs. Dry run: no issues were filed; see the claude-cve-analysis artifact for details."
+          else
+            echo "::error::Scan found new fixable HIGH/CRITICAL CVEs. See filed issues for details."
+          fi
+          exit 1