Scheduled Container Vulnerability Scan #8

Workflow file for this run

.github/workflows/container-scan.yml at dd60f90

	# Weekly (and on-demand) vulnerability scan of the published mega image.
	name: Scheduled Container Vulnerability Scan

	on:
	  schedule:
	    # Weekly scan of main branch mega image every Monday at 06:00 UTC
	    - cron: "0 6 * * 1"
	  # Manual runs expose two knobs for exercising the Claude triage pipeline.
	  workflow_dispatch:
	    inputs:
	      test_cve_id:
	        description: "Optional: bypass new-CVE detection and force-analyze this specific CVE ID (for testing the Claude pipeline)"
	        required: false
	        type: string
	        default: ""
	      dry_run:
	        description: "Run Claude analysis but do NOT file GitHub issues (artifact still uploaded)"
	        required: false
	        type: boolean
	        default: false

	# No token scopes at workflow level; the scan job declares its own.
	permissions: {}

	env:
	  # Image repository whose `main-mega-amd64` tag the Trivy steps scan.
	  GHCR_REPO: ghcr.io/broadinstitute/viral-ngs

	jobs:
	  scan:
	    runs-on: ubuntu-latest
	    # Token scopes granted to this job (the workflow default is none).
	    permissions:
	      contents: read          # repository checkout
	      packages: read          # GHCR login / image pull
	      security-events: write  # SARIF upload to the Security tab
	      issues: write           # for filing CVE issues
	      id-token: write         # for OIDC token to GCP via WIF
	    steps:
	# Full history is required: the Claude triage prompt later in this job
	# inspects prior fixes with `git log --all` / `git show`, which the default
	# shallow (depth 1) checkout cannot serve.
	- name: Checkout repository
	  uses: actions/checkout@v4
	  with:
	    fetch-depth: 0
	    # Do not leave the job token in .git/config, where the analysis agent
	    # (which is allowed to read workspace files) could see it. Later steps
	    # pass the token explicitly via GH_TOKEN / github_token instead.
	    persist-credentials: false

	# Authenticate to ghcr.io with the job token before the Trivy steps scan
	# the GHCR-hosted image.
	- name: Log in to GHCR
	  uses: docker/login-action@v3
	  with:
	    registry: ghcr.io
	    username: ${{ github.actor }}
	    password: ${{ secrets.GITHUB_TOKEN }}

	# Pass 1 of 2: SARIF report for the GitHub Security tab. The step never
	# fails on findings (exit-code 0). Only fixable CRITICAL/HIGH findings are
	# requested, filtered through .trivyignore and the Rego ignore policy.
	# NOTE(review): `@master` is a mutable ref; consider pinning this action to
	# a release tag or commit SHA.
	- name: Run Trivy vulnerability scanner
	  uses: aquasecurity/trivy-action@master
	  with:
	    image-ref: ${{ env.GHCR_REPO }}:main-mega-amd64
	    format: sarif
	    output: trivy-results.sarif
	    severity: CRITICAL,HIGH
	    limit-severities-for-sarif: true
	    exit-code: "0"
	    ignore-unfixed: true
	    trivyignores: .trivyignore
	    ignore-policy: .trivy-ignore-policy.rego

	# Pass 2 of 2: same image and filters, emitted as JSON. This file is what
	# the triage step parses and what is uploaded as the scan artifact.
	# NOTE(review): `@master` is a mutable ref; consider pinning this action to
	# a release tag or commit SHA.
	- name: Run Trivy vulnerability scanner (JSON)
	  uses: aquasecurity/trivy-action@master
	  with:
	    image-ref: ${{ env.GHCR_REPO }}:main-mega-amd64
	    format: json
	    output: trivy-results.json
	    severity: CRITICAL,HIGH
	    exit-code: "0"  # don't fail here — Claude pipeline + final-step gate handles signaling
	    ignore-unfixed: true
	    trivyignores: .trivyignore
	    ignore-policy: .trivy-ignore-policy.rego

	# Surface the SARIF finding count as a workflow notice. Runs even when an
	# earlier step failed, but only reports if the SARIF file was produced.
	- name: Log scan result count
	  if: always()
	  run: |
	    if [ -f trivy-results.sarif ]; then
	      # `?` keeps jq from erroring on a run that has no `results` array.
	      COUNT=$(jq '[.runs[]?.results[]?] | length' trivy-results.sarif)
	      echo "::notice::Trivy found $COUNT findings for mega-scheduled (after policy filtering)"
	    fi

	# Publish the SARIF report to code scanning. Runs after failures too, but
	# only when the report exists: uploading a missing file would add a second,
	# misleading error on top of whatever step actually failed.
	- name: Upload Trivy scan results to GitHub Security tab
	  if: always() && hashFiles('trivy-results.sarif') != ''
	  uses: github/codeql-action/upload-sarif@v3
	  with:
	    sarif_file: trivy-results.sarif
	    category: container-mega-scheduled

	# Keep the raw JSON scan as a run artifact, even if an earlier step failed.
	- name: Upload Trivy JSON results
	  if: always()
	  uses: actions/upload-artifact@v4
	  with:
	    name: trivy-mega-scheduled
	    path: trivy-results.json
	    # Action default, spelled out: a missing file warns instead of failing.
	    if-no-files-found: warn

	# === Claude triage pipeline ===
	# If new fixable HIGH/CRITICAL CVEs are found, hand them to Claude (Sonnet 4.6
	# on Vertex AI) for analysis, then file GitHub issues. Source of truth for
	# "new" is GH issues themselves: a CVE is new if no existing issue (open OR
	# closed) has the CVE ID in its title.

	# Decide which advisory IDs go to Claude.
	# Outputs:
	#   cve_ids   - space-separated IDs to analyze (empty means nothing to do)
	#   test_mode - "true" when the IDs came from the manual test_cve_id input
	- name: Identify new fixable CVEs
	  id: triage
	  env:
	    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	    TEST_CVE_ID: ${{ inputs.test_cve_id }}
	  run: |
	    set -euo pipefail

	    # Test mode: bypass scan-diff and use the provided CVE ID directly.
	    if [ -n "${TEST_CVE_ID:-}" ]; then
	      # The value is copied into $GITHUB_OUTPUT and from there into the
	      # Claude prompt, so accept only space-separated ID tokens. This
	      # rejects newlines (which could forge extra step outputs) and
	      # free-form text.
	      id_list_re='^[A-Za-z0-9._-]+( +[A-Za-z0-9._-]+)*$'
	      if ! [[ "$TEST_CVE_ID" =~ $id_list_re ]]; then
	        echo "::error::test_cve_id must be one or more advisory IDs (letters, digits, '.', '_', '-') separated by single-line spaces"
	        exit 1
	      fi
	      echo "::notice::Test mode active — analyzing TEST_CVE_ID=$TEST_CVE_ID"
	      echo "cve_ids=$TEST_CVE_ID" >> "$GITHUB_OUTPUT"
	      echo "test_mode=true" >> "$GITHUB_OUTPUT"
	      exit 0
	    fi

	    # Production mode needs the JSON report; fail with a clear message
	    # rather than a raw jq error if the scan step did not produce it.
	    if [ ! -f trivy-results.json ]; then
	      echo "::error::trivy-results.json not found — the JSON scan step did not produce a report"
	      exit 1
	    fi

	    # Production mode: parse trivy JSON for fixable HIGH/CRITICAL CVEs.
	    all_cves=$(jq -r '
	      [.Results[]?.Vulnerabilities[]?
	       | select((.Severity == "HIGH" or .Severity == "CRITICAL")
	                and (.FixedVersion // "") != "")
	       | .VulnerabilityID]
	      | unique[]
	    ' trivy-results.json)

	    if [ -z "$all_cves" ]; then
	      echo "::notice::No fixable HIGH/CRITICAL CVEs in scan."
	      echo "cve_ids=" >> "$GITHUB_OUTPUT"
	      echo "test_mode=false" >> "$GITHUB_OUTPUT"
	      exit 0
	    fi

	    # Dedup against existing GH issues (open + closed) by title-substring search.
	    new_cves=()
	    for cve in $all_cves; do
	      count=$(gh search issues \
	        --repo "$GITHUB_REPOSITORY" \
	        --state=all \
	        "\"$cve\" in:title" \
	        --json url --jq 'length')
	      if [ "$count" = "0" ]; then
	        new_cves+=("$cve")
	        echo " NEW: $cve"
	      else
	        echo " existing issue for $cve, skipping"
	      fi
	    done

	    # Space-joined list; empty when every finding already has an issue.
	    new_list="${new_cves[*]:-}"
	    echo "::notice::Found ${#new_cves[@]} new fixable CVE(s)"
	    echo "cve_ids=$new_list" >> "$GITHUB_OUTPUT"
	    echo "test_mode=false" >> "$GITHUB_OUTPUT"
	# Obtain GCP credentials through Workload Identity Federation. Skipped when
	# triage produced no CVE IDs. Provider and service account come from
	# repository variables.
	- name: Authenticate to GCP via Workload Identity Federation
	  if: steps.triage.outputs.cve_ids != ''
	  uses: google-github-actions/auth@v2
	  with:
	    service_account: ${{ vars.GCP_SA_EMAIL }}
	    workload_identity_provider: ${{ vars.GCP_WIP_PROVIDER }}

	# Create the labels applied when filing issues. Skipped in dry-run mode and
	# when there is nothing to file.
	- name: Ensure issue labels exist
	  if: steps.triage.outputs.cve_ids != '' && inputs.dry_run != true
	  env:
	    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	  run: |
	    # Idempotent: gh label create exits non-zero if label exists; ignore that.
	    gh label create security --color B60205 --description "Security-related issue" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true
	    gh label create cve --color B60205 --description "CVE tracked in container scans" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true
	    gh label create test --color FBCA04 --description "Test issue (filed by workflow_dispatch test_cve_id)" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true

	# Run Claude (via Vertex AI) to write one triage report per CVE ID into
	# /tmp/issues/<CVE-ID>.md. Skipped when triage produced no CVE IDs.
	- name: Claude analysis on Vertex AI
	  if: steps.triage.outputs.cve_ids != ''
	  uses: anthropics/claude-code-action@v1
	  env:
	    CLAUDE_CODE_USE_VERTEX: '1'
	    CLOUD_ML_REGION: global
	    ANTHROPIC_VERTEX_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
	  with:
	    use_vertex: 'true'
	    github_token: ${{ secrets.GITHUB_TOKEN }}
	    claude_args: '--model claude-sonnet-4-6 --max-turns 30'
	    # Tool allow-list for the agent: file read/write plus a fixed set of
	    # inspection commands.
	    # NOTE(review): `Bash(find:*)` also admits `find ... -exec <cmd>`, which
	    # would let the agent run commands outside this list — confirm that is
	    # acceptable or narrow the pattern.
	    settings: |
	      {
	        "permissions": {
	          "allow": [
	            "Read",
	            "Write",
	            "Bash(git log:*)",
	            "Bash(git show:*)",
	            "Bash(git rev-parse:*)",
	            "Bash(grep:*)",
	            "Bash(find:*)",
	            "Bash(jq:*)",
	            "Bash(ls:*)",
	            "Bash(mkdir:*)",
	            "Bash(cat:*)",
	            "Bash(head:*)",
	            "Bash(tail:*)"
	          ]
	        }
	      }
	    prompt: |
	      You are triaging container vulnerabilities for the broadinstitute/viral-ngs repo.

	      ## Your task

	      For each CVE ID listed below, write a triage report to `/tmp/issues/<CVE-ID>.md`.
	      The reports will be filed verbatim as GitHub issues by the next workflow step.

	      CVE IDs to analyze: ${{ steps.triage.outputs.cve_ids }}

	      Test mode: ${{ steps.triage.outputs.test_mode }}
	      (If `true`, the CVE was supplied manually via `test_cve_id` and may not appear in
	      the current scan's `trivy-results.json`. Use your training knowledge in that case
	      and add a `> _Test analysis_` blockquote at the top of the report so reviewers
	      know it was generated for pipeline validation, not from a real scan finding.)

	      ## Required reading (do this BEFORE writing reports)

	      1. `trivy-results.json` (in the workspace root) — authoritative metadata for every
	      CVE flagged in the current scan. ALWAYS check here first for CVE details
	      (severity, vector, package path, fix version, references). Use `jq` to query.
	      2. `.agents/skills/container-vulns/SKILL.md` — read fully. This is the repo's
	      container-vulnerability playbook and tells you what the maintainers consider
	      actionable vs. accepted risk.
	      3. `.trivyignore` — existing per-CVE exceptions with their justifications. Mirror
	      the writing style and depth of justification when you recommend `.trivyignore`
	      additions.
	      4. `.trivy-ignore-policy.rego` — Rego policy for class-level CVE filtering.
	      Understand what it filters and why.
	      5. `docker/Dockerfile.*` — container build files showing dep installs and inline
	      mitigations. Look for prior fixups (`find ... -exec rm`, `gem install`, etc.)
	      applied to similar packages.
	      6. `docker/requirements/*.txt` — conda dependency lists. Use `grep` to find
	      which file pulls in the affected package.
	      7. Recent git history — `git log --all --oneline --grep <package>`,
	      `git log --all --oneline --grep CVE-`, and `git show <sha>` to inspect prior
	      fix patterns. ALWAYS verify a commit SHA exists before citing it.

	      ## Required structure for each report

	      File path: `/tmp/issues/<CVE-ID>.md` (filename MUST match the CVE ID exactly).
	      First line MUST be a single H1 used as the issue title:
	      `# [CVE-YYYY-NNNN] <package>: <one-line description>`

	      Then sections (use H2 `##` headers):

	      1. Summary — 2–3 sentences: what it is, severity, where it came from.
	      2. Vulnerability details — CVSS score + vector + plain-English meaning;
	      2–4 sentences explaining the bug technically.
	      3. Dependency chain — name the direct conda package or Docker layer that
	      pulls this in. Trace transitive deps where you can. If you can't determine
	      this confidently, say so explicitly — do NOT guess.
	      4. Why the Rego policy didn't suppress it — explain in terms of the AV/PR/UI/S
	      vector classes the policy filters and why this CVE's vector doesn't match.
	      5. Recommended fix — concrete and actionable. Options:
	      - Version bump (which file, which floor)
	      - Inline Dockerfile mitigation (which Dockerfile, what RUN-block addition)
	      - `.trivyignore` entry (with justification matching the existing style)
	      Cite historical precedent when applicable: `(mirror the fix in commit <sha>)`.
	      6. Practical exploitability — in this deployment model (ephemeral batch
	      containers, no network-facing services, no untrusted user input at runtime),
	      is this actually reachable? Be honest and specific.
	      7. References — GHSA URL, NVD URL, vendor advisory.

	      ## Constraints

	      - `mkdir -p /tmp/issues` first.
	      - One file per CVE.
	      - Be concise. Each report should be readable in 1–2 minutes (target: 300–600 words).
	      - Do NOT hallucinate package versions, file paths, or commit SHAs. Verify with
	      tools when in doubt.
	      - If you finish all reports with budget remaining, do NOT pad — stop.

	# Preserve whatever reports were written under /tmp/issues, even if the
	# analysis step failed part-way. An empty directory only warns.
	- name: Upload Claude analysis as artifact
	  if: always() && steps.triage.outputs.cve_ids != ''
	  uses: actions/upload-artifact@v4
	  with:
	    name: claude-cve-analysis
	    path: /tmp/issues/
	    if-no-files-found: warn

	# File one GitHub issue per report in /tmp/issues/*.md: the first line
	# (minus the leading "# ") becomes the title, the rest the body. Skipped in
	# dry-run mode. Fails the step if no reports exist or any issue could not
	# be created, but still attempts every report first.
	- name: File GitHub issues
	  if: steps.triage.outputs.cve_ids != '' && inputs.dry_run != true
	  env:
	    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	    TEST_MODE: ${{ steps.triage.outputs.test_mode }}
	  run: |
	    # No `-e`: one failed issue must not stop the remaining ones.
	    set -uo pipefail
	    # An unmatched glob expands to nothing instead of the literal pattern.
	    shopt -s nullglob

	    reports=(/tmp/issues/*.md)
	    if [ "${#reports[@]}" -eq 0 ]; then
	      echo "::error::No analysis files in /tmp/issues — Claude may have failed silently"
	      exit 1
	    fi

	    labels="security,cve"
	    if [ "$TEST_MODE" = "true" ]; then
	      labels="$labels,test"
	    fi

	    failed=0
	    for f in "${reports[@]}"; do
	      cve=$(basename "$f" .md)
	      title=$(head -n 1 "$f" | sed 's/^# *//')

	      # Later runs decide whether a CVE is "new" by searching issue titles
	      # for its ID, so the title must contain the ID even when the
	      # report's H1 is empty or omitted it.
	      case "$title" in
	        "") title="[$cve] (untitled triage report)" ;;
	        *"$cve"*) ;;
	        *) title="[$cve] $title" ;;
	      esac

	      echo "Creating issue for $cve: $title"
	      # Stream the body (everything after the title line) on stdin rather
	      # than passing a potentially large report as a command-line argument.
	      if ! tail -n +2 "$f" | gh issue create \
	        --repo "$GITHUB_REPOSITORY" \
	        --title "$title" \
	        --body-file - \
	        --label "$labels"; then
	        echo "::error::Failed to create issue for $cve"
	        failed=$((failed+1))
	      fi
	    done

	    if [ "$failed" -gt 0 ]; then
	      echo "::error::$failed issue(s) failed to create"
	      exit 1
	    fi

	# Final gate: mark the run failed when production triage found new CVE IDs,
	# so the scheduled run signals that action is needed. Test-mode runs are
	# exempt.
	- name: Fail job if new CVEs were found (production mode only)
	  if: steps.triage.outputs.cve_ids != '' && steps.triage.outputs.test_mode != 'true'
	  run: |
	    echo "::error::Scan found new fixable HIGH/CRITICAL CVEs. See filed issues for details."
	    exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Scheduled Container Vulnerability Scan #8

Workflow file

Scheduled Container Vulnerability Scan #8

Uh oh!

Workflow file for this run