-
Notifications
You must be signed in to change notification settings - Fork 330
329 lines (295 loc) · 16.2 KB
/
code-diff-analyzer.yml
File metadata and controls
329 lines (295 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
---
# Reusable workflow that runs an AI-based analysis of a git diff and reports
# a numeric severity level back to the calling workflow.
name: Code Diff Analyzer
on:
  workflow_call:
    secrets:
      # Passed to the AWS credentials step as the role to assume.
      BEDROCK_ACCESS_ROLE:
        required: true
    inputs:
      # Only relevant when the triggering event is push.
      skip_diff_analyzer_on_push:
        type: boolean
        required: false
        default: true
      # Only relevant when the triggering event is pull_request_target.
      skip_diff_analyzer_with_label_name:
        type: string
        required: false
        default: 'skip-diff-analyzer'
      # Only relevant when the triggering event is pull_request_target.
      update_pr_comment_with_analyzer_report:
        type: boolean
        required: false
        default: false
      # Hard fail the workflow on issues with 'high' severity or above.
      hard_fail_level:
        type: number
        required: false
        default: 3
    outputs:
      # Meaning of the level codes:
      #   0 = no issues found
      #   1 = Low severity issues found
      #   2 = Medium severity issues found
      #   3 = High severity issues found
      #   4 = Critical severity issues found
      #   5 = Analyzer failed early
      #   9 = Analyzer skipped
      DIFF_ANALYZER_LEVELS:
        description: Code diff analyzer levels to output (0/1/2/3/4/5/9)
        value: ${{ jobs.Code-Diff-Analyzer.outputs.OUTPUT_DIFF_ANALYZER_LEVELS }}
      HEAD_COMMIT_SHA:
        description: HEAD commit sha value to output (commit id)
        value: ${{ jobs.Code-Diff-Analyzer.outputs.OUTPUT_HEAD_COMMIT_SHA }}
jobs:
  Code-Diff-Analyzer:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      # github oidc to assume aws roles
      id-token: write
      # to create or update comment (peter-evans/create-or-update-comment)
      pull-requests: write
    env:
      # Model selection and limits read by the Claude Code CLI.
      AWS_REGION: 'us-east-1'
      CLAUDE_CODE_USE_BEDROCK: '1'
      ANTHROPIC_MODEL: 'us.anthropic.claude-sonnet-4-6'
      ANTHROPIC_SMALL_FAST_MODEL: 'us.anthropic.claude-haiku-4-5-20251001-v1:0'
      CLAUDE_CODE_MAX_INPUT_TOKENS: '160000'
      CLAUDE_CODE_MAX_OUTPUT_TOKENS: '4096'
      MAX_THINKING_TOKENS: '1024'
      # Scratch files shared between the steps of this job.
      DIFF_CONTENT_PATH: 'git_diff_content.txt'
      DIFF_REPORT_PATH: 'git_diff_analyzer.json'
    outputs:
      # Both values are produced by the step with id `step-final-status`.
      OUTPUT_DIFF_ANALYZER_LEVELS: ${{ steps.step-final-status.outputs.DIFF_ANALYZER_LEVELS }}
      OUTPUT_HEAD_COMMIT_SHA: ${{ steps.step-final-status.outputs.HEAD_COMMIT_SHA }}
    steps:
- name: Retrive ref sha
  # Resolves the base/head commits for the triggering event and decides
  # whether the analyzer should be skipped.
  #   Writes to GITHUB_ENV: BASE_SHA, HEAD_SHA, and optionally
  #   diff_analyzer (9 = skipped, 5 = failed early) and diff_report.
  # Context values are handed to the script through `env` instead of being
  # pasted into the script text, so they are always treated as data.
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    EVENT_NAME: ${{ github.event_name }}
    PUSH_BASE_SHA: ${{ github.event.before }}
    PUSH_HEAD_SHA: ${{ github.event.after }}
    PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
    PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
    PR_NUMBER: ${{ github.event.number }}
    SKIP_ON_PUSH: ${{ inputs.skip_diff_analyzer_on_push }}
    SKIP_LABEL: ${{ inputs.skip_diff_analyzer_with_label_name }}
  run: |
    if [ "$EVENT_NAME" = "push" ]; then
      base_sha="$PUSH_BASE_SHA"
      head_sha="$PUSH_HEAD_SHA"
      if [ "$SKIP_ON_PUSH" = "true" ]; then
        echo "Diff analyzer skipped due to inputs.skip_diff_analyzer_on_push is set to 'true'"
        echo "diff_analyzer=9" >> "$GITHUB_ENV"
      fi
    elif [ "$EVENT_NAME" = "pull_request_target" ]; then
      base_sha="$PR_BASE_SHA"
      head_sha="$PR_HEAD_SHA"
      # Labels are fetched live from the API (one name per output line)
      # rather than read from the event payload.
      PR_LABELS=$(curl -H "Authorization: token $GH_TOKEN" \
        -H "Accept: application/vnd.github.v3+json" \
        "https://api.github.com/repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/labels?per_page=100" \
        | jq -r '.[].name')
      echo "labels: $PR_LABELS"
      echo "Verifying skip label: $SKIP_LABEL"
      if [ -n "$PR_LABELS" ]; then
        # Read line by line so that label names containing spaces are
        # compared as a whole; word-splitting would let a label such as
        # "foo skip-diff-analyzer" match the skip label by accident.
        while IFS= read -r label; do
          if [ "$label" = "$SKIP_LABEL" ]; then
            echo "Diff analyzer skipped due to label $SKIP_LABEL."
            echo "diff_analyzer=9" >> "$GITHUB_ENV"
            break
          fi
        done <<< "$PR_LABELS"
      fi
    else
      echo "wrong github event: $EVENT_NAME, must be 'push' or 'pull_request_target'"
      echo "diff_analyzer=5" >> "$GITHUB_ENV"
      echo "diff_report='wrong github event'" >> "$GITHUB_ENV"
      exit 1
    fi
    echo "BASE_SHA=$base_sha" >> "$GITHUB_ENV"
    echo "HEAD_SHA=$head_sha" >> "$GITHUB_ENV"
- name: Get diff details
  # Downloads the base...head diff through the compare API into
  # $DIFF_CONTENT_PATH and rejects diffs whose estimated token count
  # (bytes / 3) reaches $CLAUDE_CODE_MAX_INPUT_TOKENS.
  # On any failure it records diff_analyzer=5 plus a diff_report message.
  if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    echo "Get diff between base($BASE_SHA) and head($HEAD_SHA)"
    # --fail makes curl return non-zero on an HTTP error; without it the
    # JSON error body would be saved and later analyzed as if it were the
    # diff, letting the check pass without looking at any code.
    if ! curl --fail --silent --show-error \
      -H "Authorization: token $GH_TOKEN" \
      -H "Accept: application/vnd.github.v3.diff" \
      "https://api.github.com/repos/$GITHUB_REPOSITORY/compare/${BASE_SHA}...${HEAD_SHA}" > "$DIFF_CONTENT_PATH"; then
      echo "Failed to retrieve diff from GitHub API"
      echo "diff_analyzer=5" >> "$GITHUB_ENV"
      echo "diff_report='Failed to retrieve diff from GitHub API'" >> "$GITHUB_ENV"
      exit 1
    fi
    DIFF_SIZE=$(wc -c < "$DIFF_CONTENT_PATH")
    DIFF_TOKEN_EST=$((DIFF_SIZE / 3))
    echo "Check diff size of '$DIFF_SIZE', token number '$DIFF_TOKEN_EST', max token limit '$CLAUDE_CODE_MAX_INPUT_TOKENS'"
    if [ "$DIFF_TOKEN_EST" -ge "$CLAUDE_CODE_MAX_INPUT_TOKENS" ]; then
      echo "Diff too large, requires skip by maintainers after manual review"
      echo "diff_analyzer=5" >> "$GITHUB_ENV"
      echo "diff_report='Diff too large, requires skip by maintainers after manual review'" >> "$GITHUB_ENV"
      exit 1
    fi
    echo "-------------------------------------------------------"
    cat "$DIFF_CONTENT_PATH"
    echo "-------------------------------------------------------"
# Node.js runtime; skipped once the analyzer is marked failed (5) or skipped (9).
- if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
  uses: actions/setup-node@v6
  with:
    node-version: '24'
- name: Install necessary packages
  # Installs the `claude` CLI (npm) and the `json_repair` tool (pip);
  # skipped once the analyzer is marked failed (5) or skipped (9).
  if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
  run: |
    # Claude Code CLI, taken from the `stable` dist-tag
    npm install -g @anthropic-ai/claude-code@stable
    # json-repair, pinned to an exact version
    pip install json-repair==0.55.2
- name: Configure AWS credentials
  # Assumes the role supplied by the caller through the
  # BEDROCK_ACCESS_ROLE secret.
  if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
  uses: aws-actions/configure-aws-credentials@v6
  with:
    role-to-assume: ${{ secrets.BEDROCK_ACCESS_ROLE }}
    # Reuse the job-level AWS_REGION so the region is defined in one place.
    aws-region: ${{ env.AWS_REGION }}
# yamllint disable
- name: Verify file diffs
  # Sends the diff to the `claude` CLI with a fixed security-review prompt,
  # repairs/parses the JSON answer and derives:
  #   diff_report   - HTML summary used for the PR comment
  #   diff_analyzer - 0 (clean) .. 4 (critical), from the reported counts
  # The model output is influenced by the (untrusted) diff, so every field
  # is sanitized before it is written to GITHUB_ENV or embedded in HTML.
  if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
  run: |
    # Quoted delimiter: the prompt is taken literally, nothing is expanded.
    PROMPT=$(cat <<'EOF'
    Analyze the git diff for MALICIOUS CODE and INTENTIONAL SECURITY THREATS.
    **PRIMARY FOCUS: Detect deliberate attempts to compromise security, not coding mistakes.**
    **MANDATORY RULE — SUPPLY CHAIN / DEPENDENCY CHANGES:**
    - Any dependency, package registry, or build plugin change MUST be flagged as **high** severity
    - Do NOT judge whether a dependency name looks "legitimate" — you cannot verify artifact authenticity
    - Namespace hijacking and typosquatting are common attack techniques
    - Always flag and let maintainers verify
    - This applies to changes including but not limited to:
    - Adding, removing, or changing versions of libraries, packages, or modules
    - Adding or modifying package source URLs or repository configurations
    - Annotation processors, postinstall/prebuild scripts, build plugins, or compiler extensions
    - Modifying FROM directives or adding package installs in container files
    **Review for security issues including but not limited to:**
    - **Data exfiltration**: Unauthorized transmission of secrets, credentials, or sensitive data to external endpoints
    - **Backdoors**: Hidden access mechanisms, debug modes left enabled, or authentication bypasses
    - **Obfuscated logic**: Base64-encoded payloads, hex-encoded strings, heavily obfuscated code without clear purpose
    - **Suspicious network calls**: Unexpected external API calls, DNS queries, or data transmission to unknown domains
    - **Credential harvesting**: Code that captures, logs, or transmits credentials/tokens beyond normal usage
    - **Time bombs/logic bombs**: Code triggered by specific dates, conditions, or external signals
    - **Privilege escalation**: Deliberate attempts to gain elevated permissions or bypass authorization
    - **Supply chain attacks**: Any dependency, package registry, or build plugin change (see mandatory rule above)
    - **Environment manipulation**: Attempts to modify CI/CD variables, Jenkins secrets, or deployment configurations
    - **Steganography or covert channels**: Hidden data transmission in images, comments, or metadata
    **IMPORTANT DISTINCTIONS:**
    - Ignore common coding mistakes (e.g., missing input validation unless clearly intentional)
    - Focus on INTENT: Is this code deliberately trying to do something malicious?
    - Consider context: Is this behavior justified by the feature being implemented?
    - Flag anomalies: Code that seems unrelated to the stated PR purpose
    - Dependency/package changes are NEVER considered justified by context — always flag as high
    **Classify each issue by severity:**
    - **critical**: Clear evidence of malicious intent with immediate security impact (data exfiltration, backdoors)
    - **high**: Highly suspicious patterns that likely indicate malicious intent but may have alternate explanations, OR changes that match a mandatory flagging rule above
    - **medium**: Unusual patterns that warrant investigation but could be legitimate
    - **low**: Minor anomalies or code that seems out of place but has plausible explanations
    **OUTPUT CONSTRAINTS:**
    - Maximum 10 issues in the output
    - Prioritize by severity: critical > high > medium > low
    - If more than 10 issues found, include only the top 10 most severe
    - In "counts.total", report the ACTUAL total number of issues found (even if truncated)
    - Update the "truncated" boolean field to 'true' if issues were limited in output, else keep it 'false'
    **IMPORTANT: Your response must be ONLY the raw JSON object. Do NOT wrap it in markdown code blocks. Do NOT add any explanation before or after. Start your response with { and end with }.**
    **Required JSON format:**
    {
    "counts": {
    "total": <number>,
    "critical": <number>,
    "high": <number>,
    "medium": <number>,
    "low": <number>
    },
    "truncated": <boolean>,
    "issues": [
    {
    "path": "path/to/file",
    "line": <number>,
    "severity": "critical|high|medium|low",
    "description": "Brief explanation of the issue"
    }
    ]
    }
    EOF
    )
    # The diff is supplied on stdin, the instructions as the -p argument.
    claude -p "$PROMPT" < "$DIFF_CONTENT_PATH" > "$DIFF_REPORT_PATH"
    json_repair "$DIFF_REPORT_PATH" --inline --strict
    echo "Processing AI results"
    diff_report_json=$(jq -c '.' "$DIFF_REPORT_PATH")
    echo "-------------------------------------------------------"
    # Always quote: an unquoted expansion is word-split and glob-expanded,
    # which can alter the JSON text before jq sees it.
    echo "$diff_report_json" | jq -r '.'
    echo "-------------------------------------------------------"
    # The model call is done; drop the AWS credentials from this shell.
    unset AWS_ACCESS_KEY_ID
    unset AWS_SECRET_ACCESS_KEY
    unset AWS_SESSION_TOKEN
    echo "Start issue count"
    DIFF_ISSUE_TOTAL=$(echo "$diff_report_json" | jq -r .counts.total)
    DIFF_ISSUE_CRITICAL=$(echo "$diff_report_json" | jq -r .counts.critical)
    DIFF_ISSUE_HIGH=$(echo "$diff_report_json" | jq -r .counts.high)
    DIFF_ISSUE_MEDIUM=$(echo "$diff_report_json" | jq -r .counts.medium)
    DIFF_ISSUE_LOW=$(echo "$diff_report_json" | jq -r .counts.low)
    DIFF_TRUNCATED=$(echo "$diff_report_json" | jq -r .truncated)
    echo "Issue Count: Total($DIFF_ISSUE_TOTAL), Critical($DIFF_ISSUE_CRITICAL), High($DIFF_ISSUE_HIGH), Medium($DIFF_ISSUE_MEDIUM), Low($DIFF_ISSUE_LOW)"
    echo "-------------------------------------------------------"
    if [ "$DIFF_ISSUE_TOTAL" != "0" ]; then
      echo "Diff analyzer found issues, generating report"
      # `cell` renders one value for the report: line breaks are collapsed
      # so the result stays a single GITHUB_ENV line (a raw newline would
      # let the model output define arbitrary environment variables), and
      # HTML metacharacters are escaped before landing in the PR comment.
      diff_report_temp=$(echo "$diff_report_json" | jq -r '
        def cell: (. // "") | tostring | gsub("[\r\n]+"; " ") | @html;
        "<table><tr><th>Path</th><th>Line</th><th>Severity</th><th>Description</th></tr>" +
        (
          .issues | map(
            "<tr><td>" + (.path | cell) + "</td><td>" +
            (.line | cell) + "</td><td>" +
            (.severity | cell) + "</td><td>" +
            (.description | cell) + "</td></tr>"
          ) | join("")
        ) +
        "</table><p>" +
        "<i>The table above displays the top 10 most important findings.</i> <br/><br/>" +
        "<strong>Total: " + (.counts.total | cell) +
        " | Critical: " + (.counts.critical | cell) +
        " | High: " + (.counts.high | cell) +
        " | Medium: " + (.counts.medium | cell) +
        " | Low: " + (.counts.low | cell) + "</strong></p>"
      ')
      printf '%s\n' "diff_report=$diff_report_temp" >> "$GITHUB_ENV"
    else
      echo "Diff analyzer did not find any specific issues yet"
      echo "diff_report='No specific issues has been found by AI on this PR'" >> "$GITHUB_ENV"
    fi
    # Highest severity with a non-zero count decides the level.
    if [ "$DIFF_ISSUE_CRITICAL" != "0" ]; then
      echo "diff_analyzer=4" >> "$GITHUB_ENV"
    elif [ "$DIFF_ISSUE_HIGH" != "0" ]; then
      echo "diff_analyzer=3" >> "$GITHUB_ENV"
    elif [ "$DIFF_ISSUE_MEDIUM" != "0" ]; then
      echo "diff_analyzer=2" >> "$GITHUB_ENV"
    elif [ "$DIFF_ISSUE_LOW" != "0" ]; then
      echo "diff_analyzer=1" >> "$GITHUB_ENV"
    else
      echo "diff_analyzer=0" >> "$GITHUB_ENV"
    fi
# yamllint enable
- name: Check final status
  id: step-final-status
  # Publishes DIFF_ANALYZER_LEVELS and HEAD_COMMIT_SHA as step outputs and
  # fails the job when the level reaches inputs.hard_fail_level.
  # always(): this step must also run after an earlier step has failed,
  # otherwise the outputs are never written and level 5 is never reported.
  if: ${{ always() }}
  env:
    ANALYZER_LEVEL: ${{ env.diff_analyzer }}
    HARD_FAIL_LEVEL: ${{ inputs.hard_fail_level }}
  run: |
    # No level recorded means an earlier step stopped before setting one;
    # report that as 5 so the numeric comparison below has an operand.
    level="${ANALYZER_LEVEL:-5}"
    echo "DIFF_ANALYZER_LEVELS=$level" >> "$GITHUB_OUTPUT"
    echo "HEAD_COMMIT_SHA=$HEAD_SHA" >> "$GITHUB_OUTPUT"
    case "$level" in
      5) echo "Previous steps failed, no diff status" ;;
      0) echo "Diff analyzer passed" ;;
      9) echo "Diff analyzer skipped" ;;
      *) echo "Diff analyzer has found issues in the code changes per AI Analysis" ;;
    esac
    # 9 (skipped) is exempt from the hard-fail threshold.
    if [ "$level" != "9" ] && [ "$level" -ge "$HARD_FAIL_LEVEL" ]; then
      echo "Hard fail diff analyzer at level $HARD_FAIL_LEVEL"
      exit 1
    fi
- name: Find existing PR Code Analyzer comment
  id: find-comment
  # Looks up an earlier analyzer comment on the pull request. Runs even
  # after a failed step, but only for pull_request_target events with
  # comment reporting enabled and a level other than 0 or 9.
  if: >-
    always() &&
    github.event_name == 'pull_request_target' &&
    inputs.update_pr_comment_with_analyzer_report &&
    env.diff_analyzer != '0' &&
    env.diff_analyzer != '9'
  uses: peter-evans/find-comment@v3
  with:
    issue-number: ${{ github.event.number }}
    body-includes: '## PR Code Analyzer'
    comment-author: 'github-actions[bot]'
- name: Create Comment Failure Git Diff Analyzer
  # Posts the analyzer report on the pull request, passing along the
  # comment id found by the `find-comment` step with edit-mode `replace`.
  # Same run conditions as the find-comment step.
  if: >-
    always() &&
    github.event_name == 'pull_request_target' &&
    inputs.update_pr_comment_with_analyzer_report &&
    env.diff_analyzer != '0' &&
    env.diff_analyzer != '9'
  uses: peter-evans/create-or-update-comment@v5
  with:
    repository: ${{ github.repository }}
    issue-number: ${{ github.event.number }}
    comment-id: ${{ steps.find-comment.outputs.comment-id }}
    edit-mode: replace
    body: |
      ## PR Code Analyzer :exclamation:
      **AI-powered '*Code-Diff-Analyzer*' found issues on commit ${{ env.HEAD_SHA }}.**
      ${{ env.diff_report }}
      ***
      **Pull Requests Author(s)**: Please update your Pull Request according to the report above.
      **Repository Maintainer(s)**: You can `bypass diff analyzer` by adding label `${{ inputs.skip_diff_analyzer_with_label_name }}` after reviewing the changes carefully, then `re-run failed actions`. To re-enable the analyzer, remove the label, then `re-run all actions`.
      ***
      :warning: Note: The *Code-Diff-Analyzer* helps protect against potentially harmful code patterns. Please ensure you have thoroughly reviewed the changes beforehand.
      Thanks.