opensearch-build/.github/workflows/code-diff-analyzer.yml at main · opensearch-project/opensearch-build · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
---
name: Code Diff Analyzer
# Reusable workflow: it is only triggered through `workflow_call`. Callers pass
# the secret and optional inputs declared below and read the analyzer level and
# the analyzed head commit back from `outputs`.
on:
  workflow_call:
    secrets:
      # Passed as `role-to-assume` to the "Configure AWS credentials" step.
      BEDROCK_ACCESS_ROLE:
        required: true
    inputs:
      skip_diff_analyzer_on_push:  # Only consulted when the triggering event is push
        required: false
        type: boolean
        default: true
      skip_diff_analyzer_with_label_name:  # Only consulted when the triggering event is pull_request_target
        required: false
        type: string
        default: 'skip-diff-analyzer'
      update_pr_comment_with_analyzer_report:  # Only consulted when the triggering event is pull_request_target
        required: false
        type: boolean
        default: false
      hard_fail_level:
        required: false
        type: number
        default: 3  # hard fail the workflow on issues with 'high' severity or above
    outputs:
      # 0 = no issues found
      # 1 = Low severity issues found
      # 2 = Medium severity issues found
      # 3 = High severity issues found
      # 4 = Critical severity issues found
      # 5 = Analyzer failed early
      # 9 = Analyzer skipped
      DIFF_ANALYZER_LEVELS:
        description: Code diff analyzer levels to output (0/1/2/3/4/5/9)
        value: ${{ jobs.Code-Diff-Analyzer.outputs.OUTPUT_DIFF_ANALYZER_LEVELS }}
      HEAD_COMMIT_SHA:
        description: HEAD commit sha value to output (commit id)
        value: ${{ jobs.Code-Diff-Analyzer.outputs.OUTPUT_HEAD_COMMIT_SHA }}

jobs:
  # Single job: resolves the commit range of the triggering event, downloads the
  # diff, pipes it to the `claude` CLI for review and publishes the resulting
  # level (and optionally a PR comment).
  Code-Diff-Analyzer:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # github oidc to assume aws roles
      pull-requests: write  # to create or update comment (peter-evans/create-or-update-comment)
    env:
      # Job-wide environment, visible to every step below.
      # NOTE(review): the ANTHROPIC_*, CLAUDE_CODE_* and MAX_THINKING_TOKENS
      # variables are presumably read by the Claude Code CLI - confirm the exact
      # names and model ids against its documentation.
      AWS_REGION: 'us-east-1'
      ANTHROPIC_MODEL: 'us.anthropic.claude-sonnet-4-6'
      ANTHROPIC_SMALL_FAST_MODEL: 'us.anthropic.claude-haiku-4-5-20251001-v1:0'
      CLAUDE_CODE_USE_BEDROCK: '1'
      # Also used by the "Get diff details" step as the upper bound for the
      # estimated token count of the diff.
      CLAUDE_CODE_MAX_INPUT_TOKENS: '160000'
      CLAUDE_CODE_MAX_OUTPUT_TOKENS: '4096'
      MAX_THINKING_TOKENS: '1024'
      # File the raw diff is downloaded to.
      DIFF_CONTENT_PATH: 'git_diff_content.txt'
      # File the analyzer's JSON report is written to.
      DIFF_REPORT_PATH: 'git_diff_analyzer.json'
    timeout-minutes: 10
    outputs:
      # Both values are produced by the step with id `step-final-status`.
      OUTPUT_DIFF_ANALYZER_LEVELS: ${{ steps.step-final-status.outputs.DIFF_ANALYZER_LEVELS }}
      OUTPUT_HEAD_COMMIT_SHA: ${{ steps.step-final-status.outputs.HEAD_COMMIT_SHA }}
    steps:
      # Resolves the base/head commits of the triggering event and decides
      # whether the analyzer has to be skipped. Results are handed to the later
      # steps through $GITHUB_ENV:
      #   BASE_SHA / HEAD_SHA - commit range to diff
      #   diff_analyzer       - 9 when skipped, 5 on an unsupported event
      #   diff_report         - short reason, only set on an unsupported event
      # All expression values are passed in through `env` instead of being
      # spliced into the script text, so their content can never be interpreted
      # as shell code.
      - name: Retrive ref sha
        env:
          EVENT_NAME: ${{ github.event_name }}
          REPOSITORY: ${{ github.repository }}
          PUSH_BASE_SHA: ${{ github.event.before }}
          PUSH_HEAD_SHA: ${{ github.event.after }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PR_NUMBER: ${{ github.event.number }}
          SKIP_ON_PUSH: ${{ inputs.skip_diff_analyzer_on_push }}
          SKIP_LABEL: ${{ inputs.skip_diff_analyzer_with_label_name }}
          API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Without pipefail a failing curl would be hidden by jq succeeding on
          # empty input, and the skip label would silently be ignored.
          set -o pipefail

          if [ "$EVENT_NAME" = "push" ]; then
            base_sha="$PUSH_BASE_SHA"
            head_sha="$PUSH_HEAD_SHA"
            if [ "$SKIP_ON_PUSH" = "true" ]; then
              echo "Diff analyzer skipped due to inputs.skip_diff_analyzer_on_push is set to 'true'"
              echo "diff_analyzer=9" >> "$GITHUB_ENV"
            fi
          elif [ "$EVENT_NAME" = "pull_request_target" ]; then
            base_sha="$PR_BASE_SHA"
            head_sha="$PR_HEAD_SHA"
            # Labels are read from the API rather than from the event payload,
            # presumably so that a label added before a re-run is honored.
            PR_LABELS=$(curl --fail --silent --show-error \
              -H "Authorization: token $API_TOKEN" \
              -H "Accept: application/vnd.github.v3+json" \
              "https://api.github.com/repos/$REPOSITORY/issues/$PR_NUMBER/labels?per_page=100" \
              | jq -r '.[].name')
            echo "labels: $PR_LABELS"
            echo "Verifying skip label: $SKIP_LABEL"
            # One label per line, compared as a whole fixed string: label names
            # may contain spaces, so they must not be word-split, and a partial
            # word of another label must never count as a match.
            if [ -n "$PR_LABELS" ] && [ -n "$SKIP_LABEL" ] \
              && grep -Fxq -- "$SKIP_LABEL" <<< "$PR_LABELS"; then
              echo "Diff analyzer skipped due to label $SKIP_LABEL."
              echo "diff_analyzer=9" >> "$GITHUB_ENV"
            fi
          else
            echo "wrong github event: $EVENT_NAME, must be 'push' or 'pull_request_target'"
            echo "diff_analyzer=5" >> "$GITHUB_ENV"
            echo "diff_report='wrong github event'" >> "$GITHUB_ENV"
            exit 1
          fi

          echo "BASE_SHA=$base_sha" >> "$GITHUB_ENV"
          echo "HEAD_SHA=$head_sha" >> "$GITHUB_ENV"

      # Downloads the BASE_SHA...HEAD_SHA comparison as a diff into
      # $DIFF_CONTENT_PATH and rejects it when its estimated token count
      # (bytes / 3) reaches $CLAUDE_CODE_MAX_INPUT_TOKENS.
      # On any failure the step records diff_analyzer=5 plus a short
      # diff_report in $GITHUB_ENV before exiting non-zero.
      - name: Get diff details
        if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
        env:
          REPOSITORY: ${{ github.repository }}
          API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          echo "Get diff between base($BASE_SHA) and head($HEAD_SHA)"
          # --fail is essential: without it an API error body (for example for
          # an unknown base commit) would be stored and analyzed as if it were
          # the diff, and the analyzer would report a clean result.
          if ! curl --fail --silent --show-error --location \
            -H "Authorization: token $API_TOKEN" \
            -H "Accept: application/vnd.github.v3.diff" \
            --output "$DIFF_CONTENT_PATH" \
            "https://api.github.com/repos/$REPOSITORY/compare/${BASE_SHA}...${HEAD_SHA}"; then
            echo "Failed to download the diff from the GitHub API"
            echo "diff_analyzer=5" >> "$GITHUB_ENV"
            echo "diff_report='Failed to download the diff from the GitHub API'" >> "$GITHUB_ENV"
            exit 1
          fi

          DIFF_SIZE=$(wc -c < "$DIFF_CONTENT_PATH")
          # Rough estimate: about three bytes per token.
          DIFF_TOKEN_EST=$((DIFF_SIZE / 3))
          echo "Check diff size of '$DIFF_SIZE', token number '$DIFF_TOKEN_EST', max token limit '$CLAUDE_CODE_MAX_INPUT_TOKENS'"
          if [ "$DIFF_TOKEN_EST" -ge "$CLAUDE_CODE_MAX_INPUT_TOKENS" ]; then
            echo "Diff too large, requires skip by maintainers after manual review"
            echo "diff_analyzer=5" >> "$GITHUB_ENV"
            echo "diff_report='Diff too large, requires skip by maintainers after manual review'" >> "$GITHUB_ENV"
            exit 1
          fi
          echo "-------------------------------------------------------"
          cat "$DIFF_CONTENT_PATH"
          echo "-------------------------------------------------------"

      # Provides Node.js 24 (and with it the `npm` used by the next step).
      # Skipped when an earlier step already marked the run as failed (5) or
      # skipped (9).
      - uses: actions/setup-node@v6
        if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
        with:
          node-version: 24

      # Installs the command line tools called by the "Verify file diffs" step;
      # presumably these packages provide the `claude` and `json_repair`
      # commands used there.
      # NOTE(review): claude-code follows the floating `stable` dist-tag while
      # json-repair is pinned to an exact version - confirm this is intentional.
      - name: Install necessary packages
        if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
        run: |
          npm install -g @anthropic-ai/claude-code@stable
          pip install json-repair==0.55.2

      # Assumes the role supplied by the caller (secret BEDROCK_ACCESS_ROLE) so
      # that the analyzer step can reach AWS. The region is taken from the
      # job-level AWS_REGION so both settings cannot drift apart.
      - name: Configure AWS credentials
        if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: ${{ secrets.BEDROCK_ACCESS_ROLE }}
          aws-region: ${{ env.AWS_REGION }}

      # yamllint disable
      # Sends the downloaded diff to the `claude` CLI together with the review
      # prompt below, repairs/normalizes the JSON answer and exports through
      # $GITHUB_ENV:
      #   diff_report   - single-line HTML summary used in the PR comment
      #   diff_analyzer - 0 (clean) .. 4 (critical), from the reported counts
      # The answer of the model is derived from untrusted diff content, so
      # everything taken from it is HTML-escaped and stripped of line breaks
      # before it is written to $GITHUB_ENV.
      - name: Verify file diffs
        if: ${{ env.diff_analyzer != '5' && env.diff_analyzer != '9' }}
        run: |
          # The quoted delimiter keeps the prompt literal (no shell expansion)
          # and avoids depending on hard tabs inside this YAML file.
          PROMPT=$(cat <<'EOF'
          Analyze the git diff for MALICIOUS CODE and INTENTIONAL SECURITY THREATS.

          **PRIMARY FOCUS: Detect deliberate attempts to compromise security, not coding mistakes.**

          **MANDATORY RULE — SUPPLY CHAIN / DEPENDENCY CHANGES:**
          - Any dependency, package registry, or build plugin change MUST be flagged as **high** severity
          - Do NOT judge whether a dependency name looks "legitimate" — you cannot verify artifact authenticity
          - Namespace hijacking and typosquatting are common attack techniques
          - Always flag and let maintainers verify
          - This applies to changes including but not limited to:
            - Adding, removing, or changing versions of libraries, packages, or modules
            - Adding or modifying package source URLs or repository configurations
            - Annotation processors, postinstall/prebuild scripts, build plugins, or compiler extensions
            - Modifying FROM directives or adding package installs in container files

          **Review for security issues including but not limited to:**
          - **Data exfiltration**: Unauthorized transmission of secrets, credentials, or sensitive data to external endpoints
          - **Backdoors**: Hidden access mechanisms, debug modes left enabled, or authentication bypasses
          - **Obfuscated logic**: Base64-encoded payloads, hex-encoded strings, heavily obfuscated code without clear purpose
          - **Suspicious network calls**: Unexpected external API calls, DNS queries, or data transmission to unknown domains
          - **Credential harvesting**: Code that captures, logs, or transmits credentials/tokens beyond normal usage
          - **Time bombs/logic bombs**: Code triggered by specific dates, conditions, or external signals
          - **Privilege escalation**: Deliberate attempts to gain elevated permissions or bypass authorization
          - **Supply chain attacks**: Any dependency, package registry, or build plugin change (see mandatory rule above)
          - **Environment manipulation**: Attempts to modify CI/CD variables, Jenkins secrets, or deployment configurations
          - **Steganography or covert channels**: Hidden data transmission in images, comments, or metadata

          **IMPORTANT DISTINCTIONS:**
          - Ignore common coding mistakes (e.g., missing input validation unless clearly intentional)
          - Focus on INTENT: Is this code deliberately trying to do something malicious?
          - Consider context: Is this behavior justified by the feature being implemented?
          - Flag anomalies: Code that seems unrelated to the stated PR purpose
          - Dependency/package changes are NEVER considered justified by context — always flag as high

          **Classify each issue by severity:**
          - **critical**: Clear evidence of malicious intent with immediate security impact (data exfiltration, backdoors)
          - **high**: Highly suspicious patterns that likely indicate malicious intent but may have alternate explanations, OR changes that match a mandatory flagging rule above
          - **medium**: Unusual patterns that warrant investigation but could be legitimate
          - **low**: Minor anomalies or code that seems out of place but has plausible explanations

          **OUTPUT CONSTRAINTS:**
          - Maximum 10 issues in the output
          - Prioritize by severity: critical > high > medium > low
          - If more than 10 issues found, include only the top 10 most severe
          - In "counts.total", report the ACTUAL total number of issues found (even if truncated)
          - Update the "truncated" boolean field to 'true' if issues were limited in output, else keep it 'false'

          **IMPORTANT: Your response must be ONLY the raw JSON object. Do NOT wrap it in markdown code blocks. Do NOT add any explanation before or after. Start your response with { and end with }.**

          **Required JSON format:**
          {
            "counts": {
              "total": <number>,
              "critical": <number>,
              "high": <number>,
              "medium": <number>,
              "low": <number>
            },
            "truncated": <boolean>,
            "issues": [
              {
                "path": "path/to/file",
                "line": <number>,
                "severity": "critical|high|medium|low",
                "description": "Brief explanation of the issue"
              }
            ]
          }

          EOF
          )
          claude -p "$PROMPT" < "$DIFF_CONTENT_PATH" > "$DIFF_REPORT_PATH"
          json_repair "$DIFF_REPORT_PATH" --inline --strict
          echo "Processing AI results"
          diff_report_json=$(jq -c '.' "$DIFF_REPORT_PATH")
          echo "-------------------------------------------------------"
          # Quoted on purpose: unquoted, the JSON text would be word-split and
          # glob-expanded by the shell before jq sees it.
          echo "$diff_report_json" | jq -r '.'
          echo "-------------------------------------------------------"
          # Only affects the remainder of this step's shell; the credentials
          # are no longer needed once the model has answered.
          unset AWS_ACCESS_KEY_ID
          unset AWS_SECRET_ACCESS_KEY
          unset AWS_SESSION_TOKEN

          echo "Start issue count"
          DIFF_ISSUE_TOTAL=$(jq -r '.counts.total' <<< "$diff_report_json")
          DIFF_ISSUE_CRITICAL=$(jq -r '.counts.critical' <<< "$diff_report_json")
          DIFF_ISSUE_HIGH=$(jq -r '.counts.high' <<< "$diff_report_json")
          DIFF_ISSUE_MEDIUM=$(jq -r '.counts.medium' <<< "$diff_report_json")
          DIFF_ISSUE_LOW=$(jq -r '.counts.low' <<< "$diff_report_json")
          echo "Issue Count: Total($DIFF_ISSUE_TOTAL), Critical($DIFF_ISSUE_CRITICAL), High($DIFF_ISSUE_HIGH), Medium($DIFF_ISSUE_MEDIUM), Low($DIFF_ISSUE_LOW)"
          echo "-------------------------------------------------------"

          if [ "$DIFF_ISSUE_TOTAL" != "0" ]; then
            echo "Diff analyzer found issues, generating report"

            # `cell` HTML-escapes every model-provided value. The final gsub
            # guarantees a single line: a line break inside the value would
            # otherwise start a new NAME=value entry in $GITHUB_ENV and let the
            # analyzed diff inject environment variables into later steps.
            diff_report_temp=$(jq -r '
              def cell: (. // "") | tostring | @html;
              (
                "<table><tr><th>Path</th><th>Line</th><th>Severity</th><th>Description</th></tr>" +
                (
                  (.issues // []) | map(
                    "<tr><td>" + (.path | cell) + "</td><td>" +
                    (.line | cell) + "</td><td>" +
                    (.severity | cell) + "</td><td>" +
                    (.description | cell) + "</td></tr>"
                  ) | join("")
                ) +
                "</table><p>" +
                "<i>The table above displays the top 10 most important findings.</i> <br/><br/>" +
                "<strong>Total: " + (.counts.total | cell) +
                " | Critical: " + (.counts.critical | cell) +
                " | High: " + (.counts.high | cell) +
                " | Medium: " + (.counts.medium | cell) +
                " | Low: " + (.counts.low | cell) + "</strong></p>"
              ) | gsub("[\\r\\n]+"; " ")
            ' <<< "$diff_report_json")
            printf 'diff_report=%s\n' "$diff_report_temp" >> "$GITHUB_ENV"
          else
            echo "Diff analyzer did not find any specific issues yet"
            echo "diff_report='No specific issues has been found by AI on this PR'" >> "$GITHUB_ENV"
          fi

          # Highest reported severity wins. A missing count is printed by jq as
          # "null", which is not "0" and therefore counts as a finding.
          if [ "$DIFF_ISSUE_CRITICAL" != "0" ]; then
            echo "diff_analyzer=4" >> "$GITHUB_ENV"
          elif [ "$DIFF_ISSUE_HIGH" != "0" ]; then
            echo "diff_analyzer=3" >> "$GITHUB_ENV"
          elif [ "$DIFF_ISSUE_MEDIUM" != "0" ]; then
            echo "diff_analyzer=2" >> "$GITHUB_ENV"
          elif [ "$DIFF_ISSUE_LOW" != "0" ]; then
            echo "diff_analyzer=1" >> "$GITHUB_ENV"
          else
            echo "diff_analyzer=0" >> "$GITHUB_ENV"
          fi
      # yamllint enable

      # Publishes the step outputs consumed by the job outputs
      # (DIFF_ANALYZER_LEVELS, HEAD_COMMIT_SHA) and fails the job when the level
      # reaches inputs.hard_fail_level.
      # Runs with always(): otherwise the step is skipped as soon as an earlier
      # step fails, and the documented level 5 ("failed early") would never be
      # published. A missing level (an earlier step died before recording one)
      # is reported as 5 as well.
      - name: Check final status
        id: step-final-status
        if: ${{ always() }}
        env:
          HARD_FAIL_LEVEL: ${{ inputs.hard_fail_level }}
        run: |
          # diff_analyzer and HEAD_SHA arrive as environment variables because
          # earlier steps appended them to $GITHUB_ENV.
          level="${diff_analyzer:-5}"
          echo "DIFF_ANALYZER_LEVELS=$level" >> "$GITHUB_OUTPUT"
          echo "HEAD_COMMIT_SHA=${HEAD_SHA:-}" >> "$GITHUB_OUTPUT"

          if [ "$level" = "5" ]; then
            echo "Previous steps failed, no diff status"
          elif [ "$level" = "0" ]; then
            echo "Diff analyzer passed"
          elif [ "$level" = "9" ]; then
            echo "Diff analyzer skipped"
          else
            echo "Diff analyzer has found issues in the code changes per AI Analysis"
          fi

          # 9 means "skipped" and must never trip the numeric threshold.
          if [ "$level" != "9" ] && [ "$level" -ge "$HARD_FAIL_LEVEL" ]; then
            echo "Hard fail diff analyzer at level $HARD_FAIL_LEVEL"
            exit 1
          fi

      # Looks for a comment on the pull request that was written by
      # github-actions[bot] and contains the '## PR Code Analyzer' heading; its
      # id is passed to the following step as `comment-id`.
      # Runs even after a failed step (always()), but only for
      # pull_request_target events, when the caller enabled
      # update_pr_comment_with_analyzer_report, and when the level is neither
      # 0 (clean) nor 9 (skipped).
      - name: Find existing PR Code Analyzer comment
        if: ${{ always() && github.event_name == 'pull_request_target' && inputs.update_pr_comment_with_analyzer_report && env.diff_analyzer != '0' && env.diff_analyzer != '9' }}
        id: find-comment
        uses: peter-evans/find-comment@v3
        with:
          issue-number: ${{ github.event.number }}
          comment-author: 'github-actions[bot]'
          body-includes: '## PR Code Analyzer'

      # Posts the analyzer report (env.diff_report) for commit env.HEAD_SHA on
      # the pull request, under the same conditions as the find-comment step.
      # `edit-mode: replace` together with the comment id found by that step
      # overwrites the earlier report; presumably a new comment is created when
      # no id was found - confirm against the action's documentation.
      - name: Create Comment Failure Git Diff Analyzer
        if: ${{ always() && github.event_name == 'pull_request_target' && inputs.update_pr_comment_with_analyzer_report && env.diff_analyzer != '0' && env.diff_analyzer != '9' }}
        uses: peter-evans/create-or-update-comment@v5
        with:
          comment-id: ${{ steps.find-comment.outputs.comment-id }}
          repository: ${{ github.repository }}
          issue-number: ${{ github.event.number }}
          edit-mode: replace
          body: |
            ## PR Code Analyzer :exclamation:

            **AI-powered '*Code-Diff-Analyzer*' found issues on commit ${{ env.HEAD_SHA }}.**

            ${{ env.diff_report }}

            ***

            **Pull Requests Author(s)**: Please update your Pull Request according to the report above.

            **Repository Maintainer(s)**: You can `bypass diff analyzer` by adding label `${{ inputs.skip_diff_analyzer_with_label_name }}` after reviewing the changes carefully, then `re-run failed actions`. To re-enable the analyzer, remove the label, then `re-run all actions`.

            ***

            :warning: Note: The *Code-Diff-Analyzer* helps protect against potentially harmful code patterns. Please ensure you have thoroughly reviewed the changes beforehand.

            Thanks.