Skip to content

Commit 3eee73e

Browse files
committed
ci: include high prevalence files
1 parent 3920493 commit 3eee73e

2 files changed

Lines changed: 158 additions & 23 deletions

File tree

.github/workflows/include-analysis-edges.yml

Lines changed: 100 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ on:
88
permissions: {}
99

1010
jobs:
11-
by_added_size:
12-
name: By added size
11+
by_edge_added_size:
12+
name: By edge added size
1313
runs-on: ubuntu-latest
1414
steps:
1515
- name: Checkout
@@ -27,7 +27,7 @@ jobs:
2727
curl https://commondatastorage.googleapis.com/chromium-browser-clang/include-analysis.js > include-analysis.js
2828
- name: Find Heavy Include edges
2929
run: |
30-
python extract_include_analysis_edges.py --filter-generated-files --filter-third-party --weight-threshold 75000000 > heavy-includes.csv
30+
python extract_include_analysis_edges.py --edges --filter-generated-files --filter-third-party --weight-threshold 75000000 > heavy-includes.csv
3131
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
3232
with:
3333
name: heavy-includes
@@ -105,8 +105,98 @@ jobs:
105105
106106
await core.summary.write();
107107
108-
by_prevalence:
109-
name: By prevalence
108+
by_file_prevalence:
109+
name: By file prevalence
110+
runs-on: ubuntu-latest
111+
steps:
112+
- name: Checkout
113+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
114+
with:
115+
repository: dsanders11/chromium-include-cleanup
116+
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
117+
with:
118+
python-version: '3.12'
119+
cache: 'pip'
120+
- name: Install Dependencies
121+
run: pip install -r requirements.txt
122+
- name: Download Include Analysis Output
123+
run: |
124+
curl https://commondatastorage.googleapis.com/chromium-browser-clang/include-analysis.js > include-analysis.js
125+
- name: Find Prevalent Files
126+
run: |
127+
python extract_include_analysis_edges.py --filter-generated-files --filter-third-party --metric prevalence --weight-threshold 10 > prevalent-files.csv
128+
- run: npm install @actions/cache
129+
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
130+
with:
131+
script: |
132+
// Publishes a job summary of the files listed in ./prevalent-files.csv.
//
// Each CSV row is `filename,added_size,prevalence,expanded_size`. A GitHub
// Actions cache entry keyed on the filename records that a file has been
// reported by a previous run, so files appearing for the first time can be
// listed in their own "Not Seen Before" table.
//
// `require` and `core` are provided by actions/github-script.
const fs = require('node:fs');
const cache = require('@actions/cache');

const data = fs.readFileSync('./prevalent-files.csv', 'utf8').trim();

// Drop blank lines so an empty CSV produces zero rows: ''.split('\n') is
// [''], which would otherwise be cached and reported as one file with an
// empty name and NaN metrics.
const lines = data.split('\n').filter((line) => line.trim() !== '');

const files = await Promise.all(lines.map(async (line) => {
  const [filename, addedSize, prevalence, expandedSize] = line.trim().split(',');

  // Check if this is known from a previous run
  const cacheKey = `prevalent-chromium-file-${filename}`;
  const restoredKey = await cache.restoreCache(['/dev/null'], cacheKey, undefined, {
    lookupOnly: true,
  });
  const cacheHit = restoredKey !== undefined;

  if (!cacheHit) {
    // Create a cache entry (only the name matters) to keep track of
    // files we've seen from previous runs to mark them as stale
    await cache.saveCache(['/dev/null'], cacheKey);
  }

  return [
    filename,
    Number.parseInt(addedSize, 10),
    Number.parseFloat(prevalence),
    Number.parseInt(expandedSize, 10),
    cacheHit,
  ];
}));

// Appends a table of the given rows to the job summary, highest prevalence
// first. The rows are copied before sorting so the caller's array keeps its
// original order.
const addTable = (rows) => {
  const byPrevalenceDesc = [...rows].sort((a, b) => b[2] - a[2]);

  core.summary.addTable([
    [
      { data: 'File', header: true },
      { data: 'Prevalence', header: true },
      { data: 'Added Size', header: true },
      { data: 'Expanded Size', header: true },
    ],
    // Numbers are converted back to strings or they won't render
    ...byPrevalenceDesc.map(([filename, addedSize, prevalence, expandedSize]) => [
      filename,
      `${prevalence.toFixed(2)}%`,
      addedSize.toLocaleString(),
      expandedSize.toLocaleString(),
    ]),
  ]);
};

core.summary.addHeading('🔗 Prevalent Chromium Files');
core.summary.addRaw(`Found ${files.length} files at 10%+ prevalence`);

const newlySeen = files.filter(([, , , , cacheHit]) => !cacheHit);
if (newlySeen.length > 0) {
  core.summary.addHeading('Not Seen Before', '2');
  addTable(newlySeen);
  core.summary.addHeading('All Files', '2');
} else {
  core.summary.addBreak();
}

addTable(files);

await core.summary.write();
197+
198+
by_edge_prevalence:
199+
name: By edge prevalence
110200
runs-on: ubuntu-latest
111201
steps:
112202
- name: Checkout
@@ -124,7 +214,7 @@ jobs:
124214
curl https://commondatastorage.googleapis.com/chromium-browser-clang/include-analysis.js > include-analysis.js
125215
- name: Find Prevalent Include edges
126216
run: |
127-
python extract_include_analysis_edges.py --filter-generated-files --filter-third-party --metric prevalence --weight-threshold 30 > prevalent-includes.csv
217+
python extract_include_analysis_edges.py --edges --filter-generated-files --filter-third-party --metric prevalence --weight-threshold 30 > prevalent-includes.csv
128218
- run: npm install @actions/cache
129219
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
130220
with:
@@ -198,8 +288,8 @@ jobs:
198288
199289
await core.summary.write();
200290
201-
by_centrality:
202-
name: By centrality
291+
by_edge_centrality:
292+
name: By edge centrality
203293
runs-on: ubuntu-latest
204294
steps:
205295
- name: Checkout
@@ -215,9 +305,9 @@ jobs:
215305
- name: Download Include Analysis Output
216306
run: |
217307
curl https://commondatastorage.googleapis.com/chromium-browser-clang/include-analysis.js > include-analysis.js
218-
- name: Find Prevalent Include edges
308+
- name: Find High Centrality Include edges
219309
run: |
220-
python extract_include_analysis_edges.py --filter-generated-files --filter-third-party --metric centrality --weight-threshold 0.04 > high-centrality-includes.csv
310+
python extract_include_analysis_edges.py --edges --filter-generated-files --filter-third-party --metric centrality --weight-threshold 0.04 > high-centrality-includes.csv
221311
- run: npm install @actions/cache
222312
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
223313
with:
Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
import csv
55
import logging
66
import os
7-
import re
87
import sys
9-
from datetime import datetime
108

119
from include_analysis import IncludeAnalysisOutput, ParseError, load_include_analysis
1210
from suggest_include_changes import filter_filenames
@@ -59,8 +57,43 @@ def extract_include_analysis_edges(
5957
yield file, include, size, prevalence, expanded_size, centrality
6058

6159

60+
def extract_include_analysis_files(
    include_analysis: IncludeAnalysisOutput,
    metric: str = None,
    weight_threshold=None,
    filter_generated_files=False,
    filter_mojom_headers=False,
    filter_third_party=False,
):
    """Yield per-file metrics from the include analysis, with filtering.

    The candidate files are include_analysis["files"] passed through
    filter_filenames() with the three filter_* flags.

    Args:
        include_analysis: Include analysis output; the "files", "asizes",
            "tsizes" and "prevalence" entries are read.
        metric: Which value to compare against weight_threshold:
            "input_size" (the "asizes" value), "expanded_size" (the "tsizes"
            value) or "prevalence". Any other value selects no weight, so
            no threshold filtering is applied.
        weight_threshold: When truthy, files whose selected weight is below
            this value are skipped.
        filter_generated_files: Forwarded to filter_filenames().
        filter_mojom_headers: Forwarded to filter_filenames().
        filter_third_party: Forwarded to filter_filenames().

    Yields:
        Tuples of (file, size, prevalence, expanded_size).
    """
    candidates = filter_filenames(
        include_analysis["files"],
        filter_generated_files=filter_generated_files,
        filter_mojom_headers=filter_mojom_headers,
        filter_third_party=filter_third_party,
    )

    for file in candidates:
        size = include_analysis["asizes"][file]
        expanded_size = include_analysis["tsizes"][file]
        prevalence = include_analysis["prevalence"][file]

        # Map each supported metric name to this file's value; an
        # unsupported metric resolves to None and disables the threshold.
        weight_by_metric = {
            "input_size": size,
            "expanded_size": expanded_size,
            "prevalence": prevalence,
        }
        weight = weight_by_metric.get(metric)

        if weight_threshold and weight is not None:
            if float(weight) < weight_threshold:
                continue

        yield file, size, prevalence, expanded_size
93+
94+
6295
def main():
63-
parser = argparse.ArgumentParser(description="Extract include edges from include analysis, with filtering")
96+
parser = argparse.ArgumentParser(description="Extract metrics from include analysis, with filtering")
6497
parser.add_argument(
6598
"include_analysis_output",
6699
type=str,
@@ -71,14 +104,15 @@ def main():
71104
"--metric",
72105
choices=["centrality", "expanded_size", "input_size", "prevalence"],
73106
default="input_size",
74-
help="Metric to use for edge weights.",
107+
help="Metric to use for weights.",
75108
)
76109
parser.add_argument(
77110
"--weight-threshold", type=float, help="Filter out changes with a weight value below the threshold."
78111
)
79112
parser.add_argument(
80113
"--filter-third-party", action="store_true", help="Filter out third_party/ (excluding blink) and v8."
81114
)
115+
parser.add_argument("--edges", action="store_true", help="Output metrics about edges.")
82116
parser.add_argument("--filter-generated-files", action="store_true", help="Filter out generated files.")
83117
parser.add_argument("--filter-mojom-headers", action="store_true", help="Filter out mojom headers.")
84118
group = parser.add_mutually_exclusive_group()
@@ -103,15 +137,26 @@ def main():
103137
csv_writer = csv.writer(sys.stdout)
104138

105139
try:
106-
for row in extract_include_analysis_edges(
107-
include_analysis,
108-
metric=args.metric,
109-
weight_threshold=args.weight_threshold,
110-
filter_generated_files=args.filter_generated_files,
111-
filter_mojom_headers=args.filter_mojom_headers,
112-
filter_third_party=args.filter_third_party,
113-
):
114-
csv_writer.writerow(row)
140+
if args.edges:
141+
for row in extract_include_analysis_edges(
142+
include_analysis,
143+
metric=args.metric,
144+
weight_threshold=args.weight_threshold,
145+
filter_generated_files=args.filter_generated_files,
146+
filter_mojom_headers=args.filter_mojom_headers,
147+
filter_third_party=args.filter_third_party,
148+
):
149+
csv_writer.writerow(row)
150+
else:
151+
for row in extract_include_analysis_files(
152+
include_analysis,
153+
metric=args.metric,
154+
weight_threshold=args.weight_threshold,
155+
filter_generated_files=args.filter_generated_files,
156+
filter_mojom_headers=args.filter_mojom_headers,
157+
filter_third_party=args.filter_third_party,
158+
):
159+
csv_writer.writerow(row)
115160

116161
sys.stdout.flush()
117162
except BrokenPipeError:

0 commit comments

Comments
 (0)