Skip to content

Commit 0839775

Browse files
committed
chore: cache latest include analysis to minimize refetches
1 parent aacb86a commit 0839775

2 files changed

Lines changed: 37 additions & 4 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,5 @@ dmypy.json
130130

131131
*.csv
132132
*.dot
133+
134+
.cached-include-analysis

include_analysis.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import re
33
import sys
44
import urllib.request
5+
from pathlib import Path
56
from typing import Dict, List, Optional, TypedDict
67

78
DATA_REGEX = re.compile(r".*<script>\n?(data = .*?)<\/script>", re.DOTALL)
@@ -130,11 +131,41 @@ def parse_raw_include_analysis_output(output: str) -> Optional[IncludeAnalysisOu
130131

131132

132133
def get_latest_include_analysis() -> str:
    """Return the raw text of the latest published include analysis.

    The payload is cached next to this module in ``.cached-include-analysis``.
    The first line of that file holds the ETag the server sent with the
    payload; everything after the first newline is the payload itself. When a
    cached ETag is available it is sent as ``If-None-Match`` so that an
    unchanged payload is answered with "304 Not Modified" instead of being
    downloaded again.

    Returns:
        The decoded (UTF-8) response body, or the cached copy when the server
        answers 304 or the request fails while a cached copy is available.

    Raises:
        urllib.error.HTTPError: The server returned an error status and there
            is no cached copy to fall back to.
        urllib.error.URLError: The request failed at the network level and
            there is no cached copy to fall back to.
        OSError: The freshly downloaded payload could not be written to the
            cache file.
    """
    # Imported explicitly rather than relying on urllib.request pulling the
    # submodule in as a side effect.
    import urllib.error

    cached_file_path = Path(__file__).resolve().parent.joinpath(".cached-include-analysis")
    url = "https://commondatastorage.googleapis.com/chromium-browser-clang/include-analysis.js"

    etag = None
    raw_include_analysis = None

    # An unreadable or undecodable cache is treated the same as a missing one:
    # it must never prevent a fresh download.
    try:
        with open(cached_file_path, "r", encoding="utf8") as f:
            cached_text = f.read()
    except (OSError, UnicodeDecodeError):
        cached_text = ""

    # Only trust the cache when it has both the ETag line and a non-empty
    # body. A truncated file (no newline, or nothing after it) is ignored so
    # that a later 304 can never hand back an empty payload.
    first_line, separator, body = cached_text.partition("\n")
    if separator and body:
        etag = first_line
        raw_include_analysis = body

    # Make the request conditional on the cached ETag, if there is one.
    request = urllib.request.Request(url)
    if etag:
        request.add_header("If-None-Match", etag)

    try:
        with urllib.request.urlopen(request) as response:
            fresh_include_analysis = response.read().decode("utf8")
            new_etag = response.headers.get("ETag", "")
    except urllib.error.HTTPError as e:
        # If not "304 Not Modified", fall back to cache if available, else
        # raise the error.
        if e.code != 304 and raw_include_analysis is None:
            raise
    except urllib.error.URLError:
        # If there's a network error, fall back to cache if available, else
        # raise the error.
        if raw_include_analysis is None:
            raise
    else:
        # New content (200 OK): persist it with the ETag on the first line.
        raw_include_analysis = fresh_include_analysis

        # Write to a sibling temp file and rename it into place so an
        # interrupted write cannot leave a half-written cache behind.
        temp_file_path = cached_file_path.with_name(cached_file_path.name + ".tmp")
        try:
            with open(temp_file_path, "w", encoding="utf8") as f:
                f.write(f"{new_etag}\n{raw_include_analysis}")
            temp_file_path.replace(cached_file_path)
        finally:
            # No-op after a successful rename; removes debris after a failure.
            temp_file_path.unlink(missing_ok=True)

    return raw_include_analysis
138169

139170

140171
def extract_include_analysis(contents: str) -> str:

0 commit comments

Comments
 (0)