-
-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathimport_and_patch_translators.py
More file actions
executable file
·438 lines (361 loc) · 13.3 KB
/
import_and_patch_translators.py
File metadata and controls
executable file
·438 lines (361 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
#!/usr/bin/env python3
"""
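Sync the Zotero submodules (translators/zotero and the forks under sources/)
with their upstream, then patch each translator .js file under
translators/zotero: export the leading JSON header as ZOTERO_TRANSLATOR_INFO,
add the sandbox import, and append ES module exports. Translators that still
use the deprecated Zotero Framework are deleted, and translators/manifest.json
is regenerated from the patched files.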
Usage: python3 scripts/import_and_patch_translators.py
"""
import json
import re
import shutil
import subprocess
import sys
from pathlib import Path
# Parent of the directory that contains this script (two levels above the file itself).
ROOT = Path(__file__).resolve().parents[1]
# Directory scanned for translator .js files by patch_all() and generate_manifest().
TARGET = ROOT / "translators" / "zotero"
# URL of the upstream Zotero translators repository.
# NOTE(review): not referenced by any function visible in this file.
ZOTERO_REPO = "https://github.com/zotero/translators"
# Submodule path (relative to ROOT) -> branch that ensure_repo() checks out
# after it has synced that submodule's master branch.
ZOTERO_SUBMODULES = {
"translators/zotero": "esm",
"sources/zotero-translate": "async-sandbox",
"sources/zotero-utilities": "fix-import",
}
# Module specifier written into the import line that ensure_sandbox_import() adds.
SANDBOX_PATH = "../../sources/sandbox.js"
# Names listed, in this order, in the sandbox import line of every patched translator.
REQUIRED_SANDBOX_IMPORTS = [
"ZU",
"Zotero",
"Z",
"text",
"requestJSON",
"requestText",
"attr",
]
# Line prefix that has_fw_line() searches for; process_file() deletes files containing it.
FW_LINE_PREFIX = "/* FW LINE 59:b820c6d */"
# Function names that append_exports() exports when it finds a definition for them.
TRANSLATOR_EXPORT_CANDIDATES = [
"detectWeb",
"doWeb",
"detectImport",
"doImport",
"detectSearch",
"doSearch",
"doExport",
]
def should_ignore(path: Path) -> bool:
    """Return True when *path* must be skipped by the patch and manifest passes.

    A path is skipped when any of its components, exactly as given (parent
    directories included), starts with a dot (e.g. ``.ci/``), or when its
    file name is one of the known non-translator files.
    """
    hidden = any(component.startswith(".") for component in path.parts)
    return hidden or path.name in ("jsconfig.json", "AGENTS.md")
def run(cmd, **kwargs):
    """Echo *cmd* to stdout, then execute it and return the completed process.

    *cmd* is a sequence of strings. The command is run with ``check=True``,
    so a non-zero exit status raises ``subprocess.CalledProcessError``.
    Additional keyword arguments are forwarded to ``subprocess.run``.
    """
    echoed = " ".join(cmd)
    print(f"> {echoed}")
    completed = subprocess.run(cmd, check=True, **kwargs)
    return completed
# Sync every submodule listed in ZOTERO_SUBMODULES with its "upstream" remote.
# Each git command goes through run(), which uses check=True, so the first
# failing command raises and aborts the remaining steps.
# NOTE(review): block nesting is not visible in this copy of the file, so it
# cannot be confirmed here whether the final `git submodule update` runs once
# after the loop or once per submodule.
def ensure_repo():
for submodule, branch in ZOTERO_SUBMODULES.items():
# Absolute path handed to `git -C` so each command runs inside the submodule.
submodule_path = str(ROOT / submodule)
# Fetch upstream master, fast-forward the local master to it, then push it to origin.
run(["git", "-C", submodule_path, "fetch", "upstream", "master"])
run(["git", "-C", submodule_path, "checkout", "master"])
run(["git", "-C", submodule_path, "pull", "--ff-only", "upstream", "master"])
run(["git", "-C", submodule_path, "push", "origin", "master"])
# Switch back to the branch configured for this submodule.
run(["git", "-C", submodule_path, "checkout", branch])
# No -C or cwd is given here, so this runs in the caller's current working directory.
run(["git", "submodule", "update", "--remote", "--merge"])
def export_translator_info(text: str) -> tuple[str, bool]:
    """Turn a leading object literal into an exported declaration.

    Returns ``(new_text, changed)``. The text is left untouched when it
    already begins with ``export const ZOTERO_TRANSLATOR_INFO =`` (so repeated
    runs are idempotent) or when its first non-whitespace character is not
    ``{``. Otherwise the declaration is inserted right before the opening
    brace and any leading whitespace is kept in place.
    """
    declaration = "export const ZOTERO_TRANSLATOR_INFO = "
    already_exported = re.match(
        r"^\s*export\s+const\s+ZOTERO_TRANSLATOR_INFO\s*=", text
    )
    if already_exported is not None:
        return text, False
    body = text.lstrip()
    if not body.startswith("{"):
        return text, False
    # Whatever lstrip() removed is the original leading whitespace.
    leading_ws = text[: len(text) - len(body)]
    return f"{leading_ws}{declaration}{body}", True
def _is_function_defined(text: str, fn_name: str) -> bool:
patterns = [
rf"(^|\n)\s*(?:async\s+)?function\s+{re.escape(fn_name)}\s*\(",
rf"(^|\n)\s*(?:var|let|const)\s+{re.escape(fn_name)}\s*=\s*(?:async\s+)?function\b",
rf"(^|\n)\s*(?:var|let|const)\s+{re.escape(fn_name)}\s*=\s*(?:async\s+)?\([^)]*\)\s*=>",
rf"(^|\n)\s*{re.escape(fn_name)}\s*=\s*(?:async\s+)?function\b",
]
return any(re.search(pattern, text) for pattern in patterns)
def _parse_generated_export_specs(specs_text: str) -> list[str]:
return [spec.strip() for spec in specs_text.split(",") if spec.strip()]
def _build_exports_body_from_specs(specs: list[str]) -> str:
entries = []
for spec in specs:
if " as " in spec:
local_name, export_name = [part.strip() for part in spec.split(" as ", 1)]
entries.append(f"{export_name}: {local_name}")
else:
entries.append(spec)
if not entries:
return ""
return " " + ", ".join(entries) + " "
def _extract_and_remove_exports_object(text: str) -> tuple[str, str | None, bool]:
m = re.search(r"(?:export\s+)?(?:var|let|const)\s+exports\s*=\s*\{", text)
if not m:
return text, None, False
declaration_start = m.start()
open_brace_index = text.find("{", declaration_start)
if open_brace_index == -1:
return text, None, False
i = open_brace_index
depth = 0
in_str = None
esc = False
close_brace_index = None
while i < len(text):
ch = text[i]
if in_str:
if esc:
esc = False
elif ch == "\\":
esc = True
elif ch == in_str:
in_str = None
else:
if ch == '"' or ch == "'":
in_str = ch
elif ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
close_brace_index = i
break
i += 1
if close_brace_index is None:
return text, None, False
body = text[open_brace_index + 1 : close_brace_index]
end = close_brace_index + 1
if end < len(text) and text[end] == ";":
end += 1
return text[:declaration_start] + text[end:], body, True
def _remove_generated_export_blocks(text: str) -> tuple[str, list[str], str | None, bool]:
generated_block_re = re.compile(
r"\n?(?:(?:// Export translator compatibility exports for adapter\n)+"
r"export\s+const\s+exports\s*=\s*\{([\s\S]*?)\};\n*)?"
r"// Export translator functions as ES module bindings for adapter\n"
r"export\s*\{([^}]*)\};\s*\Z",
re.MULTILINE,
)
m = generated_block_re.search(text)
if m:
return text[:m.start()].rstrip(), _parse_generated_export_specs(m.group(2)), m.group(1), True
compatibility_only_re = re.compile(
r"\n?(?:// Export translator compatibility exports for adapter\n)+"
r"export\s+const\s+exports\s*=\s*\{([\s\S]*?)\};\s*\Z",
re.MULTILINE,
)
m = compatibility_only_re.search(text)
if m:
return text[:m.start()].rstrip(), [], m.group(1), True
return text, [], None, False
def append_exports(text: str) -> tuple[str, bool]:
    """Rebuild the generated export section at the end of a translator.

    Previously generated export blocks and any ``exports = {...}`` declaration
    are removed first. The section is then regenerated from the functions in
    TRANSLATOR_EXPORT_CANDIDATES that the remaining text defines, plus a
    compatibility ``exports`` object when one was declared or generated
    before.

    Returns ``(new_text, changed)``; *changed* is False when the regenerated
    text equals the input (ignoring one trailing newline).
    """
    source = text
    stripped, prior_specs, prior_compat_body, _ = _remove_generated_export_blocks(source)
    stripped, declared_body, _ = _extract_and_remove_exports_object(stripped)

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    export_names = list(
        dict.fromkeys(
            name
            for name in TRANSLATOR_EXPORT_CANDIDATES
            if _is_function_defined(stripped, name)
        )
    )
    exported = set(export_names)

    # Prefer a hand-written exports object over one generated by a prior run.
    compat_body = declared_body or prior_compat_body
    if compat_body is None and prior_specs:
        # Keep previously exported names that are no longer exported directly.
        leftovers = [spec for spec in prior_specs if spec not in exported]
        compat_body = _build_exports_body_from_specs(leftovers)

    blocks = []
    if compat_body and compat_body.strip():
        blocks.append(
            "\n// Export translator compatibility exports for adapter\n"
            + "export const exports = {"
            + compat_body
            + "};\n"
        )
    if export_names:
        blocks.append(
            "\n// Export translator functions as ES module bindings for adapter\n"
            + f"export {{ {', '.join(export_names)} }};"
            + "\n"
        )

    if not blocks:
        return stripped, stripped != source

    rebuilt = stripped.rstrip() + "".join(blocks)
    if source in (rebuilt, rebuilt + "\n"):
        return source, False
    return rebuilt, True
def ensure_sandbox_import(text: str) -> tuple[str, bool]:
    """Make sure *text* imports the required names from the sandbox module.

    Returns ``(new_text, changed)``.

    * If a named import from a path ending in ``sandbox.js`` exists and
      already equals the canonical import line, the text is returned
      unchanged.
    * If such an import exists but differs, the first one is replaced by the
      canonical line.
    * Otherwise the canonical line, followed by a blank line, is prepended.
    """
    new_line = f'import {{ {", ".join(REQUIRED_SANDBOX_IMPORTS)} }} from "{SANDBOX_PATH}";'
    # Only horizontal whitespace is allowed at the line edges. With ``\s`` the
    # match could span neighbouring newlines, and replacing it would delete
    # the blank lines around the import statement.
    import_re = re.compile(
        r"^[ \t]*import\s*\{\s*([^}]*)\}\s*from\s*[\"'].*sandbox\.js[\"'];?[ \t]*$",
        re.MULTILINE,
    )
    match = import_re.search(text)
    if match is None:
        return f"{new_line}\n\n{text}", True
    if match.group(0).strip() == new_line:
        return text, False
    # Splice by position so only the matched statement is rewritten, rather
    # than every substring that happens to equal it.
    return text[: match.start()] + new_line + text[match.end() :], True
def has_fw_line(text: str) -> bool:
    """Return True if any line of *text*, ignoring leading whitespace, starts with FW_LINE_PREFIX."""
    for line in text.splitlines():
        if line.lstrip().startswith(FW_LINE_PREFIX):
            return True
    return False
def extract_json_from_text(text: str):
    """Parse JSON from *text*, tolerating trailing non-JSON content.

    The whole text is tried first. If that fails, the JSON object starting at
    the first ``{`` is decoded and whatever follows it is ignored.

    Returns the parsed value, or None when no ``{`` is present or the object
    starting there is not valid JSON.
    """
    try:
        return json.loads(text)
    except (TypeError, ValueError, RecursionError):
        # Expected for translator sources: the JSON header is followed by code.
        pass

    start = text.find("{")
    if start == -1:
        return None
    try:
        # raw_decode stops at the end of the first complete JSON value, so it
        # replaces a hand-written balanced-brace scan plus a second parse.
        value, _end = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        return None
    return value
def extract_declared_translator_info(text: str):
    """Return the JSON value assigned to ``ZOTERO_TRANSLATOR_INFO`` in *text*.

    The declaration may use ``const``, ``let`` or ``var`` and may be prefixed
    with ``export``. Returns None when no declaration is found or when the
    text after the ``=`` cannot be parsed by extract_json_from_text().
    """
    declaration = re.search(
        r"(?:export\s+)?(?:const|let|var)\s+ZOTERO_TRANSLATOR_INFO\s*=\s*",
        text,
    )
    if declaration is None:
        return None
    remainder = text[declaration.end() :]
    return extract_json_from_text(remainder)
def process_file(path: Path) -> tuple[bool, bool, bool, bool]:
    """Patch one translator file in place, or delete it.

    Returns ``(commented, imported, exported, deleted)``:

    * *commented* - the leading JSON header was turned into an exported
      ``ZOTERO_TRANSLATOR_INFO`` declaration,
    * *imported* - the sandbox import was added or rewritten,
    * *exported* - the trailing export section was changed,
    * *deleted* - the file was removed.

    The file is rewritten only when at least one of the first three flags is
    set.
    """
    source = path.read_text(encoding="utf-8")

    # Delete old translators that still use the deprecated "Zotero Framework"
    # These are not valid esm, and are deprecated anyway: https://github.com/zotero/translators/issues/3105
    if has_fw_line(source):
        path.unlink()
        return False, False, False, True

    patched, commented = export_translator_info(source)
    patched, imported = ensure_sandbox_import(patched)
    patched, exported = append_exports(patched)

    if any((commented, imported, exported)):
        path.write_text(patched, encoding="utf-8")
    return commented, imported, exported, False
def patch_all():
    """Run process_file() on every non-ignored .js file under TARGET and print a summary.

    A failure on one file is printed and does not stop the remaining files
    from being processed. The file still counts towards the processed total.
    """
    js_files = [candidate for candidate in TARGET.rglob("*.js") if not should_ignore(candidate)]
    if not js_files:
        print("No .js files found under", TARGET)
        return

    total = 0
    tally = {"commented": 0, "imported": 0, "exported": 0, "deleted": 0}
    for js_path in js_files:
        total += 1
        try:
            commented, imported, exported, deleted = process_file(js_path)
        except Exception as e:
            print("Error processing", js_path, e)
            continue
        outcome = {
            "commented": commented,
            "imported": imported,
            "exported": exported,
            "deleted": deleted,
        }
        for key, happened in outcome.items():
            if happened:
                tally[key] += 1

    deleted_count = tally["deleted"]
    commented_count = tally["commented"]
    imported_count = tally["imported"]
    exported_count = tally["exported"]
    print(
        f"Processed {total} files: deleted {deleted_count}, commented {commented_count}, sandbox imports updated {imported_count}, exports updated {exported_count}"
    )
def generate_manifest():
    """Write translators/manifest.json describing every translator under TARGET.

    Each non-ignored .js file yields one entry with its path relative to ROOT
    and a label that defaults to the file stem. When the file declares
    ZOTERO_TRANSLATOR_INFO, selected metadata fields are copied from it.
    ``minVersion`` and ``maxVersion`` are copied only when they are not null.
    Files that cannot be read are skipped, and a failure to write the manifest
    is printed rather than raised.
    """
    copied_keys = (
        "label",
        "translatorID",
        "target",
        "browserSupport",
        "translatorType",
        "creator",
        "priority",
        "lastUpdated",
    )
    non_null_keys = ("minVersion", "maxVersion")

    entries = []
    for js_path in sorted(TARGET.rglob("*.js")):
        if should_ignore(js_path):
            continue
        try:
            source = js_path.read_text(encoding="utf-8")
        except Exception:
            continue
        info = extract_declared_translator_info(source) or None
        entry = {
            "path": js_path.relative_to(ROOT).as_posix(),
            "label": js_path.stem,
        }
        if info:
            for key in copied_keys:
                if key in info:
                    entry[key] = info.get(key)
            for key in non_null_keys:
                if key in info and info.get(key) is not None:
                    entry[key] = info.get(key)
        entries.append(entry)

    destination = ROOT / "translators" / "manifest.json"
    try:
        destination.write_text(json.dumps(entries, indent=2) + "\n", encoding="utf-8")
        print("Wrote manifest to", destination)
    except Exception as e:
        print("Failed to write manifest:", e)
def main():
    """Sync the submodules, then patch the translators and regenerate the manifest.

    If syncing raises, the error is printed and the process exits with
    status 1 before any file is patched.
    """
    try:
        ensure_repo()
    except Exception as e:
        print("Error ensuring repo:", e)
        sys.exit(1)
    for step in (patch_all, generate_manifest):
        step()


# Run the workflow only when this file is executed as a script.
if __name__ == "__main__":
    main()