Skip to content

Commit 3cdfd50

Browse files
Fix race condition on GitHub authors (#177)
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
1 parent a4c3f49 commit 3cdfd50

4 files changed

Lines changed: 152 additions & 191 deletions

File tree

plugin/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
22

3-
__version__ = "0.2.3"
3+
__version__ = "0.2.4"
44

55
from .main import MetaPlugin
66
from .postprocess import postprocess_site

plugin/postprocess.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import plugin.processor as processor
1818
from plugin.processor import process_html
19+
from plugin.utils import resolve_all_authors
1920

2021
# Shared worker state for process pools (avoids re-pickling large read-only data per task)
2122
_WORKER_STATE: dict[str, Any] | None = None
@@ -37,7 +38,6 @@ def _process_file(html_file: Path) -> bool:
3738
_WORKER_STATE["repo_url"],
3839
site_url=_WORKER_STATE["site_url"],
3940
default_image=_WORKER_STATE["default_image"],
40-
default_author=_WORKER_STATE["default_author"],
4141
add_desc=_WORKER_STATE["add_desc"],
4242
add_image=_WORKER_STATE["add_image"],
4343
add_keywords=_WORKER_STATE["add_keywords"],
@@ -59,7 +59,6 @@ def process_html_file(
5959
repo_url: str | None,
6060
site_url: str = "",
6161
default_image: str | None = None,
62-
default_author: str | None = None,
6362
add_desc: bool = True,
6463
add_image: bool = True,
6564
add_keywords: bool = True,
@@ -114,7 +113,6 @@ def process_html_file(
114113
git_data=git_data,
115114
repo_url=repo_url,
116115
default_image=default_image,
117-
default_author=default_author,
118116
keywords=keywords,
119117
add_desc=add_desc,
120118
add_image=add_image,
@@ -184,6 +182,9 @@ def postprocess_site(
184182
git_data = None
185183
if (add_authors or add_json_ld) and md_index:
186184
repo_url, git_data = processor.build_git_map(list(md_index.values()))
185+
# Resolve all authors ONCE in the main process, before spawning any workers
186+
# This prevents race conditions caused by multiple workers writing to the cache file at the same time
187+
git_data = resolve_all_authors(git_data, default_author=default_author, repo_url=repo_url, verbose=verbose)
187188

188189
progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
189190
# Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
@@ -196,7 +197,6 @@ def postprocess_site(
196197
repo_url=repo_url,
197198
site_url=site_url,
198199
default_image=default_image,
199-
default_author=default_author,
200200
add_desc=add_desc,
201201
add_image=add_image,
202202
add_keywords=add_keywords,

plugin/processor.py

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,7 @@
1313

1414
from bs4 import BeautifulSoup
1515

16-
from plugin.utils import (
17-
calculate_time_difference,
18-
get_github_usernames_from_file,
19-
get_youtube_video_ids,
20-
)
16+
from plugin.utils import calculate_time_difference, get_youtube_video_ids
2117

2218
today = datetime.now()
2319
DEFAULT_CREATION_DATE = (today - timedelta(days=365)).strftime("%Y-%m-%d %H:%M:%S +0000")
@@ -30,11 +26,9 @@
3026
def get_git_info(
3127
file_path: str,
3228
add_authors: bool = True,
33-
default_author: str | None = None,
3429
git_data: dict[str, dict[str, Any]] | None = None,
35-
repo_url: str | None = None,
3630
) -> dict[str, Any]:
37-
"""Retrieve git information (dates + optional authors) from precomputed git data."""
31+
"""Retrieve git information (dates + pre-resolved authors) from precomputed git data."""
3832
file_path = str(Path(file_path).resolve())
3933
git_info = {
4034
"creation_date": DEFAULT_CREATION_DATE,
@@ -45,29 +39,12 @@ def get_git_info(
4539
return git_info
4640

4741
cached = git_data[file_path]
48-
git_info.update(
49-
{
50-
"creation_date": cached.get("creation_date", DEFAULT_CREATION_DATE),
51-
"last_modified_date": cached.get("last_modified_date", DEFAULT_MODIFIED_DATE),
52-
}
53-
)
54-
55-
if add_authors and cached.get("emails"):
56-
git_info["authors"] = sorted(
57-
[
58-
(
59-
author,
60-
info["url"],
61-
info["changes"],
62-
info["avatar"],
63-
)
64-
for author, info in get_github_usernames_from_file(
65-
file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
66-
).items()
67-
],
68-
key=lambda x: x[2],
69-
reverse=True,
70-
)
42+
git_info["creation_date"] = cached.get("creation_date", DEFAULT_CREATION_DATE)
43+
git_info["last_modified_date"] = cached.get("last_modified_date", DEFAULT_MODIFIED_DATE)
44+
45+
# Authors are pre-resolved by resolve_all_authors() in the main process
46+
if add_authors and "authors" in cached:
47+
git_info["authors"] = cached["authors"]
7148

7249
return git_info
7350

@@ -309,7 +286,6 @@ def process_html(
309286
git_data: dict[str, dict[str, Any]] | None = None,
310287
repo_url: str | None = None,
311288
default_image: str | None = None,
312-
default_author: str | None = None,
313289
keywords: str | None = None,
314290
add_desc: bool = True,
315291
add_image: bool = True,
@@ -493,9 +469,7 @@ def process_html(
493469
needs_git = (add_authors or add_json_ld) and src_path
494470

495471
if needs_git:
496-
git_info = get_git_info(
497-
src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
498-
)
472+
git_info = get_git_info(src_path, add_authors=add_authors, git_data=git_data)
499473

500474
# Only render git footer if we have real git history (not placeholder defaults)
501475
has_real_git_data = (

0 commit comments

Comments
 (0)