Skip to content

Commit dcbd0d4

Browse files
Single git call for authors and dates (#172)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
1 parent b42d527 commit dcbd0d4

5 files changed

Lines changed: 193 additions & 82 deletions

File tree

plugin/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
22

3-
__version__ = "0.2.1"
3+
__version__ = "0.2.2"
44

55
from .main import MetaPlugin
66
from .postprocess import postprocess_site

plugin/main.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22

33
from __future__ import annotations
44

5+
from pathlib import Path
6+
57
from mkdocs.config import config_options
68
from mkdocs.plugins import BasePlugin
79

10+
import plugin.processor as processor
811
from plugin.processor import process_html
912

1013

@@ -26,6 +29,22 @@ class MetaPlugin(BasePlugin):
2629
("add_copy_llm", config_options.Type(bool, default=True)),
2730
)
2831

32+
def __init__(self):
    """Initialize the plugin with an empty git cache.

    Both attributes start as None; `on_config` fills them in once per build when author or JSON-LD output is
    enabled, and `on_post_page` forwards them to the shared processor.
    """
    super().__init__()
    # Normalized remote URL of the repository and per-file git metadata, populated lazily in on_config
    self.git_repo_url = self.git_data = None
36+
37+
def on_config(self, config):
    """Collect git metadata for all Markdown sources once, before any page is rendered.

    Nothing is gathered when the plugin is disabled or when neither authors nor JSON-LD are requested.

    Args:
        config: The MkDocs configuration; only its "docs_dir" entry is read here.

    Returns:
        The same `config` object, unmodified.
    """
    if not self.config.get("enabled", True):
        return config

    wants_git = self.config.get("add_authors") or self.config.get("add_json_ld")
    if not wants_git:
        return config

    docs_root = Path(config["docs_dir"])
    markdown_files = []
    if docs_root.exists():
        markdown_files = [str(md) for md in docs_root.rglob("*.md")]

    # One git pass for the whole docs tree instead of per-page subprocess calls
    self.git_repo_url, self.git_data = processor.build_git_map(markdown_files)
    return config
47+
2948
def on_post_page(self, output: str, page, config) -> str:
3049
"""Enhance HTML output by delegating to shared processor."""
3150
if not self.config["enabled"]:
@@ -47,6 +66,8 @@ def on_post_page(self, output: str, page, config) -> str:
4766
page_url=page_url,
4867
title=title,
4968
src_path=page.file.abs_src_path,
69+
git_data=self.git_data,
70+
repo_url=self.git_repo_url,
5071
default_image=self.config["default_image"],
5172
default_author=self.config["default_author"],
5273
keywords=keywords,

plugin/postprocess.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,24 @@
33

44
from __future__ import annotations
55

6+
from collections.abc import Callable
67
from pathlib import Path
78

9+
try:
10+
from ultralytics.utils import TQDM # progress bars
11+
except ImportError:
12+
TQDM = None
13+
14+
import plugin.processor as processor
815
from plugin.processor import process_html
916

1017

1118
def process_html_file(
1219
html_path: Path,
1320
site_dir: Path,
1421
md_index: dict[str, str],
22+
git_data: dict[str, dict[str, str | dict]] | None,
23+
repo_url: str | None,
1524
site_url: str = "",
1625
default_image: str | None = None,
1726
default_author: str | None = None,
@@ -24,6 +33,7 @@ def process_html_file(
2433
add_css: bool = True,
2534
add_copy_llm: bool = True,
2635
verbose: bool = False,
36+
log: Callable[[str], None] | None = print,
2737
) -> bool:
2838
"""Process a single HTML file by delegating to shared processor.
2939
@@ -35,8 +45,8 @@ def process_html_file(
3545
try:
3646
html = html_path.read_text(encoding="utf-8")
3747
except (UnicodeDecodeError, FileNotFoundError) as e:
38-
if verbose:
39-
print(f"Error reading {html_path}: {e}")
48+
if verbose and log:
49+
log(f"Error reading {html_path}: {e}")
4050
return False
4151

4252
soup = BeautifulSoup(html, "html.parser")
@@ -65,6 +75,8 @@ def process_html_file(
6575
page_url=page_url,
6676
title=title,
6777
src_path=src_path,
78+
git_data=git_data,
79+
repo_url=repo_url,
6880
default_image=default_image,
6981
default_author=default_author,
7082
keywords=keywords,
@@ -81,12 +93,10 @@ def process_html_file(
8193
# Write back
8294
try:
8395
html_path.write_text(processed_html, encoding="utf-8")
84-
if verbose:
85-
print(f"Processed: {html_path.relative_to(site_dir)}")
8696
return True
8797
except (OSError, PermissionError) as e:
88-
if verbose:
89-
print(f"Error writing {html_path}: {e}")
98+
if verbose and log:
99+
log(f"Error writing {html_path}: {e}")
90100
return False
91101

92102

@@ -129,11 +139,21 @@ def postprocess_site(
129139
print(f"Processing {len(html_files)} HTML files in {site_dir}")
130140

131141
processed = 0
132-
for html_file in html_files:
142+
repo_url = None
143+
git_data = None
144+
if (add_authors or add_json_ld) and md_index:
145+
repo_url, git_data = processor.build_git_map(list(md_index.values()))
146+
147+
progress = TQDM(html_files, desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
148+
log_fn = (progress.write if verbose and progress else print) if verbose else None
149+
iterator = progress if progress else html_files
150+
for html_file in iterator:
133151
success = process_html_file(
134152
html_file,
135153
site_dir,
136154
md_index,
155+
git_data,
156+
repo_url,
137157
site_url=site_url,
138158
default_image=default_image,
139159
default_author=default_author,
@@ -146,9 +166,12 @@ def postprocess_site(
146166
add_css=add_css,
147167
add_copy_llm=add_copy_llm,
148168
verbose=verbose,
169+
log=log_fn,
149170
)
150171
if success:
151172
processed += 1
173+
if progress:
174+
progress.close()
152175

153176
print(f"✅ Postprocessing complete: {processed}/{len(html_files)} files processed")
154177

plugin/processor.py

Lines changed: 126 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,37 +27,47 @@
2727
CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 16.17L4.83 12l-1.42 1.41L9 19L21 7l-1.41-1.41L9 16.17z"></path></svg>'
2828

2929

30-
def get_git_info(file_path: str, add_authors: bool = True, default_author: str | None = None) -> dict[str, Any]:
31-
"""Retrieve git information including creation/modified dates and optional authors."""
30+
def get_git_info(
    file_path: str,
    add_authors: bool = True,
    default_author: str | None = None,
    git_data: dict[str, dict[str, Any]] | None = None,
    repo_url: str | None = None,
) -> dict[str, Any]:
    """Look up creation/modification dates (and optionally authors) for a file in precomputed git data.

    Args:
        file_path: Path of the source file; it is resolved to an absolute path before the lookup.
        add_authors: Whether to include an "authors" entry when the cached record has author emails.
        default_author: Passed through to `get_github_usernames_from_file` as `default_user`.
        git_data: Mapping of resolved file path to a record with "creation_date", "last_modified_date" and
            "emails" keys.
        repo_url: Repository URL passed through to `get_github_usernames_from_file`.

    Returns:
        A dict with "creation_date" and "last_modified_date" (the module defaults when the file has no cached
        record) and, when requested and available, "authors" as a list of (author, url, changes, avatar)
        tuples sorted by descending change count.
    """
    resolved = str(Path(file_path).resolve())

    # No cache at all, or this file was never seen by git: report placeholder dates only
    if not git_data or resolved not in git_data:
        return {
            "creation_date": DEFAULT_CREATION_DATE,
            "last_modified_date": DEFAULT_MODIFIED_DATE,
        }

    record = git_data[resolved]
    info = {
        "creation_date": record.get("creation_date", DEFAULT_CREATION_DATE),
        "last_modified_date": record.get("last_modified_date", DEFAULT_MODIFIED_DATE),
    }

    if add_authors and record.get("emails"):
        contributors = get_github_usernames_from_file(
            resolved, default_user=default_author, emails=record["emails"], repo_url=repo_url
        )
        rows = [(name, meta["url"], meta["changes"], meta["avatar"]) for name, meta in contributors.items()]
        rows.sort(key=lambda row: row[2], reverse=True)  # most changes first
        info["authors"] = rows

    return info
6373

@@ -104,6 +114,90 @@ def insert_content(soup: BeautifulSoup, content_to_insert) -> None:
104114
md_typeset.append(content_to_insert)
105115

106116

117+
def build_git_map(file_paths: list[str] | list[Path]) -> tuple[str | None, dict[str, dict[str, Any]]]:
118+
"""Build git metadata for provided files using a single git log pass."""
119+
git_data: dict[str, dict[str, Any]] = {}
120+
repo_url: str | None = None
121+
122+
if not file_paths:
123+
return repo_url, git_data
124+
125+
try:
126+
repo_root = Path(
127+
subprocess.check_output(["git", "rev-parse", "--show-toplevel"], stderr=subprocess.DEVNULL).decode().strip()
128+
)
129+
except subprocess.CalledProcessError:
130+
return repo_url, git_data
131+
132+
try:
133+
github_repo_url = subprocess.check_output(
134+
["git", "-C", str(repo_root), "config", "--get", "remote.origin.url"], stderr=subprocess.DEVNULL
135+
).decode("utf-8")
136+
github_repo_url = github_repo_url.strip()
137+
if github_repo_url.endswith(".git"):
138+
github_repo_url = github_repo_url[:-4]
139+
if github_repo_url.startswith("git@"):
140+
github_repo_url = "https://" + github_repo_url[4:].replace(":", "/")
141+
repo_url = github_repo_url or None
142+
except subprocess.CalledProcessError:
143+
repo_url = None
144+
145+
rel_paths = []
146+
for fp in file_paths:
147+
path = Path(fp)
148+
if path.exists():
149+
try:
150+
rel_paths.append(path.resolve().relative_to(repo_root))
151+
except ValueError:
152+
continue
153+
if not rel_paths:
154+
return repo_url, git_data
155+
156+
cmd = [
157+
"git",
158+
"-C",
159+
str(repo_root),
160+
"log",
161+
"--name-only",
162+
"--pretty=format:%ad\t%ae",
163+
"--date=format:%Y-%m-%d %H:%M:%S %z",
164+
"--",
165+
*[str(p) for p in rel_paths],
166+
]
167+
168+
try:
169+
output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode().splitlines()
170+
except subprocess.CalledProcessError:
171+
return repo_url, git_data
172+
173+
current_date = None
174+
current_email = None
175+
for line in output:
176+
if not line.strip():
177+
continue
178+
parts = line.split("\t")
179+
if len(parts) == 2:
180+
current_date, current_email = parts
181+
continue
182+
183+
if current_date and current_email:
184+
abs_path = (repo_root / line.strip()).resolve()
185+
key = str(abs_path)
186+
entry = git_data.setdefault(
187+
key,
188+
{
189+
"creation_date": current_date,
190+
"last_modified_date": current_date,
191+
"emails": {},
192+
},
193+
)
194+
entry.setdefault("last_modified_date", current_date)
195+
entry["creation_date"] = current_date
196+
entry["emails"][current_email] = entry["emails"].get(current_email, 0) + 1
197+
198+
return repo_url, git_data
199+
200+
107201
def get_css() -> str:
108202
"""CSS for git info, share buttons, and copy button."""
109203
return """
@@ -212,6 +306,8 @@ def process_html(
212306
page_url: str,
213307
title: str,
214308
src_path: str | None = None,
309+
git_data: dict[str, dict[str, Any]] | None = None,
310+
repo_url: str | None = None,
215311
default_image: str | None = None,
216312
default_author: str | None = None,
217313
keywords: str | None = None,
@@ -389,15 +485,17 @@ def process_html(
389485
"""
390486
soup.body.append(script)
391487

392-
# Initialize git info with defaults
488+
# Initialize git info with defaults and only call git when needed (authors or JSON-LD)
393489
git_info = {
394490
"creation_date": DEFAULT_CREATION_DATE,
395491
"last_modified_date": DEFAULT_MODIFIED_DATE,
396492
}
493+
needs_git = (add_authors or add_json_ld) and src_path
397494

398-
# Add git information if source path available
399-
if src_path:
400-
git_info = get_git_info(src_path, add_authors=add_authors, default_author=default_author)
495+
if needs_git:
496+
git_info = get_git_info(
497+
src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
498+
)
401499

402500
# Only render git footer if we have real git history (not placeholder defaults)
403501
has_real_git_data = (

0 commit comments

Comments
 (0)