Skip to content

Commit 4034c5b

Browse files
committed
feat(trans): add translator work analysis command
1 parent 08081de commit 4034c5b

3 files changed

Lines changed: 241 additions & 0 deletions

File tree

docs/admin/management.rst

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,59 @@ Example:
9696
weblate --author michal@cihar.com add_suggestions weblate application cs /tmp/suggestions-cs.po
9797
9898
99+
analyze_translator_work
100+
-----------------------
101+
102+
.. weblate-admin:: analyze_translator_work
103+
104+
Analyzes change history to estimate realistic translator throughput per day.
105+
The command includes only active human users and unit-backed manual translation
106+
changes, and applies daily minimum and maximum thresholds to skip obvious outliers,
107+
bots, bulk imports, uploads, and other events that would pollute the result.
108+
109+
.. weblate-admin-option:: --days DAYS
110+
111+
Number of most recent days to analyze when ``--since`` is not specified (defaults to 365).
112+
113+
.. weblate-admin-option:: --since YYYY-MM-DD
114+
115+
Start date for the analysis (inclusive). When given, ``--days`` is ignored.
116+
117+
.. weblate-admin-option:: --until YYYY-MM-DD
118+
119+
End date for the analysis; the whole day is included. Defaults to the current time.
120+
121+
.. weblate-admin-option:: --project PROJECT
122+
123+
Limit the analysis to a project slug.
124+
125+
.. weblate-admin-option:: --component PROJECT/COMPONENT
126+
127+
Limit the analysis to a component, given as ``project/component``.
128+
129+
.. weblate-admin-option:: --language LANGUAGE
130+
131+
Limit the analysis to a language code.
132+
133+
.. weblate-admin-option:: --min-changes COUNT
134+
135+
Minimum number of translated strings a user must have on a given day for that user day to be included (defaults to 5).
136+
137+
.. weblate-admin-option:: --max-changes COUNT
138+
139+
Maximum number of translated strings a user may have on a given day for that user day to be included (defaults to 1000).
140+
141+
.. weblate-admin-option:: --max-words COUNT
142+
143+
Maximum number of translated source words a user may have on a given day for that user day to be included (defaults to 10000).
144+
145+
Example:
146+
147+
.. code-block:: sh
148+
149+
weblate analyze_translator_work --project weblate --since 2026-01-01
150+
151+
99152
auto_translate
100153
--------------
101154

docs/changes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Weblate 2026.7
2525
* Added :ref:`distribution-packaging` guidance for distribution maintainers.
2626
* Large component imports now avoid duplicate translation-memory processing.
2727
* :ref:`gettext` files can now be configured to remove obsolete strings on save.
28+
* Added :wladmin:`analyze_translator_work` to estimate realistic daily translator throughput from change history.
2829

2930
.. rubric:: Bug fixes
3031

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# Copyright © Michal Čihař <michal@weblate.org>
2+
#
3+
# SPDX-License-Identifier: GPL-3.0-or-later
4+
from __future__ import annotations
5+
6+
from datetime import datetime, timedelta
7+
from statistics import mean, median
8+
from typing import TYPE_CHECKING, Any
9+
10+
from django.core.management.base import CommandError
11+
from django.db.models import Count, Sum
12+
from django.db.models.functions import TruncDate
13+
from django.utils import timezone
14+
from django.utils.dateparse import parse_date
15+
16+
from weblate.trans.actions import ActionEvents
17+
from weblate.trans.models import Change
18+
from weblate.utils.management.base import BaseCommand
19+
20+
if TYPE_CHECKING:
21+
from collections.abc import Iterable
22+
23+
from django.core.management.base import CommandParser
24+
from django.db.models import QuerySet
25+
26+
27+
# Change actions that the analysis counts as translator work.
TRANSLATOR_ACTIONS = (ActionEvents.CHANGE, ActionEvents.NEW, ActionEvents.ACCEPT)
32+
33+
34+
class Command(BaseCommand):
    """Estimate realistic daily translator throughput from change history.

    Matching changes are grouped into "user days" (one user on one calendar
    day). User days outside the configured string and word thresholds are
    dropped, and summary statistics are printed for the remaining ones.
    """

    help = "Analyze realistic daily translator throughput from change history"

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the command-line options understood by the command."""
        parser.add_argument(
            "--days",
            type=int,
            default=365,
            help="Number of recent days to analyze when --since is not specified",
        )
        parser.add_argument(
            "--since",
            help="Start date to analyze, in YYYY-MM-DD format",
        )
        parser.add_argument(
            "--until",
            help="End date to analyze, in YYYY-MM-DD format (defaults to now)",
        )
        parser.add_argument(
            "--project",
            help="Limit analysis to a project slug",
        )
        parser.add_argument(
            "--component",
            help="Limit analysis to a component path as project/component",
        )
        parser.add_argument(
            "--language",
            help="Limit analysis to a language code",
        )
        parser.add_argument(
            "--min-changes",
            type=int,
            default=5,
            help="Minimum translated strings per user day to include",
        )
        parser.add_argument(
            "--max-changes",
            type=int,
            default=1000,
            help="Maximum translated strings per user day to include",
        )
        parser.add_argument(
            "--max-words",
            type=int,
            default=10000,
            help="Maximum translated source words per user day to include",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """Run the analysis and write a plain-text report to stdout.

        Raises:
            CommandError: If an option value is invalid, the thresholds
                contradict each other, or the analyzed period is empty.
        """
        min_changes = options["min_changes"]
        max_changes = options["max_changes"]
        max_words = options["max_words"]
        # Contradictory thresholds can never match anything; report that
        # instead of silently printing an empty analysis.
        if min_changes > max_changes:
            msg = "--min-changes must not be greater than --max-changes."
            raise CommandError(msg)

        since = self.get_since(options)
        until = self.get_until(options)
        if since >= until:
            msg = "The analysis start date must be before the end date."
            raise CommandError(msg)

        # The lower bound is inclusive, the upper bound is exclusive.
        base = self.get_queryset(options).filter(
            timestamp__gte=since, timestamp__lt=until
        )
        # One row per (day, user) with the number of changes and the summed
        # word count of the changed units.
        rows = list(
            base.annotate(day=TruncDate("timestamp"))
            .values("day", "user_id", "user__username")
            .annotate(strings=Count("id"), words=Sum("unit__num_words"))
            .order_by("day", "user__username")
        )

        # The summed word count can be None; such user days count as zero
        # words.
        included = [
            row
            for row in rows
            if min_changes <= row["strings"] <= max_changes
            and (row["words"] or 0) <= max_words
        ]
        excluded = len(rows) - len(included)

        self.stdout.write("Translator work analysis")
        self.stdout.write(f"Period: {since:%Y-%m-%d} to {until:%Y-%m-%d}")
        self.stdout.write(
            "Included actions: "
            + ", ".join(
                ActionEvents(action).name.lower() for action in TRANSLATOR_ACTIONS
            )
        )
        self.stdout.write(
            "Filtering: active human users, unit-backed changes, "
            f"{min_changes}..{max_changes} strings/day, "
            f"up to {max_words} words/day"
        )
        self.stdout.write(f"User days: {len(included)} included, {excluded} excluded")

        if not included:
            self.stdout.write("No matching translator activity found.")
            return

        strings = [row["strings"] for row in included]
        words = [row["words"] or 0 for row in included]
        self.stdout.write("")
        self.write_metric("Translated strings per day", strings)
        self.write_metric("Source words per day", words)

    def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]:
        """Build the queryset of changes that count as translator work.

        Only changes with an action from ``TRANSLATOR_ACTIONS`` that are
        linked to a unit and were made by an active, non-bot user are kept.
        The project, component and language options narrow it further.

        Raises:
            CommandError: If ``--component`` is not ``project/component``
                with both parts non-empty.
        """
        queryset = Change.objects.filter(
            action__in=TRANSLATOR_ACTIONS,
            unit__isnull=False,
            user__isnull=False,
            user__is_active=True,
            user__is_bot=False,
        )
        if options["project"]:
            queryset = queryset.filter(project__slug=options["project"])
        if options["component"]:
            # Split on the first slash only; reject a missing slash as well
            # as an empty project or component part, which would otherwise
            # silently filter on an empty slug.
            project, separator, component = options["component"].partition("/")
            if not separator or not project or not component:
                msg = "Component must be specified as project/component."
                raise CommandError(msg)
            queryset = queryset.filter(project__slug=project, component__slug=component)
        if options["language"]:
            queryset = queryset.filter(language__code=options["language"])
        return queryset

    def get_since(self, options: dict[str, Any]) -> datetime:
        """Return the inclusive start of the analyzed period.

        ``--since`` wins when given (start of that day); otherwise the
        period starts ``--days`` days before the current time.

        Raises:
            CommandError: If ``--since`` is not a valid date or ``--days``
                is not positive.
        """
        if options["since"]:
            return self._parse_day(options["since"], "--since")
        if options["days"] <= 0:
            msg = "--days must be a positive number."
            raise CommandError(msg)
        return timezone.now() - timedelta(days=options["days"])

    def get_until(self, options: dict[str, Any]) -> datetime:
        """Return the end of the analyzed period.

        ``--until`` maps to the last representable moment of that day so the
        whole day is covered; without it the period ends at the current time.

        Raises:
            CommandError: If ``--until`` is not a valid date.
        """
        if options["until"]:
            return self._parse_day(options["until"], "--until", end_of_day=True)
        return timezone.now()

    def _parse_day(
        self, value: str, option: str, *, end_of_day: bool = False
    ) -> datetime:
        """Convert a YYYY-MM-DD option value into an aware datetime."""
        try:
            parsed = parse_date(value)
        except ValueError as error:
            # parse_date() raises for well-formed but impossible dates such
            # as 2026-02-30; surface that as a regular command error.
            msg = f"{option} is not a valid calendar date."
            raise CommandError(msg) from error
        if parsed is None:
            msg = f"{option} must use YYYY-MM-DD format."
            raise CommandError(msg)
        boundary = datetime.max.time() if end_of_day else datetime.min.time()
        return timezone.make_aware(datetime.combine(parsed, boundary))

    def write_metric(self, label: str, values: Iterable[int]) -> None:
        """Write median, average, p75 and p90 of ``values`` under ``label``.

        ``values`` must not be empty.
        """
        ordered = sorted(values)
        self.stdout.write(f"{label}:")
        self.stdout.write(f"  median: {median(ordered):.0f}")
        self.stdout.write(f"  average: {mean(ordered):.0f}")
        self.stdout.write(f"  p75: {self.percentile(ordered, 75):.0f}")
        self.stdout.write(f"  p90: {self.percentile(ordered, 90):.0f}")

    def percentile(self, values: list[int], percent: int) -> float:
        """Return a percentile of ``values`` using linear interpolation.

        Args:
            values: Non-empty list sorted in ascending order.
            percent: Requested percentile, between 0 and 100 inclusive.

        Raises:
            ValueError: If ``values`` is empty or ``percent`` is out of range.
        """
        if not values:
            msg = "Cannot compute a percentile of an empty sequence."
            raise ValueError(msg)
        if not 0 <= percent <= 100:
            msg = "Percentile must be between 0 and 100."
            raise ValueError(msg)
        # Fractional index into the sorted values; the result is interpolated
        # between its two neighbours. A single value yields itself as a float.
        position = (len(values) - 1) * percent / 100
        lower = int(position)
        upper = min(lower + 1, len(values) - 1)
        weight = position - lower
        return values[lower] * (1 - weight) + values[upper] * weight

0 commit comments

Comments
 (0)