From 4034c5b1e9af14bf9f01a9ed4a664057f16a883f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Mon, 15 Jun 2026 10:11:43 +0200 Subject: [PATCH 1/3] feat(trans): add translator work analysis command --- docs/admin/management.rst | 53 +++++ docs/changes.rst | 1 + .../commands/analyze_translator_work.py | 187 ++++++++++++++++++ 3 files changed, 241 insertions(+) create mode 100644 weblate/trans/management/commands/analyze_translator_work.py diff --git a/docs/admin/management.rst b/docs/admin/management.rst index 0bd90375d52d..68c095eeec62 100644 --- a/docs/admin/management.rst +++ b/docs/admin/management.rst @@ -96,6 +96,59 @@ Example: weblate --author michal@cihar.com add_suggestions weblate application cs /tmp/suggestions-cs.po +analyze_translator_work +----------------------- + +.. weblate-admin:: analyze_translator_work + +Analyzes change history to estimate realistic translator throughput per day. +The command includes only active human users and unit-backed manual translation +changes, and applies daily minimum and maximum thresholds to skip obvious outliers, +bots, bulk imports, uploads, and other events that would pollute the result. + +.. weblate-admin-option:: --days DAYS + + Number of recent days to analyze when ``--since`` is not specified. + +.. weblate-admin-option:: --since YYYY-MM-DD + + Start date for the analysis. + +.. weblate-admin-option:: --until YYYY-MM-DD + + End date for the analysis. + +.. weblate-admin-option:: --project PROJECT + + Limit the analysis to a project slug. + +.. weblate-admin-option:: --component PROJECT/COMPONENT + + Limit the analysis to a component. + +.. weblate-admin-option:: --language LANGUAGE + + Limit the analysis to a language code. + +.. weblate-admin-option:: --min-changes COUNT + + Minimum translated strings per user day to include. + +.. weblate-admin-option:: --max-changes COUNT + + Maximum translated strings per user day to include. + +.. 
weblate-admin-option:: --max-words COUNT + + Maximum translated source words per user day to include. + +Example: + +.. code-block:: sh + + weblate analyze_translator_work --project weblate --since 2026-01-01 + + auto_translate -------------- diff --git a/docs/changes.rst b/docs/changes.rst index 4b5dc377ddb4..f601b185ce6f 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -25,6 +25,7 @@ Weblate 2026.7 * Added :ref:`distribution-packaging` guidance for distribution maintainers. * Large component imports now avoid duplicate translation-memory processing. * :ref:`gettext` files can now be configured to remove obsolete strings on save. +* Added :wladmin:`analyze_translator_work` to estimate realistic daily translator throughput from change history. .. rubric:: Bug fixes diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py new file mode 100644 index 000000000000..7c4cce90aa1c --- /dev/null +++ b/weblate/trans/management/commands/analyze_translator_work.py @@ -0,0 +1,187 @@ +# Copyright © Michal Čihař +# +# SPDX-License-Identifier: GPL-3.0-or-later +from __future__ import annotations + +from datetime import datetime, timedelta +from statistics import mean, median +from typing import TYPE_CHECKING, Any + +from django.core.management.base import CommandError +from django.db.models import Count, Sum +from django.db.models.functions import TruncDate +from django.utils import timezone +from django.utils.dateparse import parse_date + +from weblate.trans.actions import ActionEvents +from weblate.trans.models import Change +from weblate.utils.management.base import BaseCommand + +if TYPE_CHECKING: + from collections.abc import Iterable + + from django.core.management.base import CommandParser + from django.db.models import QuerySet + + +TRANSLATOR_ACTIONS = ( + ActionEvents.CHANGE, + ActionEvents.NEW, + ActionEvents.ACCEPT, +) + + +class Command(BaseCommand): + help = "Analyze realistic 
daily translator throughput from change history" + + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--days", + type=int, + default=365, + help="Number of recent days to analyze when --since is not specified", + ) + parser.add_argument( + "--since", + help="Start date to analyze, in YYYY-MM-DD format", + ) + parser.add_argument( + "--until", + help="End date to analyze, in YYYY-MM-DD format (defaults to now)", + ) + parser.add_argument( + "--project", + help="Limit analysis to a project slug", + ) + parser.add_argument( + "--component", + help="Limit analysis to a component path as project/component", + ) + parser.add_argument( + "--language", + help="Limit analysis to a language code", + ) + parser.add_argument( + "--min-changes", + type=int, + default=5, + help="Minimum translated strings per user day to include", + ) + parser.add_argument( + "--max-changes", + type=int, + default=1000, + help="Maximum translated strings per user day to include", + ) + parser.add_argument( + "--max-words", + type=int, + default=10000, + help="Maximum translated source words per user day to include", + ) + + def handle(self, *args: Any, **options: Any) -> None: + since = self.get_since(options) + until = self.get_until(options) + if since >= until: + msg = "The analysis start date must be before the end date." 
+ raise CommandError(msg) + + base = self.get_queryset(options).filter( + timestamp__gte=since, timestamp__lt=until + ) + rows = list( + base.annotate(day=TruncDate("timestamp")) + .values("day", "user_id", "user__username") + .annotate(strings=Count("id"), words=Sum("unit__num_words")) + .order_by("day", "user__username") + ) + + included = [ + row + for row in rows + if options["min_changes"] <= row["strings"] <= options["max_changes"] + and (row["words"] or 0) <= options["max_words"] + ] + excluded = len(rows) - len(included) + + self.stdout.write("Translator work analysis") + self.stdout.write(f"Period: {since:%Y-%m-%d} to {until:%Y-%m-%d}") + self.stdout.write( + "Included actions: " + + ", ".join( + ActionEvents(action).name.lower() for action in TRANSLATOR_ACTIONS + ) + ) + self.stdout.write( + "Filtering: active human users, unit-backed changes, " + f"{options['min_changes']}..{options['max_changes']} strings/day, " + f"up to {options['max_words']} words/day" + ) + self.stdout.write(f"User days: {len(included)} included, {excluded} excluded") + + if not included: + self.stdout.write("No matching translator activity found.") + return + + strings = [row["strings"] for row in included] + words = [row["words"] or 0 for row in included] + self.stdout.write("") + self.write_metric("Translated strings per day", strings) + self.write_metric("Source words per day", words) + + def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]: + queryset = Change.objects.filter( + action__in=TRANSLATOR_ACTIONS, + unit__isnull=False, + user__isnull=False, + user__is_active=True, + user__is_bot=False, + ) + if options["project"]: + queryset = queryset.filter(project__slug=options["project"]) + if options["component"]: + try: + project, component = options["component"].split("/", 1) + except ValueError as error: + msg = "Component must be specified as project/component." 
+ raise CommandError(msg) from error + queryset = queryset.filter(project__slug=project, component__slug=component) + if options["language"]: + queryset = queryset.filter(language__code=options["language"]) + return queryset + + def get_since(self, options: dict[str, Any]) -> datetime: + if options["since"]: + parsed = parse_date(options["since"]) + if parsed is None: + msg = "--since must use YYYY-MM-DD format." + raise CommandError(msg) + return timezone.make_aware(datetime.combine(parsed, datetime.min.time())) + return timezone.now() - timedelta(days=options["days"]) + + def get_until(self, options: dict[str, Any]) -> datetime: + if options["until"]: + parsed = parse_date(options["until"]) + if parsed is None: + msg = "--until must use YYYY-MM-DD format." + raise CommandError(msg) + return timezone.make_aware(datetime.combine(parsed, datetime.max.time())) + return timezone.now() + + def write_metric(self, label: str, values: Iterable[int]) -> None: + ordered = sorted(values) + self.stdout.write(f"{label}:") + self.stdout.write(f" median: {median(ordered):.0f}") + self.stdout.write(f" average: {mean(ordered):.0f}") + self.stdout.write(f" p75: {self.percentile(ordered, 75):.0f}") + self.stdout.write(f" p90: {self.percentile(ordered, 90):.0f}") + + def percentile(self, values: list[int], percent: int) -> float: + if len(values) == 1: + return values[0] + position = (len(values) - 1) * percent / 100 + lower = int(position) + upper = min(lower + 1, len(values) - 1) + weight = position - lower + return values[lower] * (1 - weight) + values[upper] * weight From 2c4235ddc82f622f64e577b7bd3bad8b5c20b726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Mon, 15 Jun 2026 10:51:46 +0200 Subject: [PATCH 2/3] feat(trans): add translator work analysis command ### Motivation - Provide a lightweight management command to estimate realistic daily translator throughput by analyzing change history while avoiding noisy data (bots, bulk uploads, automated 
events and obvious outliers). - Make the analysis usable for projects/components/languages and bounded by date ranges for operational planning and reporting. ### Description - Add `weblate/trans/management/commands/analyze_translator_work.py` which groups unit-backed changes by author/day (actions: `CHANGE`, `NEW`, `ACCEPT`), filters to active non-bot authors, and computes median/average/p75/p90 for translated strings and source words. - Add command options: `--days`, `--since`, `--until`, `--project`, `--component`, `--language`, `--min-changes`, `--max-changes`, and `--max-words` to scope and filter results. - Update documentation at `docs/admin/management.rst` to document the new command and add a changelog note in `docs/changes.rst` announcing the feature. ### Testing - `python -m compileall weblate/trans/management/commands/analyze_translator_work.py` completed successfully. - `.venv/bin/python manage.py analyze_translator_work --help` ran and displayed the expected help text. - `git diff --cached --check` passed with no problems reported for the changed files. - `uv run prek run ruff --files ...` could not be executed due to an environment network restriction when initializing hooks (external hook repo cloning failed), and `uv run mypy ...` produced existing project-wide type findings unrelated to the new command file. 
--- .../commands/analyze_translator_work.py | 39 ++++++++-------- weblate/trans/tests/test_commands.py | 44 ++++++++++++++++++- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py index 7c4cce90aa1c..fbcd7114c30c 100644 --- a/weblate/trans/management/commands/analyze_translator_work.py +++ b/weblate/trans/management/commands/analyze_translator_work.py @@ -5,7 +5,7 @@ from datetime import datetime, timedelta from statistics import mean, median -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, cast from django.core.management.base import CommandError from django.db.models import Count, Sum @@ -80,7 +80,7 @@ def add_arguments(self, parser: CommandParser) -> None: help="Maximum translated source words per user day to include", ) - def handle(self, *args: Any, **options: Any) -> None: + def handle(self, *args: object, **options: object) -> None: since = self.get_since(options) until = self.get_until(options) if since >= until: @@ -92,16 +92,19 @@ def handle(self, *args: Any, **options: Any) -> None: ) rows = list( base.annotate(day=TruncDate("timestamp")) - .values("day", "user_id", "user__username") + .values("day", "author_id", "author__username") .annotate(strings=Count("id"), words=Sum("unit__num_words")) - .order_by("day", "user__username") + .order_by("day", "author__username") ) + min_changes = cast("int", options["min_changes"]) + max_changes = cast("int", options["max_changes"]) + max_words = cast("int", options["max_words"]) included = [ row for row in rows - if options["min_changes"] <= row["strings"] <= options["max_changes"] - and (row["words"] or 0) <= options["max_words"] + if min_changes <= row["strings"] <= max_changes + and (row["words"] or 0) <= max_words ] excluded = len(rows) - len(included) @@ -115,8 +118,8 @@ def handle(self, *args: Any, **options: Any) -> None: ) self.stdout.write( 
"Filtering: active human users, unit-backed changes, " - f"{options['min_changes']}..{options['max_changes']} strings/day, " - f"up to {options['max_words']} words/day" + f"{min_changes}..{max_changes} strings/day, " + f"up to {max_words} words/day" ) self.stdout.write(f"User days: {len(included)} included, {excluded} excluded") @@ -130,19 +133,19 @@ def handle(self, *args: Any, **options: Any) -> None: self.write_metric("Translated strings per day", strings) self.write_metric("Source words per day", words) - def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]: + def get_queryset(self, options: dict[str, object]) -> QuerySet[Change]: queryset = Change.objects.filter( action__in=TRANSLATOR_ACTIONS, unit__isnull=False, - user__isnull=False, - user__is_active=True, - user__is_bot=False, + author__isnull=False, + author__is_active=True, + author__is_bot=False, ) if options["project"]: queryset = queryset.filter(project__slug=options["project"]) if options["component"]: try: - project, component = options["component"].split("/", 1) + project, component = cast("str", options["component"]).split("/", 1) except ValueError as error: msg = "Component must be specified as project/component." raise CommandError(msg) from error @@ -151,18 +154,18 @@ def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]: queryset = queryset.filter(language__code=options["language"]) return queryset - def get_since(self, options: dict[str, Any]) -> datetime: + def get_since(self, options: dict[str, object]) -> datetime: if options["since"]: - parsed = parse_date(options["since"]) + parsed = parse_date(cast("str", options["since"])) if parsed is None: msg = "--since must use YYYY-MM-DD format." 
raise CommandError(msg) return timezone.make_aware(datetime.combine(parsed, datetime.min.time())) - return timezone.now() - timedelta(days=options["days"]) + return timezone.now() - timedelta(days=cast("int", options["days"])) - def get_until(self, options: dict[str, Any]) -> datetime: + def get_until(self, options: dict[str, object]) -> datetime: if options["until"]: - parsed = parse_date(options["until"]) + parsed = parse_date(cast("str", options["until"])) if parsed is None: msg = "--until must use YYYY-MM-DD format." raise CommandError(msg) diff --git a/weblate/trans/tests/test_commands.py b/weblate/trans/tests/test_commands.py index 74066c60abdc..cf0d51205d3a 100644 --- a/weblate/trans/tests/test_commands.py +++ b/weblate/trans/tests/test_commands.py @@ -16,19 +16,24 @@ from weblate.accounts.models import Profile from weblate.runner import main +from weblate.trans.actions import ActionEvents from weblate.trans.file_format_params import ( FILE_FORMATS_PARAMS, BaseFileFormatParam, register_file_format_param, ) -from weblate.trans.models import Component, Translation +from weblate.trans.models import Change, Component, Translation from weblate.trans.tests.test_models import RepoTestCase from weblate.trans.tests.test_views import ( ComponentTestCase, FixtureComponentTestCase, ViewTestCase, ) -from weblate.trans.tests.utils import create_test_user, get_test_file +from weblate.trans.tests.utils import ( + create_another_user, + create_test_user, + get_test_file, +) from weblate.vcs.mercurial import HgRepository TEST_PO = get_test_file("cs.po") @@ -47,6 +52,41 @@ def test_help(self) -> None: sys.stdout = restore +class AnalyzeTranslatorWorkTest(ComponentTestCase): + def create_change(self, author, user) -> None: + unit = self.get_unit() + Change.objects.create( + action=ActionEvents.ACCEPT, + author=author, + user=user, + unit=unit, + translation=self.translation, + component=self.component, + project=self.project, + language=self.translation.language, + ) + + def 
test_accepted_suggestions_are_grouped_by_author(self) -> None: + reviewer = self.user + first_author = create_another_user("-first") + second_author = create_another_user("-second") + for _unused in range(3): + self.create_change(first_author, reviewer) + self.create_change(second_author, reviewer) + + output = StringIO() + call_command( + "analyze_translator_work", + days=1, + min_changes=1, + stdout=output, + ) + + result = output.getvalue() + self.assertIn("User days: 2 included, 0 excluded", result) + self.assertIn(" median: 3", result) + + class ImportProjectTest(RepoTestCase): def do_import(self, path=None, **kwargs) -> None: with override_settings(CREATE_GLOSSARIES=self.CREATE_GLOSSARIES): From 6cf086778f23b0018c3bbf9e2ec0b18c93f8c369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Mon, 15 Jun 2026 11:10:14 +0200 Subject: [PATCH 3/3] Add `analyze_translator_work` management command with docs and tests ### Motivation * Provide a way to estimate realistic daily translator throughput from change history while excluding bots, bulk imports, uploads and other outliers. ### Description * Add new management command `analyze_translator_work` implemented in `weblate/trans/management/commands/analyze_translator_work.py` that aggregates change rows per user-day, filters by activity thresholds and outputs median/average/p75/p90 metrics for strings and source words per day. * Command accepts arguments `--days`, `--since`, `--until`, `--project`, `--component`, `--language`, `--min-changes`, `--max-changes`, and `--max-words` and restricts the queryset to active human users and unit-backed changes. * Document the new admin command in `docs/admin/management.rst` and add a release note entry in `docs/changes.rst`. * Add unit test `AnalyzeTranslatorWorkTest` in `weblate/trans/tests/test_commands.py` (and adjust related imports) to validate grouping and reporting behavior for accepted suggestion changes. 
### Testing * Ran the Django unit tests including the new `AnalyzeTranslatorWorkTest` which invokes `call_command('analyze_translator_work', ...)`, and the test passed. * Existing management command tests were exercised as part of the test run and completed successfully. --- .../commands/analyze_translator_work.py | 15 +++++++------- weblate/trans/tests/test_commands.py | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py index fbcd7114c30c..8c8c8bbd3030 100644 --- a/weblate/trans/management/commands/analyze_translator_work.py +++ b/weblate/trans/management/commands/analyze_translator_work.py @@ -14,7 +14,7 @@ from django.utils.dateparse import parse_date from weblate.trans.actions import ActionEvents -from weblate.trans.models import Change +from weblate.trans.models import Change, Component from weblate.utils.management.base import BaseCommand if TYPE_CHECKING: @@ -144,12 +144,13 @@ def get_queryset(self, options: dict[str, object]) -> QuerySet[Change]: if options["project"]: queryset = queryset.filter(project__slug=options["project"]) if options["component"]: - try: - project, component = cast("str", options["component"]).split("/", 1) - except ValueError as error: - msg = "Component must be specified as project/component." - raise CommandError(msg) from error - queryset = queryset.filter(project__slug=project, component__slug=component) + components = Component.objects.filter_by_path( + cast("str", options["component"]) + ) + if not components.exists(): + msg = "No matching component found." 
+ raise CommandError(msg) + queryset = queryset.filter(component__in=components) if options["language"]: queryset = queryset.filter(language__code=options["language"]) return queryset diff --git a/weblate/trans/tests/test_commands.py b/weblate/trans/tests/test_commands.py index cf0d51205d3a..6ddd7881bd14 100644 --- a/weblate/trans/tests/test_commands.py +++ b/weblate/trans/tests/test_commands.py @@ -86,6 +86,26 @@ def test_accepted_suggestions_are_grouped_by_author(self) -> None: self.assertIn("User days: 2 included, 0 excluded", result) self.assertIn(" median: 3", result) + def test_component_filter_uses_full_path(self) -> None: + category = self.create_category(self.project) + self.component.category = category + self.component.save(update_fields=["category"]) + for _unused in range(3): + self.create_change(self.user, self.user) + + output = StringIO() + call_command( + "analyze_translator_work", + component="/".join(self.component.get_url_path()), + days=1, + min_changes=1, + stdout=output, + ) + + result = output.getvalue() + self.assertIn("User days: 1 included, 0 excluded", result) + self.assertIn(" median: 3", result) + class ImportProjectTest(RepoTestCase): def do_import(self, path=None, **kwargs) -> None: