diff --git a/docs/admin/management.rst b/docs/admin/management.rst index 0bd90375d52d..68c095eeec62 100644 --- a/docs/admin/management.rst +++ b/docs/admin/management.rst @@ -96,6 +96,59 @@ Example: weblate --author michal@cihar.com add_suggestions weblate application cs /tmp/suggestions-cs.po +analyze_translator_work +----------------------- + +.. weblate-admin:: analyze_translator_work + +Analyzes change history to estimate realistic translator throughput per day. +The command includes only active human users and unit-backed manual translation +changes, and applies daily minimum and maximum thresholds to skip obvious outliers, +bots, bulk imports, uploads, and other events that would pollute the result. + +.. weblate-admin-option:: --days DAYS + + Number of recent days to analyze when ``--since`` is not specified. + +.. weblate-admin-option:: --since YYYY-MM-DD + + Start date for the analysis. + +.. weblate-admin-option:: --until YYYY-MM-DD + + End date for the analysis. + +.. weblate-admin-option:: --project PROJECT + + Limit the analysis to a project slug. + +.. weblate-admin-option:: --component PROJECT/COMPONENT + + Limit the analysis to a component. + +.. weblate-admin-option:: --language LANGUAGE + + Limit the analysis to a language code. + +.. weblate-admin-option:: --min-changes COUNT + + Minimum translated strings per user day to include. + +.. weblate-admin-option:: --max-changes COUNT + + Maximum translated strings per user day to include. + +.. weblate-admin-option:: --max-words COUNT + + Maximum translated source words per user day to include. + +Example: + +.. code-block:: sh + + weblate analyze_translator_work --project weblate --since 2026-01-01 + + auto_translate -------------- diff --git a/docs/changes.rst b/docs/changes.rst index 4b5dc377ddb4..f601b185ce6f 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -25,6 +25,7 @@ Weblate 2026.7 * Added :ref:`distribution-packaging` guidance for distribution maintainers. * Large component imports now avoid duplicate translation-memory processing. * :ref:`gettext` files can now be configured to remove obsolete strings on save. +* Added :wladmin:`analyze_translator_work` to estimate realistic daily translator throughput from change history. .. rubric:: Bug fixes diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py new file mode 100644 index 000000000000..8c8c8bbd3030 --- /dev/null +++ b/weblate/trans/management/commands/analyze_translator_work.py @@ -0,0 +1,191 @@ +# Copyright © Michal Čihař +# +# SPDX-License-Identifier: GPL-3.0-or-later +from __future__ import annotations + +from datetime import datetime, timedelta +from statistics import mean, median +from typing import TYPE_CHECKING, cast + +from django.core.management.base import CommandError +from django.db.models import Count, Sum +from django.db.models.functions import TruncDate +from django.utils import timezone +from django.utils.dateparse import parse_date + +from weblate.trans.actions import ActionEvents +from weblate.trans.models import Change, Component +from weblate.utils.management.base import BaseCommand + +if TYPE_CHECKING: + from collections.abc import Iterable + + from django.core.management.base import CommandParser + from django.db.models import QuerySet + + +TRANSLATOR_ACTIONS = ( + ActionEvents.CHANGE, + ActionEvents.NEW, + ActionEvents.ACCEPT, +) + + +class Command(BaseCommand): + help = "Analyze realistic daily translator throughput from change history" + + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--days", + type=int, + default=365, + help="Number of recent days to analyze when --since is not specified", + ) + parser.add_argument( + "--since", + help="Start date to analyze, in YYYY-MM-DD format", + ) + parser.add_argument( + "--until", + help="End date to analyze, in YYYY-MM-DD format (defaults to now)", + ) + parser.add_argument( + "--project", + help="Limit analysis to a project slug", + ) + parser.add_argument( + "--component", + help="Limit analysis to a component path as project/component", + ) + parser.add_argument( + "--language", + help="Limit analysis to a language code", + ) + parser.add_argument( + "--min-changes", + type=int, + default=5, + help="Minimum translated strings per user day to include", + ) + parser.add_argument( + "--max-changes", + type=int, + default=1000, + help="Maximum translated strings per user day to include", + ) + parser.add_argument( + "--max-words", + type=int, + default=10000, + help="Maximum translated source words per user day to include", + ) + + def handle(self, *args: object, **options: object) -> None: + since = self.get_since(options) + until = self.get_until(options) + if since >= until: + msg = "The analysis start date must be before the end date." + raise CommandError(msg) + + base = self.get_queryset(options).filter( + timestamp__gte=since, timestamp__lt=until + ) + rows = list( + base.annotate(day=TruncDate("timestamp")) + .values("day", "author_id", "author__username") + .annotate(strings=Count("id"), words=Sum("unit__num_words")) + .order_by("day", "author__username") + ) + + min_changes = cast("int", options["min_changes"]) + max_changes = cast("int", options["max_changes"]) + max_words = cast("int", options["max_words"]) + included = [ + row + for row in rows + if min_changes <= row["strings"] <= max_changes + and (row["words"] or 0) <= max_words + ] + excluded = len(rows) - len(included) + + self.stdout.write("Translator work analysis") + self.stdout.write(f"Period: {since:%Y-%m-%d} to {until:%Y-%m-%d}") + self.stdout.write( + "Included actions: " + + ", ".join( + ActionEvents(action).name.lower() for action in TRANSLATOR_ACTIONS + ) + ) + self.stdout.write( + "Filtering: active human users, unit-backed changes, " + f"{min_changes}..{max_changes} strings/day, " + f"up to {max_words} words/day" + ) + self.stdout.write(f"User days: {len(included)} included, {excluded} excluded") + + if not included: + self.stdout.write("No matching translator activity found.") + return + + strings = [row["strings"] for row in included] + words = [row["words"] or 0 for row in included] + self.stdout.write("") + self.write_metric("Translated strings per day", strings) + self.write_metric("Source words per day", words) + + def get_queryset(self, options: dict[str, object]) -> QuerySet[Change]: + queryset = Change.objects.filter( + action__in=TRANSLATOR_ACTIONS, + unit__isnull=False, + author__isnull=False, + author__is_active=True, + author__is_bot=False, + ) + if options["project"]: + queryset = queryset.filter(project__slug=options["project"]) + if options["component"]: + components = Component.objects.filter_by_path( + cast("str", options["component"]) + ) + if not components.exists(): + msg = "No matching component found." + raise CommandError(msg) + queryset = queryset.filter(component__in=components) + if options["language"]: + queryset = queryset.filter(language__code=options["language"]) + return queryset + + def get_since(self, options: dict[str, object]) -> datetime: + if options["since"]: + parsed = parse_date(cast("str", options["since"])) + if parsed is None: + msg = "--since must use YYYY-MM-DD format." + raise CommandError(msg) + return timezone.make_aware(datetime.combine(parsed, datetime.min.time())) + return timezone.now() - timedelta(days=cast("int", options["days"])) + + def get_until(self, options: dict[str, object]) -> datetime: + if options["until"]: + parsed = parse_date(cast("str", options["until"])) + if parsed is None: + msg = "--until must use YYYY-MM-DD format." + raise CommandError(msg) + return timezone.make_aware(datetime.combine(parsed, datetime.max.time())) + return timezone.now() + + def write_metric(self, label: str, values: Iterable[int]) -> None: + ordered = sorted(values) + self.stdout.write(f"{label}:") + self.stdout.write(f" median: {median(ordered):.0f}") + self.stdout.write(f" average: {mean(ordered):.0f}") + self.stdout.write(f" p75: {self.percentile(ordered, 75):.0f}") + self.stdout.write(f" p90: {self.percentile(ordered, 90):.0f}") + + def percentile(self, values: list[int], percent: int) -> float: + if len(values) == 1: + return values[0] + position = (len(values) - 1) * percent / 100 + lower = int(position) + upper = min(lower + 1, len(values) - 1) + weight = position - lower + return values[lower] * (1 - weight) + values[upper] * weight diff --git a/weblate/trans/tests/test_commands.py b/weblate/trans/tests/test_commands.py index 74066c60abdc..6ddd7881bd14 100644 --- a/weblate/trans/tests/test_commands.py +++ b/weblate/trans/tests/test_commands.py @@ -16,19 +16,24 @@ from weblate.accounts.models import Profile from weblate.runner import main +from weblate.trans.actions import ActionEvents from weblate.trans.file_format_params import ( FILE_FORMATS_PARAMS, BaseFileFormatParam, register_file_format_param, ) -from weblate.trans.models import Component, Translation +from weblate.trans.models import Change, Component, Translation from weblate.trans.tests.test_models import RepoTestCase from weblate.trans.tests.test_views import ( ComponentTestCase, FixtureComponentTestCase, ViewTestCase, ) -from weblate.trans.tests.utils import create_test_user, get_test_file +from weblate.trans.tests.utils import ( + create_another_user, + create_test_user, + get_test_file, +) from weblate.vcs.mercurial import HgRepository TEST_PO = get_test_file("cs.po") @@ -47,6 +52,61 @@ def test_help(self) -> None: sys.stdout = restore +class AnalyzeTranslatorWorkTest(ComponentTestCase): + def create_change(self, author, user) -> None: + unit = self.get_unit() + Change.objects.create( + action=ActionEvents.ACCEPT, + author=author, + user=user, + unit=unit, + translation=self.translation, + component=self.component, + project=self.project, + language=self.translation.language, + ) + + def test_accepted_suggestions_are_grouped_by_author(self) -> None: + reviewer = self.user + first_author = create_another_user("-first") + second_author = create_another_user("-second") + for _unused in range(3): + self.create_change(first_author, reviewer) + self.create_change(second_author, reviewer) + + output = StringIO() + call_command( + "analyze_translator_work", + days=1, + min_changes=1, + stdout=output, + ) + + result = output.getvalue() + self.assertIn("User days: 2 included, 0 excluded", result) + self.assertIn(" median: 3", result) + + def test_component_filter_uses_full_path(self) -> None: + category = self.create_category(self.project) + self.component.category = category + self.component.save(update_fields=["category"]) + for _unused in range(3): + self.create_change(self.user, self.user) + + output = StringIO() + call_command( + "analyze_translator_work", + component="/".join(self.component.get_url_path()), + days=1, + min_changes=1, + stdout=output, + ) + + result = output.getvalue() + self.assertIn("User days: 1 included, 0 excluded", result) + self.assertIn(" median: 3", result) + + class ImportProjectTest(RepoTestCase): def do_import(self, path=None, **kwargs) -> None: with override_settings(CREATE_GLOSSARIES=self.CREATE_GLOSSARIES):