From 4034c5b1e9af14bf9f01a9ed4a664057f16a883f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Mon, 15 Jun 2026 10:11:43 +0200
Subject: [PATCH 1/3] feat(trans): add translator work analysis command

---
 docs/admin/management.rst                     |  53 +++++
 docs/changes.rst                              |   1 +
 .../commands/analyze_translator_work.py       | 187 ++++++++++++++++++
 3 files changed, 241 insertions(+)
 create mode 100644 weblate/trans/management/commands/analyze_translator_work.py

diff --git a/docs/admin/management.rst b/docs/admin/management.rst
index 0bd90375d52d..68c095eeec62 100644
--- a/docs/admin/management.rst
+++ b/docs/admin/management.rst
@@ -96,6 +96,59 @@ Example:
     weblate --author michal@cihar.com add_suggestions weblate application cs /tmp/suggestions-cs.po
 
 
+analyze_translator_work
+-----------------------
+
+.. weblate-admin:: analyze_translator_work
+
+Analyzes change history to estimate realistic translator throughput per day.
+The command includes only active human users and unit-backed manual translation
+changes, and applies daily minimum and maximum thresholds to skip obvious outliers,
+bots, bulk imports, uploads, and other events that would pollute the result.
+
+.. weblate-admin-option:: --days DAYS
+
+    Number of recent days to analyze when ``--since`` is not specified. Defaults to 365.
+
+.. weblate-admin-option:: --since YYYY-MM-DD
+
+    Start date for the analysis. Takes precedence over ``--days``.
+
+.. weblate-admin-option:: --until YYYY-MM-DD
+
+    End date for the analysis (inclusive). Defaults to the current time.
+
+.. weblate-admin-option:: --project PROJECT
+
+    Limit the analysis to a project slug.
+
+.. weblate-admin-option:: --component PROJECT/COMPONENT
+
+    Limit the analysis to a component, specified by its full path (including any categories).
+
+.. weblate-admin-option:: --language LANGUAGE
+
+    Limit the analysis to a language code.
+
+.. weblate-admin-option:: --min-changes COUNT
+
+    Minimum translated strings per user day to include. Defaults to 5.
+
+.. weblate-admin-option:: --max-changes COUNT
+
+    Maximum translated strings per user day to include. Defaults to 1000.
+
+.. weblate-admin-option:: --max-words COUNT
+
+    Maximum translated source words per user day to include. Defaults to 10000.
+
+Example:
+
+.. code-block:: sh
+
+    weblate analyze_translator_work --project weblate --since 2026-01-01
+
+
 auto_translate
 --------------
 
diff --git a/docs/changes.rst b/docs/changes.rst
index 4b5dc377ddb4..f601b185ce6f 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -25,6 +25,7 @@ Weblate 2026.7
 * Added :ref:`distribution-packaging` guidance for distribution maintainers.
 * Large component imports now avoid duplicate translation-memory processing.
 * :ref:`gettext` files can now be configured to remove obsolete strings on save.
+* Added :wladmin:`analyze_translator_work` to estimate realistic daily translator throughput from change history.
 
 .. rubric:: Bug fixes
 
diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py
new file mode 100644
index 000000000000..7c4cce90aa1c
--- /dev/null
+++ b/weblate/trans/management/commands/analyze_translator_work.py
@@ -0,0 +1,187 @@
+# Copyright © Michal Čihař <michal@weblate.org>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+from statistics import mean, median
+from typing import TYPE_CHECKING, Any
+
+from django.core.management.base import CommandError
+from django.db.models import Count, Sum
+from django.db.models.functions import TruncDate
+from django.utils import timezone
+from django.utils.dateparse import parse_date
+
+from weblate.trans.actions import ActionEvents
+from weblate.trans.models import Change
+from weblate.utils.management.base import BaseCommand
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from django.core.management.base import CommandParser
+    from django.db.models import QuerySet
+
+
+TRANSLATOR_ACTIONS = (
+    ActionEvents.CHANGE,
+    ActionEvents.NEW,
+    ActionEvents.ACCEPT,
+)
+
+
+class Command(BaseCommand):
+    help = "Analyze realistic daily translator throughput from change history"
+
+    def add_arguments(self, parser: CommandParser) -> None:
+        parser.add_argument(
+            "--days",
+            type=int,
+            default=365,
+            help="Number of recent days to analyze when --since is not specified",
+        )
+        parser.add_argument(
+            "--since",
+            help="Start date to analyze, in YYYY-MM-DD format",
+        )
+        parser.add_argument(
+            "--until",
+            help="End date to analyze, in YYYY-MM-DD format (defaults to now)",
+        )
+        parser.add_argument(
+            "--project",
+            help="Limit analysis to a project slug",
+        )
+        parser.add_argument(
+            "--component",
+            help="Limit analysis to a component path as project/component",
+        )
+        parser.add_argument(
+            "--language",
+            help="Limit analysis to a language code",
+        )
+        parser.add_argument(
+            "--min-changes",
+            type=int,
+            default=5,
+            help="Minimum translated strings per user day to include",
+        )
+        parser.add_argument(
+            "--max-changes",
+            type=int,
+            default=1000,
+            help="Maximum translated strings per user day to include",
+        )
+        parser.add_argument(
+            "--max-words",
+            type=int,
+            default=10000,
+            help="Maximum translated source words per user day to include",
+        )
+
+    def handle(self, *args: Any, **options: Any) -> None:
+        since = self.get_since(options)
+        until = self.get_until(options)
+        if since >= until:
+            msg = "The analysis start date must be before the end date."
+            raise CommandError(msg)
+
+        base = self.get_queryset(options).filter(
+            timestamp__gte=since, timestamp__lt=until
+        )
+        rows = list(
+            base.annotate(day=TruncDate("timestamp"))
+            .values("day", "user_id", "user__username")
+            .annotate(strings=Count("id"), words=Sum("unit__num_words"))
+            .order_by("day", "user__username")
+        )
+
+        included = [
+            row
+            for row in rows
+            if options["min_changes"] <= row["strings"] <= options["max_changes"]
+            and (row["words"] or 0) <= options["max_words"]
+        ]
+        excluded = len(rows) - len(included)
+
+        self.stdout.write("Translator work analysis")
+        self.stdout.write(f"Period: {since:%Y-%m-%d} to {until:%Y-%m-%d}")
+        self.stdout.write(
+            "Included actions: "
+            + ", ".join(
+                ActionEvents(action).name.lower() for action in TRANSLATOR_ACTIONS
+            )
+        )
+        self.stdout.write(
+            "Filtering: active human users, unit-backed changes, "
+            f"{options['min_changes']}..{options['max_changes']} strings/day, "
+            f"up to {options['max_words']} words/day"
+        )
+        self.stdout.write(f"User days: {len(included)} included, {excluded} excluded")
+
+        if not included:
+            self.stdout.write("No matching translator activity found.")
+            return
+
+        strings = [row["strings"] for row in included]
+        words = [row["words"] or 0 for row in included]
+        self.stdout.write("")
+        self.write_metric("Translated strings per day", strings)
+        self.write_metric("Source words per day", words)
+
+    def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]:
+        queryset = Change.objects.filter(
+            action__in=TRANSLATOR_ACTIONS,
+            unit__isnull=False,
+            user__isnull=False,
+            user__is_active=True,
+            user__is_bot=False,
+        )
+        if options["project"]:
+            queryset = queryset.filter(project__slug=options["project"])
+        if options["component"]:
+            try:
+                project, component = options["component"].split("/", 1)
+            except ValueError as error:
+                msg = "Component must be specified as project/component."
+                raise CommandError(msg) from error
+            queryset = queryset.filter(project__slug=project, component__slug=component)
+        if options["language"]:
+            queryset = queryset.filter(language__code=options["language"])
+        return queryset
+
+    def get_since(self, options: dict[str, Any]) -> datetime:
+        if options["since"]:
+            parsed = parse_date(options["since"])
+            if parsed is None:
+                msg = "--since must use YYYY-MM-DD format."
+                raise CommandError(msg)
+            return timezone.make_aware(datetime.combine(parsed, datetime.min.time()))
+        return timezone.now() - timedelta(days=options["days"])
+
+    def get_until(self, options: dict[str, Any]) -> datetime:
+        if options["until"]:
+            parsed = parse_date(options["until"])
+            if parsed is None:
+                msg = "--until must use YYYY-MM-DD format."
+                raise CommandError(msg)
+            return timezone.make_aware(datetime.combine(parsed, datetime.max.time()))
+        return timezone.now()
+
+    def write_metric(self, label: str, values: Iterable[int]) -> None:
+        ordered = sorted(values)
+        self.stdout.write(f"{label}:")
+        self.stdout.write(f"  median: {median(ordered):.0f}")
+        self.stdout.write(f"  average: {mean(ordered):.0f}")
+        self.stdout.write(f"  p75: {self.percentile(ordered, 75):.0f}")
+        self.stdout.write(f"  p90: {self.percentile(ordered, 90):.0f}")
+
+    def percentile(self, values: list[int], percent: int) -> float:
+        if len(values) == 1:
+            return values[0]
+        position = (len(values) - 1) * percent / 100
+        lower = int(position)
+        upper = min(lower + 1, len(values) - 1)
+        weight = position - lower
+        return values[lower] * (1 - weight) + values[upper] * weight

From 2c4235ddc82f622f64e577b7bd3bad8b5c20b726 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Mon, 15 Jun 2026 10:51:46 +0200
Subject: [PATCH 2/3] feat(trans): add translator work analysis command

### Motivation
- Provide a lightweight management command to estimate realistic daily translator throughput by analyzing change history while avoiding noisy data (bots, bulk uploads, automated events and obvious outliers).
- Make the analysis usable for projects/components/languages and bounded by date ranges for operational planning and reporting.

### Description
- Add `weblate/trans/management/commands/analyze_translator_work.py` which groups unit-backed changes by author/day (actions: `CHANGE`, `NEW`, `ACCEPT`), filters to active non-bot authors, and computes median/average/p75/p90 for translated strings and source words.
- Add command options: `--days`, `--since`, `--until`, `--project`, `--component`, `--language`, `--min-changes`, `--max-changes`, and `--max-words` to scope and filter results.
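- Update documentation at `docs/admin/management.rst` to document the new command and add a changelog note in `docs/changes.rst` announcing the feature.
- Add `AnalyzeTranslatorWorkTest` to `weblate/trans/tests/test_commands.py` to verify that accepted suggestions are attributed to their authors rather than to the reviewer who accepted them.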

### Testing
- `python -m compileall weblate/trans/management/commands/analyze_translator_work.py` completed successfully.
- `.venv/bin/python manage.py analyze_translator_work --help` ran and displayed the expected help text.
- `git diff --cached --check` passed with no problems reported for the changed files.
- `uv run prek run ruff --files ...` could not be executed due to an environment network restriction when initializing hooks (external hook repo cloning failed), and `uv run mypy ...` produced existing project-wide type findings unrelated to the new command file.
---
 .../commands/analyze_translator_work.py       | 39 ++++++++--------
 weblate/trans/tests/test_commands.py          | 44 ++++++++++++++++++-
 2 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py
index 7c4cce90aa1c..fbcd7114c30c 100644
--- a/weblate/trans/management/commands/analyze_translator_work.py
+++ b/weblate/trans/management/commands/analyze_translator_work.py
@@ -5,7 +5,7 @@
 
 from datetime import datetime, timedelta
 from statistics import mean, median
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, cast
 
 from django.core.management.base import CommandError
 from django.db.models import Count, Sum
@@ -80,7 +80,7 @@ def add_arguments(self, parser: CommandParser) -> None:
             help="Maximum translated source words per user day to include",
         )
 
-    def handle(self, *args: Any, **options: Any) -> None:
+    def handle(self, *args: object, **options: object) -> None:
         since = self.get_since(options)
         until = self.get_until(options)
         if since >= until:
@@ -92,16 +92,19 @@ def handle(self, *args: Any, **options: Any) -> None:
         )
         rows = list(
             base.annotate(day=TruncDate("timestamp"))
-            .values("day", "user_id", "user__username")
+            .values("day", "author_id", "author__username")
             .annotate(strings=Count("id"), words=Sum("unit__num_words"))
-            .order_by("day", "user__username")
+            .order_by("day", "author__username")
         )
 
+        min_changes = cast("int", options["min_changes"])
+        max_changes = cast("int", options["max_changes"])
+        max_words = cast("int", options["max_words"])
         included = [
             row
             for row in rows
-            if options["min_changes"] <= row["strings"] <= options["max_changes"]
-            and (row["words"] or 0) <= options["max_words"]
+            if min_changes <= row["strings"] <= max_changes
+            and (row["words"] or 0) <= max_words
         ]
         excluded = len(rows) - len(included)
 
@@ -115,8 +118,8 @@ def handle(self, *args: Any, **options: Any) -> None:
         )
         self.stdout.write(
             "Filtering: active human users, unit-backed changes, "
-            f"{options['min_changes']}..{options['max_changes']} strings/day, "
-            f"up to {options['max_words']} words/day"
+            f"{min_changes}..{max_changes} strings/day, "
+            f"up to {max_words} words/day"
         )
         self.stdout.write(f"User days: {len(included)} included, {excluded} excluded")
 
@@ -130,19 +133,19 @@ def handle(self, *args: Any, **options: Any) -> None:
         self.write_metric("Translated strings per day", strings)
         self.write_metric("Source words per day", words)
 
-    def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]:
+    def get_queryset(self, options: dict[str, object]) -> QuerySet[Change]:
         queryset = Change.objects.filter(
             action__in=TRANSLATOR_ACTIONS,
             unit__isnull=False,
-            user__isnull=False,
-            user__is_active=True,
-            user__is_bot=False,
+            author__isnull=False,
+            author__is_active=True,
+            author__is_bot=False,
         )
         if options["project"]:
             queryset = queryset.filter(project__slug=options["project"])
         if options["component"]:
             try:
-                project, component = options["component"].split("/", 1)
+                project, component = cast("str", options["component"]).split("/", 1)
             except ValueError as error:
                 msg = "Component must be specified as project/component."
                 raise CommandError(msg) from error
@@ -151,18 +154,18 @@ def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]:
             queryset = queryset.filter(language__code=options["language"])
         return queryset
 
-    def get_since(self, options: dict[str, Any]) -> datetime:
+    def get_since(self, options: dict[str, object]) -> datetime:
         if options["since"]:
-            parsed = parse_date(options["since"])
+            parsed = parse_date(cast("str", options["since"]))
             if parsed is None:
                 msg = "--since must use YYYY-MM-DD format."
                 raise CommandError(msg)
             return timezone.make_aware(datetime.combine(parsed, datetime.min.time()))
-        return timezone.now() - timedelta(days=options["days"])
+        return timezone.now() - timedelta(days=cast("int", options["days"]))
 
-    def get_until(self, options: dict[str, Any]) -> datetime:
+    def get_until(self, options: dict[str, object]) -> datetime:
         if options["until"]:
-            parsed = parse_date(options["until"])
+            parsed = parse_date(cast("str", options["until"]))
             if parsed is None:
                 msg = "--until must use YYYY-MM-DD format."
                 raise CommandError(msg)
diff --git a/weblate/trans/tests/test_commands.py b/weblate/trans/tests/test_commands.py
index 74066c60abdc..cf0d51205d3a 100644
--- a/weblate/trans/tests/test_commands.py
+++ b/weblate/trans/tests/test_commands.py
@@ -16,19 +16,24 @@
 
 from weblate.accounts.models import Profile
 from weblate.runner import main
+from weblate.trans.actions import ActionEvents
 from weblate.trans.file_format_params import (
     FILE_FORMATS_PARAMS,
     BaseFileFormatParam,
     register_file_format_param,
 )
-from weblate.trans.models import Component, Translation
+from weblate.trans.models import Change, Component, Translation
 from weblate.trans.tests.test_models import RepoTestCase
 from weblate.trans.tests.test_views import (
     ComponentTestCase,
     FixtureComponentTestCase,
     ViewTestCase,
 )
-from weblate.trans.tests.utils import create_test_user, get_test_file
+from weblate.trans.tests.utils import (
+    create_another_user,
+    create_test_user,
+    get_test_file,
+)
 from weblate.vcs.mercurial import HgRepository
 
 TEST_PO = get_test_file("cs.po")
@@ -47,6 +52,41 @@ def test_help(self) -> None:
             sys.stdout = restore
 
 
+class AnalyzeTranslatorWorkTest(ComponentTestCase):
+    def create_change(self, author, user) -> None:
+        unit = self.get_unit()
+        Change.objects.create(
+            action=ActionEvents.ACCEPT,
+            author=author,
+            user=user,
+            unit=unit,
+            translation=self.translation,
+            component=self.component,
+            project=self.project,
+            language=self.translation.language,
+        )
+
+    def test_accepted_suggestions_are_grouped_by_author(self) -> None:
+        reviewer = self.user
+        first_author = create_another_user("-first")
+        second_author = create_another_user("-second")
+        for _unused in range(3):
+            self.create_change(first_author, reviewer)
+            self.create_change(second_author, reviewer)
+
+        output = StringIO()
+        call_command(
+            "analyze_translator_work",
+            days=1,
+            min_changes=1,
+            stdout=output,
+        )
+
+        result = output.getvalue()
+        self.assertIn("User days: 2 included, 0 excluded", result)
+        self.assertIn("  median: 3", result)
+
+
 class ImportProjectTest(RepoTestCase):
     def do_import(self, path=None, **kwargs) -> None:
         with override_settings(CREATE_GLOSSARIES=self.CREATE_GLOSSARIES):

From 6cf086778f23b0018c3bbf9e2ec0b18c93f8c369 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Mon, 15 Jun 2026 11:10:14 +0200
Subject: [PATCH 3/3] Add `analyze_translator_work` management command with
 docs and tests

### Motivation

* Provide a way to estimate realistic daily translator throughput from change history while excluding bots, bulk imports, uploads and other outliers.

### Description

* Add new management command `analyze_translator_work` implemented in `weblate/trans/management/commands/analyze_translator_work.py` that aggregates change rows per user-day, filters by activity thresholds and outputs median/average/p75/p90 metrics for strings and source words per day.
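* Command accepts arguments `--days`, `--since`, `--until`, `--project`, `--component`, `--language`, `--min-changes`, `--max-changes`, and `--max-words` and restricts the queryset to active human users and unit-backed changes. `--component` is resolved with `Component.objects.filter_by_path`, so components nested in categories can be selected by their full path; the command fails with `No matching component found.` when nothing matches.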
* Document the new admin command in `docs/admin/management.rst` and add a release note entry in `docs/changes.rst`.
* Add unit test `AnalyzeTranslatorWorkTest` in `weblate/trans/tests/test_commands.py` (and adjust related imports) to validate grouping and reporting behavior for accepted suggestion changes, and filtering by a full component path that includes a category.

### Testing

* Ran the Django unit tests including the new `AnalyzeTranslatorWorkTest` which invokes `call_command('analyze_translator_work', ...)`, and the test passed.
* Existing management command tests were exercised as part of the test run and completed successfully.
---
 .../commands/analyze_translator_work.py       | 15 +++++++-------
 weblate/trans/tests/test_commands.py          | 20 +++++++++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/weblate/trans/management/commands/analyze_translator_work.py b/weblate/trans/management/commands/analyze_translator_work.py
index fbcd7114c30c..8c8c8bbd3030 100644
--- a/weblate/trans/management/commands/analyze_translator_work.py
+++ b/weblate/trans/management/commands/analyze_translator_work.py
@@ -14,7 +14,7 @@
 from django.utils.dateparse import parse_date
 
 from weblate.trans.actions import ActionEvents
-from weblate.trans.models import Change
+from weblate.trans.models import Change, Component
 from weblate.utils.management.base import BaseCommand
 
 if TYPE_CHECKING:
@@ -144,12 +144,13 @@ def get_queryset(self, options: dict[str, object]) -> QuerySet[Change]:
         if options["project"]:
             queryset = queryset.filter(project__slug=options["project"])
         if options["component"]:
-            try:
-                project, component = cast("str", options["component"]).split("/", 1)
-            except ValueError as error:
-                msg = "Component must be specified as project/component."
-                raise CommandError(msg) from error
-            queryset = queryset.filter(project__slug=project, component__slug=component)
+            components = Component.objects.filter_by_path(
+                cast("str", options["component"])
+            )
+            if not components.exists():
+                msg = "No matching component found."
+                raise CommandError(msg)
+            queryset = queryset.filter(component__in=components)
         if options["language"]:
             queryset = queryset.filter(language__code=options["language"])
         return queryset
diff --git a/weblate/trans/tests/test_commands.py b/weblate/trans/tests/test_commands.py
index cf0d51205d3a..6ddd7881bd14 100644
--- a/weblate/trans/tests/test_commands.py
+++ b/weblate/trans/tests/test_commands.py
@@ -86,6 +86,26 @@ def test_accepted_suggestions_are_grouped_by_author(self) -> None:
         self.assertIn("User days: 2 included, 0 excluded", result)
         self.assertIn("  median: 3", result)
 
+    def test_component_filter_uses_full_path(self) -> None:
+        category = self.create_category(self.project)
+        self.component.category = category
+        self.component.save(update_fields=["category"])
+        for _unused in range(3):
+            self.create_change(self.user, self.user)
+
+        output = StringIO()
+        call_command(
+            "analyze_translator_work",
+            component="/".join(self.component.get_url_path()),
+            days=1,
+            min_changes=1,
+            stdout=output,
+        )
+
+        result = output.getvalue()
+        self.assertIn("User days: 1 included, 0 excluded", result)
+        self.assertIn("  median: 3", result)
+
 
 class ImportProjectTest(RepoTestCase):
     def do_import(self, path=None, **kwargs) -> None: