Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions docs/admin/management.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,59 @@ Example:
weblate --author michal@cihar.com add_suggestions weblate application cs /tmp/suggestions-cs.po


analyze_translator_work
-----------------------

.. weblate-admin:: analyze_translator_work

Analyzes change history to estimate realistic translator throughput per day.
The command includes only active human users and unit-backed manual translation
changes, and applies daily minimum and maximum thresholds to skip obvious outliers,
bots, bulk imports, uploads, and other events that would pollute the result.

.. weblate-admin-option:: --days DAYS

Number of recent days to analyze when ``--since`` is not specified. Defaults to 365.

.. weblate-admin-option:: --since YYYY-MM-DD

Start date for the analysis.

.. weblate-admin-option:: --until YYYY-MM-DD

End date for the analysis; the given day is included. Defaults to the current time.

.. weblate-admin-option:: --project PROJECT

Limit the analysis to a project slug.

.. weblate-admin-option:: --component PROJECT/COMPONENT

Limit the analysis to a component, specified by its full path (including any category).

.. weblate-admin-option:: --language LANGUAGE

Limit the analysis to a language code.

.. weblate-admin-option:: --min-changes COUNT

Minimum number of strings a user has to translate on a given day for that user day to be included. Defaults to 5.

.. weblate-admin-option:: --max-changes COUNT

Maximum number of strings a user may translate on a given day for that user day to be included. Defaults to 1000.

.. weblate-admin-option:: --max-words COUNT

Maximum number of source words a user may translate on a given day for that user day to be included. Defaults to 10000.

Example:

.. code-block:: sh

weblate analyze_translator_work --project weblate --since 2026-01-01


auto_translate
--------------

Expand Down
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Weblate 2026.7
* Added :ref:`distribution-packaging` guidance for distribution maintainers.
* Large component imports now avoid duplicate translation-memory processing.
* :ref:`gettext` files can now be configured to remove obsolete strings on save.
* Added :wladmin:`analyze_translator_work` to estimate realistic daily translator throughput from change history.

.. rubric:: Bug fixes

Expand Down
191 changes: 191 additions & 0 deletions weblate/trans/management/commands/analyze_translator_work.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations

from datetime import datetime, timedelta
from statistics import mean, median
from typing import TYPE_CHECKING, cast

from django.core.management.base import CommandError
from django.db.models import Count, Sum
from django.db.models.functions import TruncDate
from django.utils import timezone
from django.utils.dateparse import parse_date

from weblate.trans.actions import ActionEvents
from weblate.trans.models import Change, Component
from weblate.utils.management.base import BaseCommand

if TYPE_CHECKING:
from collections.abc import Iterable

from django.core.management.base import CommandParser
from django.db.models import QuerySet


# Change actions that the analysis counts as translator work; every other
# action is left out of the queryset.
TRANSLATOR_ACTIONS = (ActionEvents.CHANGE, ActionEvents.NEW, ActionEvents.ACCEPT)


class Command(BaseCommand):
    """Estimate daily translator throughput from the change history.

    Matching changes are grouped into "user days" (one author on one calendar
    day). User days outside the configured thresholds are skipped and summary
    statistics are printed for the remaining ones.
    """

    help = "Analyze realistic daily translator throughput from change history"

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the period, scope and threshold options."""
        parser.add_argument(
            "--days",
            type=int,
            default=365,
            help="Number of recent days to analyze when --since is not specified",
        )
        parser.add_argument(
            "--since",
            help="Start date to analyze, in YYYY-MM-DD format",
        )
        parser.add_argument(
            "--until",
            help="End date to analyze, in YYYY-MM-DD format (defaults to now)",
        )
        parser.add_argument(
            "--project",
            help="Limit analysis to a project slug",
        )
        parser.add_argument(
            "--component",
            help="Limit analysis to a component path as project/component",
        )
        parser.add_argument(
            "--language",
            help="Limit analysis to a language code",
        )
        parser.add_argument(
            "--min-changes",
            type=int,
            default=5,
            help="Minimum translated strings per user day to include",
        )
        parser.add_argument(
            "--max-changes",
            type=int,
            default=1000,
            help="Maximum translated strings per user day to include",
        )
        parser.add_argument(
            "--max-words",
            type=int,
            default=10000,
            help="Maximum translated source words per user day to include",
        )

    def handle(self, *args: object, **options: object) -> None:
        """Aggregate changes per author and day and print the statistics.

        Raises:
            CommandError: If the analyzed period is empty or inverted, if a
                date option is invalid, if the component does not exist, or
                if the change thresholds contradict each other.
        """
        since = self.get_since(options)
        until = self.get_until(options)
        if since >= until:
            msg = "The analysis start date must be before the end date."
            raise CommandError(msg)

        min_changes = cast("int", options["min_changes"])
        max_changes = cast("int", options["max_changes"])
        max_words = cast("int", options["max_words"])
        if min_changes > max_changes:
            # Such a range can never match; report it instead of silently
            # excluding every user day.
            msg = "--min-changes must not be greater than --max-changes."
            raise CommandError(msg)

        base = self.get_queryset(options).filter(
            timestamp__gte=since, timestamp__lt=until
        )
        # One row per (day, author) with the number of changes and the summed
        # word count of the changed units.
        rows = list(
            base.annotate(day=TruncDate("timestamp"))
            .values("day", "author_id", "author__username")
            .annotate(strings=Count("id"), words=Sum("unit__num_words"))
            .order_by("day", "author__username")
        )

        included = [
            row
            for row in rows
            if min_changes <= row["strings"] <= max_changes
            # The sum is None when the database has nothing to add up.
            and (row["words"] or 0) <= max_words
        ]
        excluded = len(rows) - len(included)

        self.stdout.write("Translator work analysis")
        self.stdout.write(f"Period: {since:%Y-%m-%d} to {until:%Y-%m-%d}")
        self.stdout.write(
            "Included actions: "
            + ", ".join(
                ActionEvents(action).name.lower() for action in TRANSLATOR_ACTIONS
            )
        )
        self.stdout.write(
            "Filtering: active human users, unit-backed changes, "
            f"{min_changes}..{max_changes} strings/day, "
            f"up to {max_words} words/day"
        )
        self.stdout.write(f"User days: {len(included)} included, {excluded} excluded")

        if not included:
            self.stdout.write("No matching translator activity found.")
            return

        strings = [row["strings"] for row in included]
        words = [row["words"] or 0 for row in included]
        self.stdout.write("")
        self.write_metric("Translated strings per day", strings)
        self.write_metric("Source words per day", words)

    def get_queryset(self, options: dict[str, object]) -> QuerySet[Change]:
        """Build the queryset of changes that count as translator work.

        Only changes with a unit and an active, non-bot author are kept. The
        ``project``, ``component`` and ``language`` options narrow the result
        further when given.

        Raises:
            CommandError: If ``--component`` does not match any component.
        """
        queryset = Change.objects.filter(
            action__in=TRANSLATOR_ACTIONS,
            unit__isnull=False,
            author__isnull=False,
            author__is_active=True,
            author__is_bot=False,
        )
        if options["project"]:
            queryset = queryset.filter(project__slug=options["project"])
        if options["component"]:
            components = Component.objects.filter_by_path(
                cast("str", options["component"])
            )
            if not components.exists():
                msg = "No matching component found."
                raise CommandError(msg)
            queryset = queryset.filter(component__in=components)
        if options["language"]:
            queryset = queryset.filter(language__code=options["language"])
        return queryset

    def parse_day_option(
        self, options: dict[str, object], name: str, *, end_of_day: bool = False
    ) -> datetime | None:
        """Parse a YYYY-MM-DD option into an aware datetime.

        Returns ``None`` when the option was not given. The result points to
        the first moment of the day, or to the last one with ``end_of_day``.

        Raises:
            CommandError: If the value is malformed or not a real date.
        """
        value = options[name]
        if not value:
            return None
        try:
            parsed = parse_date(cast("str", value))
        except ValueError as error:
            # parse_date() raises for well-formed but impossible dates such
            # as 2026-02-30 instead of returning None.
            msg = f"--{name} is not a valid date: {error}"
            raise CommandError(msg) from error
        if parsed is None:
            msg = f"--{name} must use YYYY-MM-DD format."
            raise CommandError(msg)
        boundary = datetime.max.time() if end_of_day else datetime.min.time()
        return timezone.make_aware(datetime.combine(parsed, boundary))

    def get_since(self, options: dict[str, object]) -> datetime:
        """Return the start of the analyzed period.

        Uses ``--since`` when given, otherwise goes ``--days`` days back from
        the current time.
        """
        since = self.parse_day_option(options, "since")
        if since is not None:
            return since
        return timezone.now() - timedelta(days=cast("int", options["days"]))

    def get_until(self, options: dict[str, object]) -> datetime:
        """Return the end of the analyzed period.

        Uses the end of the ``--until`` day when given, otherwise the current
        time.
        """
        until = self.parse_day_option(options, "until", end_of_day=True)
        if until is not None:
            return until
        return timezone.now()

    def write_metric(self, label: str, values: Iterable[int]) -> None:
        """Print median, average, 75th and 90th percentile of the values."""
        ordered = sorted(values)
        self.stdout.write(f"{label}:")
        self.stdout.write(f"  median: {median(ordered):.0f}")
        self.stdout.write(f"  average: {mean(ordered):.0f}")
        self.stdout.write(f"  p75: {self.percentile(ordered, 75):.0f}")
        self.stdout.write(f"  p90: {self.percentile(ordered, 90):.0f}")

    def percentile(self, values: list[int], percent: int) -> float:
        """Return a percentile using linear interpolation between neighbors.

        ``values`` has to be sorted in ascending order and ``percent`` has to
        be in the 0..100 range.

        Raises:
            ValueError: If ``values`` is empty or ``percent`` is out of range.
        """
        if not values:
            msg = "Cannot compute a percentile of an empty sequence."
            raise ValueError(msg)
        if not 0 <= percent <= 100:
            msg = "The percentile must be between 0 and 100."
            raise ValueError(msg)
        position = (len(values) - 1) * percent / 100
        lower = int(position)
        # Clamp so that the topmost position interpolates with itself.
        upper = min(lower + 1, len(values) - 1)
        weight = position - lower
        return values[lower] * (1 - weight) + values[upper] * weight
64 changes: 62 additions & 2 deletions weblate/trans/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,24 @@

from weblate.accounts.models import Profile
from weblate.runner import main
from weblate.trans.actions import ActionEvents
from weblate.trans.file_format_params import (
FILE_FORMATS_PARAMS,
BaseFileFormatParam,
register_file_format_param,
)
from weblate.trans.models import Component, Translation
from weblate.trans.models import Change, Component, Translation
from weblate.trans.tests.test_models import RepoTestCase
from weblate.trans.tests.test_views import (
ComponentTestCase,
FixtureComponentTestCase,
ViewTestCase,
)
from weblate.trans.tests.utils import create_test_user, get_test_file
from weblate.trans.tests.utils import (
create_another_user,
create_test_user,
get_test_file,
)
from weblate.vcs.mercurial import HgRepository

TEST_PO = get_test_file("cs.po")
Expand All @@ -47,6 +52,61 @@
sys.stdout = restore


class AnalyzeTranslatorWorkTest(ComponentTestCase):
    """Tests for the analyze_translator_work management command."""

    def create_change(self, author, user) -> None:
        """Record an accepted-suggestion change on the test unit."""
        translation = self.translation
        Change.objects.create(
            action=ActionEvents.ACCEPT,
            author=author,
            user=user,
            unit=self.get_unit(),
            translation=translation,
            component=self.component,
            project=self.project,
            language=translation.language,
        )

    def run_analysis(self, **options) -> str:
        """Run the command for the last day and return what it printed."""
        buffer = StringIO()
        call_command(
            "analyze_translator_work",
            days=1,
            min_changes=1,
            stdout=buffer,
            **options,
        )
        return buffer.getvalue()

    def test_accepted_suggestions_are_grouped_by_author(self) -> None:
        reviewer = self.user
        authors = (create_another_user("-first"), create_another_user("-second"))
        # Three accepted suggestions per author, all accepted by the reviewer.
        for _attempt in range(3):
            for author in authors:
                self.create_change(author, reviewer)

        report = self.run_analysis()

        self.assertIn("User days: 2 included, 0 excluded", report)
        self.assertIn("  median: 3", report)

    def test_component_filter_uses_full_path(self) -> None:
        # Move the component into a category so that its path has three parts.
        self.component.category = self.create_category(self.project)
        self.component.save(update_fields=["category"])
        for _attempt in range(3):
            self.create_change(self.user, self.user)

        component_path = "/".join(self.component.get_url_path())
        report = self.run_analysis(component=component_path)

        self.assertIn("User days: 1 included, 0 excluded", report)
        self.assertIn("  median: 3", report)


class ImportProjectTest(RepoTestCase):
def do_import(self, path=None, **kwargs) -> None:
with override_settings(CREATE_GLOSSARIES=self.CREATE_GLOSSARIES):
Expand Down Expand Up @@ -700,7 +760,7 @@
class DocumentationCommandTest(TestCase):
def test_list_file_format_params(self) -> None:
class TestJSONFileFormatParam(BaseFileFormatParam):
name = "json-test"

Check failure on line 763 in weblate/trans/tests/test_commands.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "Literal['json-test']", base class "BaseFileFormatParam" defined the type as "Literal['dos_eol', 'json_sort_keys', 'json_indent', 'json_indent_style', 'json_use_compact_separators', 'po_line_wrap', 'po_keep_previous', 'po_remove_obsolete', 'po_no_location', 'po_fuzzy_matching', 'po_set_language_team', 'po_set_last_translator', 'po_set_x_generator', 'po_report_msgid_bugs_to', 'yaml_indent', 'yaml_line_wrap', 'yaml_line_break', 'xml_closing_tags', 'flatxml_root_name', 'flatxml_value_name', 'flatxml_key_name', 'strings_encoding', 'properties_encoding', 'csv_encoding', 'csv_simple_encoding', 'gwt_encoding', 'merge_duplicates', 'line_max_length', 'md_extract_code_blocks', 'md_extract_frontmatter', 'md_no_placeholders']")
label = "JSONTest"
file_formats = ("test", "json")
help_text = "Test JSON file format parameter"
Expand Down
Loading