|
| 1 | +# Copyright © Michal Čihař <michal@weblate.org> |
| 2 | +# |
| 3 | +# SPDX-License-Identifier: GPL-3.0-or-later |
| 4 | +from __future__ import annotations |
| 5 | + |
| 6 | +from datetime import datetime, timedelta |
| 7 | +from statistics import mean, median |
| 8 | +from typing import TYPE_CHECKING, Any |
| 9 | + |
| 10 | +from django.core.management.base import CommandError |
| 11 | +from django.db.models import Count, Sum |
| 12 | +from django.db.models.functions import TruncDate |
| 13 | +from django.utils import timezone |
| 14 | +from django.utils.dateparse import parse_date |
| 15 | + |
| 16 | +from weblate.trans.actions import ActionEvents |
| 17 | +from weblate.trans.models import Change |
| 18 | +from weblate.utils.management.base import BaseCommand |
| 19 | + |
| 20 | +if TYPE_CHECKING: |
| 21 | + from collections.abc import Iterable |
| 22 | + |
| 23 | + from django.core.management.base import CommandParser |
| 24 | + from django.db.models import QuerySet |
| 25 | + |
| 26 | + |
# Change actions that count as translator work. The tuple is used both to
# filter Change rows and, in this order, to list the actions in the report.
TRANSLATOR_ACTIONS = (ActionEvents.CHANGE, ActionEvents.NEW, ActionEvents.ACCEPT)
| 32 | + |
| 33 | + |
class Command(BaseCommand):
    """Summarize per-user, per-day translation throughput from change history.

    Changes whose action is in ``TRANSLATOR_ACTIONS`` are grouped by calendar
    day and user, user days outside the configured string/word limits are
    dropped as outliers, and median, average, p75 and p90 are printed for the
    number of strings and source words handled per day.
    """

    help = "Analyze realistic daily translator throughput from change history"

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the time window, scope and outlier filter options."""
        parser.add_argument(
            "--days",
            type=int,
            default=365,
            help="Number of recent days to analyze when --since is not specified",
        )
        parser.add_argument(
            "--since",
            help="Start date to analyze, in YYYY-MM-DD format",
        )
        parser.add_argument(
            "--until",
            help="End date to analyze, in YYYY-MM-DD format (defaults to now)",
        )
        parser.add_argument(
            "--project",
            help="Limit analysis to a project slug",
        )
        parser.add_argument(
            "--component",
            help="Limit analysis to a component path as project/component",
        )
        parser.add_argument(
            "--language",
            help="Limit analysis to a language code",
        )
        parser.add_argument(
            "--min-changes",
            type=int,
            default=5,
            help="Minimum translated strings per user day to include",
        )
        parser.add_argument(
            "--max-changes",
            type=int,
            default=1000,
            help="Maximum translated strings per user day to include",
        )
        parser.add_argument(
            "--max-words",
            type=int,
            default=10000,
            help="Maximum translated source words per user day to include",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """Aggregate matching changes per user day and print the statistics.

        Raises:
            CommandError: If an option is malformed, the string limits are
                contradictory, or the start is not before the end.
        """
        min_changes = options["min_changes"]
        max_changes = options["max_changes"]
        max_words = options["max_words"]
        if min_changes > max_changes:
            # Otherwise every user day would be silently filtered out.
            msg = "--min-changes must not be greater than --max-changes."
            raise CommandError(msg)

        since = self.get_since(options)
        until = self.get_until(options)
        if since >= until:
            msg = "The analysis start date must be before the end date."
            raise CommandError(msg)

        # The upper bound is inclusive because --until resolves to the very
        # last representable moment of the requested day.
        base = self.get_queryset(options).filter(
            timestamp__gte=since, timestamp__lte=until
        )
        # One row per (day, user) with the number of changes and the summed
        # word count of the touched units.
        rows = list(
            base.annotate(day=TruncDate("timestamp"))
            .values("day", "user_id", "user__username")
            .annotate(strings=Count("id"), words=Sum("unit__num_words"))
            .order_by("day", "user__username")
        )

        # Drop user days outside the limits; a missing word sum counts as 0.
        included = [
            row
            for row in rows
            if min_changes <= row["strings"] <= max_changes
            and (row["words"] or 0) <= max_words
        ]
        excluded = len(rows) - len(included)

        self.stdout.write("Translator work analysis")
        self.stdout.write(f"Period: {since:%Y-%m-%d} to {until:%Y-%m-%d}")
        self.stdout.write(
            "Included actions: "
            + ", ".join(
                ActionEvents(action).name.lower() for action in TRANSLATOR_ACTIONS
            )
        )
        self.stdout.write(
            "Filtering: active human users, unit-backed changes, "
            f"{options['min_changes']}..{options['max_changes']} strings/day, "
            f"up to {options['max_words']} words/day"
        )
        self.stdout.write(f"User days: {len(included)} included, {excluded} excluded")

        if not included:
            self.stdout.write("No matching translator activity found.")
            return

        strings = [row["strings"] for row in included]
        words = [row["words"] or 0 for row in included]
        self.stdout.write("")
        self.write_metric("Translated strings per day", strings)
        self.write_metric("Source words per day", words)

    def get_queryset(self, options: dict[str, Any]) -> QuerySet[Change]:
        """Build the base Change queryset for the requested scope.

        Only changes with a translator action, an attached unit and an
        active, non-bot user are kept; the optional project, component and
        language options narrow the result further.

        Raises:
            CommandError: If --component is not given as project/component.
        """
        queryset = Change.objects.filter(
            action__in=TRANSLATOR_ACTIONS,
            unit__isnull=False,
            user__isnull=False,
            user__is_active=True,
            user__is_bot=False,
        )
        if options["project"]:
            queryset = queryset.filter(project__slug=options["project"])
        if options["component"]:
            # Split on the first slash only; both halves must be non-empty,
            # which also rejects values without any slash.
            # NOTE(review): everything after the first slash is compared with
            # component__slug, so deeper paths only match if slugs themselves
            # contain slashes - confirm whether nested paths need support.
            project, _, component = options["component"].partition("/")
            if not (project and component):
                msg = "Component must be specified as project/component."
                raise CommandError(msg)
            queryset = queryset.filter(project__slug=project, component__slug=component)
        if options["language"]:
            queryset = queryset.filter(language__code=options["language"])
        return queryset

    def get_since(self, options: dict[str, Any]) -> datetime:
        """Return the aware start of the analysis window.

        Uses the start of the --since day when given, otherwise the current
        time minus --days.

        Raises:
            CommandError: If --since is not a valid date or --days is below 1.
        """
        if options["since"]:
            return self._parse_boundary(options["since"], "--since", end_of_day=False)
        days = options["days"]
        if days < 1:
            # A zero or negative span cannot contain any activity.
            msg = "--days must be a positive number."
            raise CommandError(msg)
        return timezone.now() - timedelta(days=days)

    def get_until(self, options: dict[str, Any]) -> datetime:
        """Return the aware end of the analysis window.

        Uses the last moment of the --until day when given, otherwise the
        current time.

        Raises:
            CommandError: If --until is not a valid date.
        """
        if options["until"]:
            return self._parse_boundary(options["until"], "--until", end_of_day=True)
        return timezone.now()

    def _parse_boundary(
        self, value: str, option: str, *, end_of_day: bool
    ) -> datetime:
        """Turn a YYYY-MM-DD option value into an aware day boundary.

        Args:
            value: Raw option text supplied by the user.
            option: Option name used in error messages, such as ``--since``.
            end_of_day: Return the last moment of the day instead of the first.

        Raises:
            CommandError: If the text is malformed or not a real date.
        """
        try:
            parsed = parse_date(value)
        except ValueError as error:
            # parse_date raises for well-formed but impossible dates such as
            # 2024-02-30 and returns None only for malformed text.
            msg = f"{option} is not a valid calendar date."
            raise CommandError(msg) from error
        if parsed is None:
            msg = f"{option} must use YYYY-MM-DD format."
            raise CommandError(msg)
        boundary = datetime.max.time() if end_of_day else datetime.min.time()
        return timezone.make_aware(datetime.combine(parsed, boundary))

    def write_metric(self, label: str, values: Iterable[int]) -> None:
        """Print median, average, p75 and p90 of the values under a label."""
        ordered = sorted(values)
        self.stdout.write(f"{label}:")
        if not ordered:
            # median() and mean() raise on empty input.
            self.stdout.write(" no data")
            return
        self.stdout.write(f" median: {median(ordered):.0f}")
        self.stdout.write(f" average: {mean(ordered):.0f}")
        for percent in (75, 90):
            self.stdout.write(f" p{percent}: {self.percentile(ordered, percent):.0f}")

    def percentile(self, values: list[int], percent: int) -> float:
        """Return a percentile using linear interpolation between neighbours.

        Args:
            values: Non-empty list sorted in ascending order.
            percent: Requested percentile, from 0 to 100 inclusive.

        Raises:
            ValueError: If values is empty or percent is out of range.
        """
        if not values:
            msg = "percentile() requires at least one value"
            raise ValueError(msg)
        if not 0 <= percent <= 100:
            msg = "percent must be between 0 and 100"
            raise ValueError(msg)
        last = len(values) - 1
        # Fractional index of the percentile within the sorted values.
        position = last * percent / 100
        lower = int(position)
        upper = min(lower + 1, last)
        weight = position - lower
        return values[lower] * (1 - weight) + values[upper] * weight
0 commit comments