Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/admin/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,30 @@ This is currently used in the following places:

* :setting:`ALLOWED_ASSET_SIZE`

.. setting:: ALLOWED_MACHINERY_DOMAINS

ALLOWED_MACHINERY_DOMAINS
-------------------------

Configures which custom machinery domains are explicitly allowed in project-level
machine translation configuration.

This setting applies only to machinery services and does not affect
:setting:`ALLOWED_ASSET_DOMAINS`.

It expects a list of host/domain names. You can use fully qualified names, or
prefix a name with a period (for example ``.example.com``) as a wildcard to
match all of its subdomains.

Defaults to ``[]``.

The allowlist affects project-managed machinery in two ways: it permits the
configured endpoint during outbound validation, and it marks matching hosts as
trusted when deciding whether remote provider error details or response bodies
can be shown to the user. For direct connections, runtime checks still reject
destinations that resolve to private or otherwise non-public addresses. When an
HTTP(S) proxy is used, runtime validation falls back to hostname validation and
does not perform the same local DNS or peer-IP checks.

.. setting:: ALLOWED_ASSET_SIZE

ALLOWED_ASSET_SIZE
Expand Down
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Weblate 5.17
.. rubric:: Bug fixes

* Component file handling now validates repository symlinks.
* Hardened project-level machine translation against SSRF by blocking private-network targets for untrusted endpoints and hiding untrusted remote error details.
* Prevented removing the last team from a project token.
* Batch automatic translation now uses project-level machinery configuration instead of only site-wide settings.
* Fixed sorting by the **Unreviewed** column in listings.
Expand Down
18 changes: 18 additions & 0 deletions weblate/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3501,6 +3501,24 @@ def test_install_machinery(self) -> None:

self.assertEqual(new_config, response.data)

def test_install_machinery_blocks_private_project_target(self) -> None:
    """Check that a loopback machinery URL is rejected for a project admin."""
    self.component.project.add_user(self.user, "Administration")

    # Project-level DeepL configuration pointing at a loopback address.
    payload = {
        "service": "deepl",
        "configuration": {"key": "x", "url": "http://127.0.0.1:11434/"},
    }
    response = self.do_request(
        "api:project-machinery-settings",
        self.project_kwargs,
        method="post",
        code=400,
        superuser=False,
        request=payload,
        format="json",
    )

    self.assertIn("URL domain is not allowed.", str(response.data))


class ComponentAPITest(APIBaseTest):
def setUp(self) -> None:
Expand Down
8 changes: 6 additions & 2 deletions weblate/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,7 +1515,9 @@ def machinery_settings(self, request: Request, **kwargs):
raise ValidationError({"service": "Missing service name"}) from error

service, configuration, errors = validate_service_configuration(
service_name, request.data.get("configuration", "{}")
service_name,
request.data.get("configuration", "{}"),
allow_private_targets=False,
)

if service is None or errors:
Expand Down Expand Up @@ -1554,7 +1556,9 @@ def machinery_settings(self, request: Request, **kwargs):
valid_configurations: dict[str, dict] = {}
for service_name, configuration in request.data.items():
service, configuration, errors = validate_service_configuration(
service_name, configuration
service_name,
configuration,
allow_private_targets=False,
)

if service is None or errors:
Expand Down
2 changes: 2 additions & 0 deletions weblate/machinery/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from __future__ import annotations

from typing import ClassVar
from urllib.parse import urljoin

from .base import MachineryRateLimitError
Expand All @@ -20,6 +21,7 @@ class AnthropicTranslation(BaseLLMTranslation):
"""

name = "Anthropic"
trusted_error_hosts: ClassVar[set[str]] = {"api.anthropic.com"}
end_point = "/v1/messages"
settings_form = AnthropicMachineryForm
version_added = "5.16"
Expand Down
150 changes: 124 additions & 26 deletions weblate/machinery/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from html import escape, unescape
from itertools import chain
from typing import TYPE_CHECKING, ClassVar
from urllib.parse import quote
from urllib.parse import quote, urlparse

from django.conf import settings
from django.core.cache import cache
from django.core.exceptions import ValidationError
from django.utils.functional import cached_property
Expand All @@ -28,8 +29,10 @@
from weblate.machinery.forms import BaseMachineryForm
from weblate.utils.docs import DocVersionsMixin
from weblate.utils.errors import report_error
from weblate.utils.forms import WeblateServiceURLField
from weblate.utils.hash import calculate_dict_hash, calculate_hash, hash_to_checksum
from weblate.utils.requests import http_request
from weblate.utils.outbound import is_allowlisted_hostname
from weblate.utils.requests import http_request, validate_request_url
from weblate.utils.similarity import Comparer
from weblate.utils.site import get_site_url

Expand Down Expand Up @@ -108,20 +111,21 @@

validate_source_language = "en"
validate_target_language = "de"
trusted_error_hosts: ClassVar[set[str]] = set()

@classmethod
def get_rank(cls):
return cls.max_score + cls.rank_boost

def __init__(self, settings: SettingsDict) -> None:
def __init__(self, configuration: SettingsDict) -> None:
"""Create new machine translation object."""
self.mtid = self.get_identifier()
self.rate_limit_cache = f"{self.mtid}-rate-limit"
self.languages_cache = f"{self.mtid}-languages"
self.comparer = Comparer()
self.supported_languages_error: Exception | None = None
self.supported_languages_error_age: float = 0
self.settings = settings
self.settings = configuration

def delete_cache(self) -> None:
cache.delete_many([self.rate_limit_cache, self.languages_cache])
Expand Down Expand Up @@ -187,29 +191,117 @@
try:
response.raise_for_status()
except HTTPError as error:
detail = response.text
try:
payload = response.json()
except JSONDecodeError:
pass
else:
if isinstance(payload, dict) and payload:
if detail_error := payload.get("error"):
if isinstance(detail_error, str):
detail = detail_error
elif isinstance(detail_error, dict):
if "message" in detail_error:
detail = detail_error["message"]
else:
detail = str(detail_error)
else:
detail = str(payload)

if detail:
if detail := self.get_error_detail(response):
message = f"{error.args[0]}: {detail[:200]}"
raise HTTPError(message, response=response) from error
raise
Comment thread
nijel marked this conversation as resolved.

@property
def allow_private_targets(self) -> bool:
    """Tell whether this configuration may reach private-network targets.

    A configuration carrying the ``_project`` key is treated as
    project-scoped and is therefore not allowed to use private targets.
    """
    is_project_scoped = "_project" in self.settings
    return not is_project_scoped

def validate_runtime_url(self, url: str) -> None:
    """Validate an outbound *url* right before it is requested.

    Delegates to ``validate_request_url`` with this service's private-target
    policy and the ``ALLOWED_MACHINERY_DOMAINS`` allowlist; whatever that
    helper raises propagates to the caller.
    """
    private_targets_allowed = self.allow_private_targets
    validate_request_url(
        url,
        allow_private_targets=private_targets_allowed,
        allowed_domains=settings.ALLOWED_MACHINERY_DOMAINS,
    )

@staticmethod
def get_host_from_setting(value: object) -> str | None:
    """Extract a host name from a configuration value.

    Values containing ``://`` are parsed as URLs and their host name is
    returned (``None`` when the URL has none). Any other non-empty string is
    returned unchanged; empty strings and non-string values yield ``None``.
    """
    if isinstance(value, str):
        if "://" in value:
            return urlparse(value).hostname
        if value:
            return value
    return None

def get_trusted_error_hosts(self) -> set[str]:
    """Return the hosts whose error responses may be shown to users.

    The result always contains ``ALLOWED_MACHINERY_DOMAINS`` and the class
    level ``trusted_error_hosts``. For project-scoped configurations with a
    settings form, the hosts of every built-in value (``initial`` and
    ``choices``) of endpoint fields are added as well. Endpoint fields are
    ``WeblateServiceURLField`` instances or fields listed in the form's
    ``network_host_fields``.
    """
    hosts = set(settings.ALLOWED_MACHINERY_DOMAINS)
    hosts.update(self.trusted_error_hosts)
    if self.allow_private_targets or self.settings_form is None:
        return hosts

    form = self.settings_form(self.__class__)
    for field_name, field in form.fields.items():
        is_endpoint_field = isinstance(field, WeblateServiceURLField) or (
            field_name in form.network_host_fields
        )
        if not is_endpoint_field:
            continue

        # Only values shipped with the form are trusted. The configured value
        # is deliberately not looked up here: it would add nothing beyond the
        # built-in values, and a set lookup on an arbitrary configured value
        # fails for unhashable types.
        values = {value for value, _label in getattr(field, "choices", ())}
        if initial := getattr(field, "initial", None):
            values.add(initial)

        for value in values:
            if host := self.get_host_from_setting(value):
                hosts.add(host)
    return hosts

@classmethod
def has_configurable_outbound_target(cls) -> bool:
    """Tell whether the settings form lets users choose the remote endpoint.

    Returns ``True`` when the form has a ``WeblateServiceURLField`` or a
    field named in the form's ``network_host_fields``; ``False`` when there
    is no settings form or no such field.
    """
    form_class = cls.settings_form
    if form_class is None:
        return False

    form = form_class(cls)
    return any(
        isinstance(field, WeblateServiceURLField)
        or name in form.network_host_fields
        for name, field in form.fields.items()
    )

def can_display_error_detail(self, response: Response) -> bool:
    """Decide whether any detail from *response* may be shown to the user.

    Configurations allowed to use private targets always qualify; otherwise
    the responding host has to be trusted.
    """
    if not self.allow_private_targets:
        return self.is_trusted_error_host(response)
    return True

def is_trusted_error_host(self, response: Response) -> bool:
    """Tell whether the host that produced *response* is trusted.

    Services that have a settings form without any configurable endpoint are
    always trusted. Otherwise the host name of ``response.url`` is matched
    against the hosts from ``get_trusted_error_hosts``.
    """
    has_form = self.settings_form is not None
    if has_form and not self.has_configurable_outbound_target():
        return True

    # A URL without a host name is matched as an empty string.
    hostname = urlparse(response.url).hostname or ""
    trusted_hosts = list(self.get_trusted_error_hosts())
    return is_allowlisted_hostname(hostname, trusted_hosts)

def get_error_detail(self, response: Response) -> str | None:
    """Extract a user-presentable error detail from *response*.

    Returns ``None`` when details must not be displayed at all. Otherwise
    well-known JSON fields are tried in order: ``message``, ``detail``,
    ``error`` as a string, then ``error.message``. The raw body (or a bare
    JSON string payload) is only returned for trusted hosts.
    """
    if not self.can_display_error_detail(response):
        return None
    trusted_host = self.is_trusted_error_host(response)
    # Raw response text is exposed for trusted hosts only.
    raw_fallback = (lambda: response.text or None) if trusted_host else (lambda: None)

    try:
        payload = response.json()
    except JSONDecodeError:
        return raw_fallback()

    if isinstance(payload, dict):
        for key in ("message", "detail"):
            candidate = payload.get(key)
            if candidate and isinstance(candidate, str):
                return candidate
        error = payload.get("error")
        if error:
            if isinstance(error, str):
                return error
            if isinstance(error, dict):
                nested = error.get("message")
                if isinstance(nested, str):
                    return nested
    elif trusted_host and isinstance(payload, str):
        return payload
    return raw_fallback()

Comment thread
nijel marked this conversation as resolved.
def request(self, method, url, skip_auth=False, **kwargs):
"""Perform JSON request."""
# Create custom headers
Expand All @@ -231,6 +323,9 @@
timeout=self.request_timeout,
auth=self.get_auth(),
raise_for_status=False,
validate_url=not self.allow_private_targets,
allow_private_targets=self.allow_private_targets,
allowed_domains=settings.ALLOWED_MACHINERY_DOMAINS,
**kwargs,
)

Expand Down Expand Up @@ -638,9 +733,12 @@
return output

def get_error_message(self, exc: Exception) -> str:
    """Format *exc* for display, appending trust-filtered remote detail.

    The base message is ``"<ExceptionClass>: <exc>"``. For request errors
    that carry a response, the detail from ``get_error_detail`` is appended
    unless it is empty or already part of the exception text.
    """
    message = f"{exc.__class__.__name__}: {exc}"
    # Compare against None explicitly: a requests Response is falsy for
    # 4xx/5xx status codes, so a plain truthiness test would skip exactly
    # the error responses whose detail should be reported.
    if isinstance(exc, RequestException) and exc.response is not None:
        detail = self.get_error_detail(exc.response)
        if detail and detail not in str(exc):
            return f"{message}: {detail}"
    # NOTE(review): subclasses overriding this method (DeepLTranslation was
    # named in review) reportedly return response content directly and so
    # bypass get_error_detail - confirm they route through it as well.
    return message

def signed_salt(self, appid, secret, text):
"""Generate salt and sign as used by Chinese services."""
Expand Down Expand Up @@ -697,7 +795,7 @@
continue
translation_lists = [translations[text] for text in unit.plural_map]
plural_count = len(translation_lists)
translation = result.setdefault("translation", [""] * plural_count)

Check failure on line 798 in weblate/machinery/base.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "list[str]", variable has type "Translation")
quality = result.setdefault("quality", [0] * plural_count)
origin = result.setdefault("origin", [None] * plural_count)
for plural, possible_translations in enumerate(translation_lists):
Expand All @@ -705,7 +803,7 @@
if quality[plural] > item["quality"]:
continue
quality[plural] = item["quality"]
translation[plural] = item["text"]

Check failure on line 806 in weblate/machinery/base.py

View workflow job for this annotation

GitHub Actions / mypy

Unsupported target for indexed assignment ("Translation")
origin[plural] = self

@cached_property
Expand Down
4 changes: 4 additions & 0 deletions weblate/machinery/deepl.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ class DeepLTranslation(
target_language_map: ClassVar[dict[str, str]] = {
"PT": "PT-PT",
}
trusted_error_hosts: ClassVar[set[str]] = {
"api.deepl.com",
"api-free.deepl.com",
}
highlight_syntax = True
settings_form = DeepLMachineryForm
glossary_count_limit = 1000
Expand Down
24 changes: 23 additions & 1 deletion weblate/machinery/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
from django.utils.translation import gettext, gettext_lazy, pgettext_lazy

from weblate.utils.forms import WeblateServiceURLField
from weblate.utils.validators import validate_machinery_hostname, validate_machinery_url

from .types import SourceLanguageChoices


class BaseMachineryForm(forms.Form):
network_host_fields = frozenset({"base_url", "endpoint_url"})

source_language = forms.ChoiceField(
label=pgettext_lazy(
"Automatic suggestion service configuration", "Source language selection"
Expand All @@ -26,8 +29,11 @@
required=False,
)

def __init__(self, machinery, *args, **kwargs) -> None:
def __init__(
self, machinery, *args, allow_private_targets: bool = True, **kwargs
) -> None:
self.machinery = machinery
self.allow_private_targets = allow_private_targets
super().__init__(*args, **kwargs)

def serialize_form(self):
Expand All @@ -40,9 +46,25 @@
continue
if field not in settings:
return
self.validate_endpoint_fields(settings)
if not self.allow_private_targets:
settings = {**settings, "_project": True}
machinery = self.machinery(settings)
machinery.validate_settings()

def validate_endpoint_fields(self, settings) -> None:
    """Validate every configured endpoint value against the target policy.

    ``WeblateServiceURLField`` values go through ``validate_machinery_url``;
    string values of fields named in ``network_host_fields`` go through
    ``validate_machinery_hostname``. Missing or empty values are skipped.
    Whatever the validators raise propagates to the caller.
    """
    allow_private = self.allow_private_targets
    for field_name, field in self.fields.items():
        value = settings.get(field_name)
        # Equality-based emptiness test: a set lookup would hash the value
        # and raise TypeError for list- or dict-valued settings.
        if value is None or value == "":
            continue
        if isinstance(field, WeblateServiceURLField):
            validate_machinery_url(value, allow_private_targets=allow_private)
        elif field_name in self.network_host_fields and isinstance(value, str):
            validate_machinery_hostname(value, allow_private_targets=allow_private)


class KeyMachineryForm(BaseMachineryForm):
key = forms.CharField(
Expand Down Expand Up @@ -423,7 +445,7 @@
),
required=False,
)
model = forms.ChoiceField(

Check failure on line 448 in weblate/machinery/forms.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "ChoiceField", base class "LLMBasicMachineryForm" defined the type as "CharField")
label=pgettext_lazy(
"Automatic suggestion service configuration",
"OpenAI model",
Expand Down Expand Up @@ -523,7 +545,7 @@
initial="https://api.anthropic.com",
required=False,
)
model = forms.ChoiceField(

Check failure on line 548 in weblate/machinery/forms.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "ChoiceField", base class "LLMBasicMachineryForm" defined the type as "CharField")
label=pgettext_lazy(
"Automatic suggestion service configuration",
"Anthropic model",
Expand Down
3 changes: 2 additions & 1 deletion weblate/machinery/libretranslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, ClassVar

from .base import BatchMachineTranslation
from .forms import LibreTranslateMachineryForm
Expand All @@ -22,6 +22,7 @@ class LibreTranslateTranslation(BatchMachineTranslation):

name = "LibreTranslate"
max_score = 89
trusted_error_hosts: ClassVar[set[str]] = {"libretranslate.com"}
version_added = "4.7.1"
settings_form = LibreTranslateMachineryForm
request_timeout = 20
Expand Down
Loading
Loading