Skip to content

Commit fc3268e

Browse files
committed
Security updates
1 parent 244df66 commit fc3268e

16 files changed

Lines changed: 1482 additions & 69 deletions

plugins/communication_protocols/gql/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "utcp-gql"
7-
version = "1.1.0"
7+
version = "1.1.1"
88
authors = [
99
{ name = "UTCP Contributors" },
1010
]
@@ -14,6 +14,7 @@ requires-python = ">=3.10"
1414
dependencies = [
1515
"pydantic>=2.0",
1616
"gql>=3.0",
17+
"aiohttp>=3.8",
1718
"utcp>=1.1"
1819
]
1920
classifiers = [
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
"""URL validation for the GraphQL communication protocol.
2+
3+
Mirror of ``utcp_http._security`` -- intentionally duplicated rather
4+
than cross-plugin-imported so ``utcp-gql`` does not gain a runtime
5+
dependency on ``utcp-http``. Keep the two files in sync when changing
6+
the validator behavior. Backs GHSA-ppx3-28rw-8fpf (the original CVE
7+
fix did not reach this plugin) and GHSA-9qhg-99ww-9mqc (redirect
8+
SSRF on the GraphQL endpoint).
9+
"""
10+
11+
from __future__ import annotations
12+
13+
from contextlib import asynccontextmanager
14+
from ipaddress import ip_address
15+
from typing import Any, AsyncIterator, Optional
16+
from urllib.parse import urljoin, urlparse
17+
18+
# Hostnames considered safe to talk to over plain HTTP.
19+
_LOOPBACK_HOSTNAMES = frozenset({"localhost", "127.0.0.1", "::1", "[::1]"})
20+
21+
22+
def is_secure_url(url: str) -> bool:
23+
"""Return True if ``url`` is safe to fetch from a UTCP HTTP protocol.
24+
25+
Allowed:
26+
- Any ``https://`` URL.
27+
- ``http://`` URLs whose host is exactly ``localhost``, ``127.0.0.1``,
28+
or ``::1``.
29+
30+
Disallowed:
31+
- Plain ``http://`` to any other host (MITM exposure).
32+
- URLs whose hostname *starts* with ``localhost`` / ``127.0.0.1`` but
33+
isn't actually loopback (e.g. ``http://localhost.evil.com``,
34+
``http://127.0.0.1.attacker.example``). The earlier ``startswith``
35+
check let these through.
36+
- Anything without a scheme/host (file://, gopher://, javascript:, ...).
37+
"""
38+
if not isinstance(url, str) or not url:
39+
return False
40+
41+
try:
42+
parsed = urlparse(url)
43+
except ValueError:
44+
return False
45+
46+
scheme = (parsed.scheme or "").lower()
47+
if scheme not in {"http", "https"}:
48+
return False
49+
50+
host = (parsed.hostname or "").lower()
51+
if not host:
52+
return False
53+
54+
if scheme == "https":
55+
return True
56+
57+
# http:// is only allowed for loopback.
58+
if host in _LOOPBACK_HOSTNAMES:
59+
return True
60+
61+
# Catch any other literal loopback IP that urlparse normalised
62+
# (e.g. ``http://127.000.000.001``).
63+
try:
64+
return ip_address(host).is_loopback
65+
except ValueError:
66+
return False
67+
68+
69+
def is_loopback_url(url: str) -> bool:
70+
"""Return True if ``url``'s host is a literal loopback address.
71+
72+
Used by the OpenAPI converter to detect the SSRF case where a remote spec
73+
declares ``servers: [{ url: "http://127.0.0.1:..." }]`` to redirect tool
74+
invocation at the host running the agent. Hostname-based — not a string
75+
prefix — so ``http://localhost.evil.com`` returns False.
76+
"""
77+
if not isinstance(url, str) or not url:
78+
return False
79+
80+
try:
81+
parsed = urlparse(url)
82+
except ValueError:
83+
return False
84+
85+
host = (parsed.hostname or "").lower()
86+
if not host:
87+
return False
88+
89+
if host in _LOOPBACK_HOSTNAMES:
90+
return True
91+
92+
try:
93+
return ip_address(host).is_loopback
94+
except ValueError:
95+
return False
96+
97+
98+
def ensure_secure_url(url: str, *, context: Optional[str] = None) -> None:
99+
"""Raise ``ValueError`` if ``url`` is not safe to fetch.
100+
101+
``context`` is a short label (``"manual discovery"``, ``"tool invocation"``,
102+
etc.) included in the error so log readers can tell which trust boundary
103+
was breached.
104+
"""
105+
if is_secure_url(url):
106+
return
107+
108+
where = f" during {context}" if context else ""
109+
raise ValueError(
110+
f"Security error{where}: URL must use HTTPS or be a literal loopback "
111+
f"address (localhost / 127.0.0.1 / ::1). Got: {url!r}. "
112+
"Plain HTTP to any other host is rejected to prevent MITM attacks "
113+
"and SSRF into internal services."
114+
)
115+
116+
117+
# HTTP statuses where the server expects the client to re-issue the request
118+
# against the URL given in the ``Location`` header. 303 forces a GET; the
119+
# rest preserve the original method.
120+
_REDIRECT_STATUSES = frozenset({301, 302, 303, 307, 308})
121+
122+
123+
@asynccontextmanager
124+
async def safe_request_with_redirects(
125+
session: Any,
126+
method: str,
127+
url: str,
128+
*,
129+
context: str,
130+
max_redirects: int = 5,
131+
**kwargs: Any,
132+
) -> AsyncIterator[Any]:
133+
"""Issue an aiohttp request that re-validates every redirect hop.
134+
135+
Closes the residual SSRF window left by ``ensure_secure_url`` (which
136+
only inspects the initial URL): aiohttp by default follows 3xx
137+
redirects without rechecking, so an attacker-controlled server could
138+
302 the client into ``http://169.254.169.254/...`` (cloud metadata)
139+
or any internal HTTP service and the response body would be handed
140+
back to the caller. Backs GHSA-9qhg-99ww-9mqc.
141+
142+
Behavior:
143+
* Calls ``ensure_secure_url(url, context=context)`` on the initial
144+
URL.
145+
* Disables aiohttp's auto-follow (``allow_redirects=False``).
146+
* On a 3xx response with a ``Location`` header, resolves the
147+
target against the current URL and runs ``ensure_secure_url``
148+
on it before issuing the next hop. Rejection raises and the
149+
redirect chain is aborted with the connection released.
150+
* Caps the chain at ``max_redirects`` hops. Exceeding that raises
151+
``RuntimeError``.
152+
* Mirrors RFC 7231 method semantics: 303 forces ``GET`` and drops
153+
any request body; 301/302/307/308 preserve method and body.
154+
155+
Usage:
156+
```python
157+
async with safe_request_with_redirects(
158+
session, "GET", url, context="tool invocation", params=...
159+
) as response:
160+
response.raise_for_status()
161+
...
162+
```
163+
"""
164+
ensure_secure_url(url, context=context)
165+
# We control redirect behavior ourselves; refuse to let callers override.
166+
kwargs.pop("allow_redirects", None)
167+
168+
current_url = url
169+
current_method = method
170+
hops = 0
171+
final_response = None
172+
173+
try:
174+
while True:
175+
response = await session.request(
176+
current_method,
177+
current_url,
178+
allow_redirects=False,
179+
**kwargs,
180+
)
181+
if response.status not in _REDIRECT_STATUSES:
182+
final_response = response
183+
break
184+
185+
location = response.headers.get("Location")
186+
if not location:
187+
# 3xx with no Location header — nothing to follow. Let
188+
# the caller handle the unusual response.
189+
final_response = response
190+
break
191+
192+
if hops >= max_redirects:
193+
response.release()
194+
raise RuntimeError(
195+
f"Too many redirects (>{max_redirects}) during {context} "
196+
f"starting from {url!r}."
197+
)
198+
199+
next_url = urljoin(current_url, location)
200+
try:
201+
ensure_secure_url(
202+
next_url, context=f"{context} (redirect target)"
203+
)
204+
except Exception:
205+
response.release()
206+
raise
207+
208+
response.release()
209+
if response.status == 303:
210+
current_method = "GET"
211+
kwargs.pop("json", None)
212+
kwargs.pop("data", None)
213+
current_url = next_url
214+
hops += 1
215+
216+
yield final_response
217+
finally:
218+
if final_response is not None:
219+
final_response.release()

plugins/communication_protocols/gql/src/utcp_gql/gql_communication_protocol.py

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from utcp.data.auth_implementations.oauth2_auth import OAuth2Auth
1616

1717
from utcp_gql.gql_call_template import GraphQLCallTemplate
18+
from utcp_gql._security import ensure_secure_url, safe_request_with_redirects
1819

1920
if TYPE_CHECKING:
2021
from utcp.utcp_client import UtcpClient
@@ -40,30 +41,35 @@ class GraphQLCommunicationProtocol(CommunicationProtocol):
4041
def __init__(self) -> None:
4142
self._oauth_tokens: Dict[str, Dict[str, Any]] = {}
4243

43-
def _enforce_https_or_localhost(self, url: str) -> None:
44-
if not (
45-
url.startswith("https://")
46-
or url.startswith("http://localhost")
47-
or url.startswith("http://127.0.0.1")
48-
):
49-
raise ValueError(
50-
"Security error: URL must use HTTPS or start with 'http://localhost' or 'http://127.0.0.1'. "
51-
"Non-secure URLs are vulnerable to man-in-the-middle attacks. "
52-
f"Got: {url}."
53-
)
54-
5544
async def _handle_oauth2(self, auth: OAuth2Auth) -> str:
45+
"""Fetch an OAuth2 access token.
46+
47+
Validates the token URL with the hostname-based ``ensure_secure_url``
48+
helper before any credential bytes leave the process, and follows
49+
redirects only after re-validating each hop -- defends against the
50+
sibling SSRF / credential-exfiltration patterns in
51+
GHSA-8cp3-qxj6-px34 and GHSA-9qhg-99ww-9mqc.
52+
"""
5653
client_id = auth.client_id
5754
if client_id in self._oauth_tokens:
5855
return self._oauth_tokens[client_id]["access_token"]
56+
57+
ensure_secure_url(auth.token_url, context="OAuth2 token URL")
58+
5959
async with aiohttp.ClientSession() as session:
6060
data = {
6161
"grant_type": "client_credentials",
6262
"client_id": client_id,
6363
"client_secret": auth.client_secret,
6464
"scope": auth.scope,
6565
}
66-
async with session.post(auth.token_url, data=data) as resp:
66+
async with safe_request_with_redirects(
67+
session,
68+
"POST",
69+
auth.token_url,
70+
context="OAuth2 token fetch",
71+
data=data,
72+
) as resp:
6773
resp.raise_for_status()
6874
token_response = await resp.json()
6975
self._oauth_tokens[client_id] = token_response
@@ -94,17 +100,45 @@ async def _prepare_headers(
94100

95101
return headers
96102

103+
@staticmethod
104+
def _disable_transport_redirects(transport: AIOHTTPTransport) -> None:
105+
"""Patch the underlying aiohttp session used by AIOHTTPTransport
106+
so its ``session.post`` refuses to follow 3xx responses.
107+
108+
gql's AIOHTTPTransport does not expose ``allow_redirects`` and
109+
the default ClientSession setting would let an attacker-
110+
controlled GraphQL endpoint 302 the client into an internal
111+
service after the URL had already passed ``ensure_secure_url``.
112+
See GHSA-9qhg-99ww-9mqc / GHSA-ppx3-28rw-8fpf.
113+
"""
114+
aio_session = getattr(transport, "session", None)
115+
if aio_session is None:
116+
return
117+
original_post = aio_session.post
118+
119+
def _no_redirect_post(*args: Any, **kwargs: Any):
120+
kwargs["allow_redirects"] = False
121+
return original_post(*args, **kwargs)
122+
123+
aio_session.post = _no_redirect_post # type: ignore[method-assign]
124+
97125
async def register_manual(
98126
self, caller: "UtcpClient", manual_call_template: CallTemplate
99127
) -> RegisterManualResult:
100128
if not isinstance(manual_call_template, GraphQLCallTemplate):
101129
raise ValueError("GraphQLCommunicationProtocol requires a GraphQLCallTemplate call template")
102-
self._enforce_https_or_localhost(manual_call_template.url)
130+
# Hostname-based validation -- replaces the broken ``startswith``
131+
# prefix check that let ``http://127.0.0.1.attacker.example``
132+
# through (GHSA-ppx3-28rw-8fpf).
133+
ensure_secure_url(
134+
manual_call_template.url, context="GraphQL manual discovery"
135+
)
103136

104137
try:
105138
headers = await self._prepare_headers(manual_call_template)
106139
transport = AIOHTTPTransport(url=manual_call_template.url, headers=headers)
107140
async with GqlClient(transport=transport, fetch_schema_from_transport=True) as session:
141+
self._disable_transport_redirects(transport)
108142
schema = session.client.schema
109143
tools: List[Tool] = []
110144

@@ -178,11 +212,14 @@ async def call_tool(
178212
) -> Any:
179213
if not isinstance(tool_call_template, GraphQLCallTemplate):
180214
raise ValueError("GraphQLCommunicationProtocol requires a GraphQLCallTemplate call template")
181-
self._enforce_https_or_localhost(tool_call_template.url)
215+
ensure_secure_url(
216+
tool_call_template.url, context="GraphQL tool invocation"
217+
)
182218

183219
headers = await self._prepare_headers(tool_call_template, tool_args)
184220
transport = AIOHTTPTransport(url=tool_call_template.url, headers=headers)
185221
async with GqlClient(transport=transport, fetch_schema_from_transport=True) as session:
222+
self._disable_transport_redirects(transport)
186223
# Filter out header fields from GraphQL variables; these are sent via HTTP headers
187224
header_fields = tool_call_template.header_fields or []
188225
filtered_args = {k: v for k, v in tool_args.items() if k not in header_fields}

0 commit comments

Comments
 (0)