Skip to content

Commit dd150cb

Browse files
authored
Merge pull request #84 from universal-tool-calling-protocol/dev
Dev
2 parents 6bf6d66 + f873ed6 commit dd150cb

7 files changed

Lines changed: 337 additions & 28 deletions

File tree

plugins/communication_protocols/http/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "utcp-http"
7-
version = "1.1.1"
7+
version = "1.1.3"
88
authors = [
99
{ name = "UTCP Contributors" },
1010
]
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
"""URL validation shared by every HTTP-based communication protocol.
2+
3+
Centralised so all three HTTP protocols (http, streamable_http, sse) enforce
4+
the same trust boundary at every network edge — manual discovery AND tool
5+
invocation. Issue #83 (CVE-class SSRF) was caused by the runtime invocation
6+
path forgetting the discovery-time check, so this module also provides an
7+
explicit ``ensure_secure_url`` to call before every aiohttp request.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from ipaddress import ip_address
13+
from typing import Optional
14+
from urllib.parse import urlparse
15+
16+
# Hostnames considered safe to talk to over plain HTTP.
# ``urlparse(...).hostname`` strips the brackets from IPv6 literals, so the
# ``"[::1]"`` spelling is kept only as a defensive entry.
_LOOPBACK_HOSTNAMES = frozenset({"localhost", "127.0.0.1", "::1", "[::1]"})


def _scheme_and_host(url: object) -> tuple[str, str]:
    """Return ``(scheme, hostname)`` of ``url``, both lower-cased.

    Either element is ``""`` when it is absent. ``("", "")`` is returned when
    ``url`` is not a non-empty string or cannot be parsed at all.
    """
    if not isinstance(url, str) or not url:
        return "", ""

    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
    except ValueError:
        # e.g. a malformed bracketed IPv6 literal.
        return "", ""

    return (parsed.scheme or "").lower(), (hostname or "").lower()


def is_secure_url(url: str) -> bool:
    """Return True if ``url`` is safe to fetch from a UTCP HTTP protocol.

    Allowed:
      - Any ``https://`` URL that has a host.
      - ``http://`` URLs whose host is exactly ``localhost`` or a literal
        loopback IP address (``127.0.0.1``, ``::1``, ``127.0.0.2``, ...).

    Disallowed:
      - Plain ``http://`` to any other host (MITM exposure).
      - URLs whose hostname merely *starts* with ``localhost`` / ``127.0.0.1``
        (e.g. ``http://localhost.evil.com``,
        ``http://127.0.0.1.attacker.example``). The earlier ``startswith``
        check let these through.
      - Any scheme other than http/https, or a URL without a host
        (``file://``, ``gopher://``, ``javascript:``, ...).
      - Non-string or empty input.

    This is an allow-list, so it is deliberately strict: spellings that
    ``ipaddress.ip_address`` refuses to parse are rejected rather than guessed.
    """
    scheme, host = _scheme_and_host(url)
    if scheme not in {"http", "https"} or not host:
        return False

    if scheme == "https":
        return True

    # http:// is only allowed for loopback.
    if host in _LOOPBACK_HOSTNAMES:
        return True

    # Any other literal loopback IP, e.g. ``http://127.0.0.2``. Hostnames and
    # IP spellings that ``ip_address`` cannot parse are treated as not loopback.
    try:
        return ip_address(host).is_loopback
    except ValueError:
        return False


def is_loopback_url(url: str) -> bool:
    """Return True if ``url``'s host designates the local machine.

    Used by the OpenAPI converter to detect the SSRF case where a remote spec
    declares ``servers: [{ url: "http://127.0.0.1:..." }]`` to redirect tool
    invocation at the host running the agent. The check is hostname-based —
    not a string prefix — so ``http://localhost.evil.com`` returns False.

    Because this is a *detector* (a miss is the unsafe outcome), it is broader
    than the allow-list in ``is_secure_url`` and also recognises:
      - a trailing-dot FQDN spelling (``localhost.``, ``127.0.0.1.``);
      - IPv4-mapped IPv6 loopback (``::ffff:127.0.0.1``), which
        ``is_loopback`` does not report on every Python version;
      - the unspecified address (``0.0.0.0`` / ``::``), which common
        operating systems route to the local host when used as a destination.

    The scheme is ignored. Returns False for non-string, empty or unparseable
    input and for URLs without a host.
    """
    _, host = _scheme_and_host(url)

    # "localhost." and "localhost" name the same host.
    host = host.rstrip(".")
    if not host:
        return False

    if host in _LOOPBACK_HOSTNAMES:
        return True

    try:
        address = ip_address(host)
    except ValueError:
        return False

    # Unwrap ::ffff:a.b.c.d so the embedded IPv4 address is what gets judged.
    mapped = getattr(address, "ipv4_mapped", None)
    if mapped is not None:
        address = mapped

    return address.is_loopback or address.is_unspecified


def ensure_secure_url(url: str, *, context: Optional[str] = None) -> None:
    """Raise ``ValueError`` if ``url`` is not safe to fetch.

    Args:
        url: The URL about to be requested.
        context: Optional short label (``"manual discovery"``,
            ``"tool invocation"``, etc.) included in the error so log readers
            can tell which trust boundary was breached.

    Raises:
        ValueError: If ``is_secure_url(url)`` is False.
    """
    if is_secure_url(url):
        return

    where = f" during {context}" if context else ""
    raise ValueError(
        f"Security error{where}: URL must use HTTPS or be a literal loopback "
        f"address (localhost / 127.0.0.1 / ::1). Got: {url!r}. "
        "Plain HTTP to any other host is rejected to prevent MITM attacks "
        "and SSRF into internal services."
    )

plugins/communication_protocols/http/src/utcp_http/http_communication_protocol.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from utcp_http.http_call_template import HttpCallTemplate
3434
from aiohttp import ClientSession, BasicAuth as AiohttpBasicAuth
3535
from utcp_http.openapi_converter import OpenApiConverter
36+
from utcp_http._security import ensure_secure_url
3637
import logging
3738

3839
logging.basicConfig(
@@ -123,14 +124,10 @@ async def register_manual(self, caller, manual_call_template: CallTemplate) -> R
123124

124125
try:
125126
url = manual_call_template.url
126-
127-
# Security check: Enforce HTTPS or localhost to prevent MITM attacks
128-
if not (url.startswith("https://") or url.startswith("http://localhost") or url.startswith("http://127.0.0.1")):
129-
raise ValueError(
130-
f"Security error: URL must use HTTPS or start with 'http://localhost' or 'http://127.0.0.1'. Got: {url}. "
131-
"Non-secure URLs are vulnerable to man-in-the-middle attacks."
132-
)
133-
127+
128+
# Security check: only HTTPS or loopback HTTP allowed for manual discovery.
129+
ensure_secure_url(url, context="manual discovery")
130+
134131
logger.info(f"Discovering tools from '{manual_call_template.name}' (HTTP) at {url}")
135132

136133
# Use the call template's configuration (headers, auth, HTTP method, etc.)
@@ -274,7 +271,15 @@ async def call_tool(self, caller, tool_name: str, tool_args: Dict[str, Any], too
274271

275272
# Build the URL with path parameters substituted
276273
url = self._build_url_with_path_params(tool_call_template.url, remaining_args)
277-
274+
275+
# Security check: re-validate the resolved URL before each invocation.
276+
# An attacker-controlled OpenAPI spec discovered over a legitimate HTTPS
277+
# URL can declare ``servers[0].url`` pointing at internal services
278+
# (e.g. http://169.254.169.254 for cloud metadata, http://127.0.0.1:9200
279+
# for an unauthenticated Elasticsearch). Without this re-check, tool
280+
# invocation is a blind SSRF primitive — see GHSA-39j6-4867-gg4w / issue #83.
281+
ensure_secure_url(url, context="tool invocation")
282+
278283
# The rest of the arguments are query parameters
279284
query_params = remaining_args
280285

plugins/communication_protocols/http/src/utcp_http/openapi_converter.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from utcp.data.utcp_manual import UtcpManual
2828
from utcp.data.tool import Tool, JsonSchema
2929
from utcp_http.http_call_template import HttpCallTemplate
30+
from utcp_http._security import is_loopback_url
3031

3132
class OpenApiConverter:
3233
"""REQUIRED
@@ -148,9 +149,32 @@ def convert(self) -> UtcpManual:
148149

149150
# Determine base URL: override > servers > spec_url > fallback
150151
if self._base_url_override:
152+
# Explicit override from UTCP config — caller has accepted the
153+
# trust decision, no further validation here.
151154
base_url = self._base_url_override
152155
elif self.spec.get("servers"):
153156
base_url = self.spec["servers"][0].get("url", "/")
157+
158+
# Rule: a spec fetched from a non-loopback source cannot declare
159+
# a loopback server URL. A user pointing the converter at their
160+
# own localhost OpenAPI spec is allowed to declare loopback
161+
# servers, and an explicit ``base_url`` override always wins
162+
# (handled above).
163+
if (
164+
self.spec_url
165+
and not is_loopback_url(self.spec_url)
166+
and is_loopback_url(base_url)
167+
):
168+
raise ValueError(
169+
"Security error: OpenAPI spec fetched from "
170+
f"{self.spec_url!r} declares a loopback server URL "
171+
f"({base_url!r}). A remote spec is not allowed to "
172+
"redirect tool calls at the agent's own loopback "
173+
"interface — this is the SSRF pattern from "
174+
"GHSA-39j6-4867-gg4w. If you trust this spec, set "
175+
"the call template's ``base_url`` override "
176+
"explicitly to bypass this check."
177+
)
154178
elif self.spec_url:
155179
parsed_url = urlparse(self.spec_url)
156180
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

plugins/communication_protocols/http/src/utcp_http/sse_communication_protocol.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from utcp.data.auth_implementations.oauth2_auth import OAuth2Auth
1818
from utcp_http.sse_call_template import SseCallTemplate
1919
from aiohttp import ClientSession, BasicAuth as AiohttpBasicAuth
20+
from utcp_http._security import ensure_secure_url
2021
import traceback
2122
import logging
2223

@@ -77,14 +78,10 @@ async def register_manual(self, caller, manual_call_template: CallTemplate) -> R
7778

7879
try:
7980
url = manual_call_template.url
80-
81-
# Security check: Enforce HTTPS or localhost to prevent MITM attacks
82-
if not (url.startswith("https://") or url.startswith("http://localhost") or url.startswith("http://127.0.0.1")):
83-
raise ValueError(
84-
f"Security error: URL must use HTTPS or start with 'http://localhost' or 'http://127.0.0.1'. Got: {url}. "
85-
"Non-secure URLs are vulnerable to man-in-the-middle attacks."
86-
)
87-
81+
82+
# Security check: only HTTPS or loopback HTTP allowed for manual discovery.
83+
ensure_secure_url(url, context="manual discovery")
84+
8885
logger.info(f"Discovering tools from '{manual_call_template.name}' (SSE) at {url}")
8986

9087
# Use the provider's configuration (headers, auth, etc.)
@@ -188,7 +185,12 @@ async def call_tool_streaming(self, caller, tool_name: str, tool_args: Dict[str,
188185

189186
# Build the URL with path parameters substituted
190187
url = self._build_url_with_path_params(tool_call_template.url, remaining_args)
191-
188+
189+
# Security check: re-validate the resolved URL before each invocation.
190+
# Defends against SSRF via attacker-controlled OpenAPI specs that point
191+
# ``servers[0].url`` at internal services. See issue #83.
192+
ensure_secure_url(url, context="tool invocation")
193+
192194
# The rest of the arguments are query parameters
193195
query_params = remaining_args
194196

plugins/communication_protocols/http/src/utcp_http/streamable_http_communication_protocol.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from utcp.data.auth_implementations import OAuth2Auth
1616
from utcp_http.streamable_http_call_template import StreamableHttpCallTemplate
1717
from aiohttp import ClientSession, BasicAuth as AiohttpBasicAuth, ClientResponse
18+
from utcp_http._security import ensure_secure_url
1819
import logging
1920

2021
logging.basicConfig(
@@ -78,14 +79,11 @@ async def register_manual(self, caller, manual_call_template: CallTemplate) -> R
7879
raise ValueError("StreamableHttpCommunicationProtocol can only be used with StreamableHttpCallTemplate")
7980

8081
url = manual_call_template.url
81-
82-
# Security check: Enforce HTTPS or localhost to prevent MITM attacks
83-
if not (url.startswith("https://") or url.startswith("http://localhost") or url.startswith("http://127.0.0.1")):
84-
raise ValueError(
85-
f"Security error: URL must use HTTPS or start with 'http://localhost' or 'http://127.0.0.1'. Got: {url}. "
86-
"Non-secure URLs are vulnerable to man-in-the-middle attacks."
87-
)
88-
82+
83+
# Security check: only HTTPS or loopback HTTP allowed for manual discovery.
84+
ensure_secure_url(url, context="manual discovery")
85+
86+
8987
logger.info(f"Discovering tools from '{manual_call_template.name}' (HTTP Stream) at {url}")
9088

9189
try:
@@ -216,7 +214,12 @@ async def call_tool_streaming(self, caller, tool_name: str, tool_args: Dict[str,
216214

217215
# Build the URL with path parameters substituted
218216
url = self._build_url_with_path_params(tool_call_template.url, remaining_args)
219-
217+
218+
# Security check: re-validate the resolved URL before each invocation.
219+
# Defends against SSRF via attacker-controlled OpenAPI specs that point
220+
# ``servers[0].url`` at internal services. See issue #83.
221+
ensure_secure_url(url, context="tool invocation")
222+
220223
# The rest of the arguments are query parameters
221224
query_params = remaining_args
222225

0 commit comments

Comments
 (0)