Skip to content

Commit 6bdf58b

Browse files
committed
Support IPv6 addresses in URLs
The URL parser didn't distinguish between IPv6 address literals and domain names, so a bracketed IPv6 address was rejected as a syntax error. Fix this by parsing IPv6 literals properly. The parser also accepts percent-encoded zone IDs and percent-decodes them. Regression tests are included.
1 parent 4168c90 commit 6bdf58b

File tree

2 files changed

+127
-4
lines changed

2 files changed

+127
-4
lines changed

misc.c

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#endif
6060

6161
#include "xmalloc.h"
62+
#include "addr.h"
6263
#include "misc.h"
6364
#include "log.h"
6465
#include "ssh.h"
@@ -1081,6 +1082,88 @@ urldecode(const char *src)
10811082
return ret;
10821083
}
10831084

1085+
static char *
1086+
decode_ipv6(const char *address)
1087+
{
1088+
const char *tmp_percent;
1089+
struct xaddr n;
1090+
char *output, *percent;
1091+
1092+
percent = strchr(address, '%');
1093+
if (percent != NULL) {
1094+
/*
1095+
* Only the fragment specifier is allowed to be %-encoded.
1096+
* Therefore, the first %-encoded octet must itself be %,
1097+
* which is encoded as %25.
1098+
*/
1099+
if (percent[1] != '2' || percent[2] != '5')
1100+
fatal("%s has first %% not part of %%25", address);
1101+
1102+
/*
1103+
* RFC6874 says that non-unreserved characters MUST be percent-encoded.
1104+
* Validate that here. urldecode() will check for bad characters after
1105+
* the '%', as well as for %00.
1106+
*/
1107+
for (tmp_percent = percent; *tmp_percent != '\0'; tmp_percent++) {
1108+
if (!(isalnum((unsigned char)*tmp_percent) ||
1109+
*tmp_percent == '-' ||
1110+
*tmp_percent == '.' ||
1111+
*tmp_percent == '_' ||
1112+
*tmp_percent == '~' ||
1113+
*tmp_percent == '%'))
1114+
fatal("Non-unreserved, non-%% character in encoded IPv6 zone ID %s",
1115+
address);
1116+
}
1117+
}
1118+
1119+
/* URL-decode the fragment portion. */
1120+
output = urldecode(address);
1121+
if (output == NULL)
1122+
fatal("Invalid %%-encoding character in IPv6 address %s",
1123+
address);
1124+
1125+
/*
1126+
* glibc getaddrinfo() validates that zone IDs are only found on
1127+
* link-scope addresses and correspond to links that actually
1128+
* exist. However, a parsing function should not depend on what
1129+
* links are present. Therefore, only pass the part of the
1130+
* string before the zone ID to getaddrinfo().
1131+
*
1132+
* "%25" decodes to "%", so the output buffer and the
1133+
* address have their first "%" at the same offset from
1134+
* the start of the string.
1135+
*/
1136+
if (percent != NULL) {
1137+
/*
1138+
* Update percent to point into the decoded buffer.
1139+
* Use output + (percent - address) because pointer
1140+
* arithmetic between pointers to different buffers
1141+
* is undefined behavior in C.
1142+
*/
1143+
percent = output + (percent - address);
1144+
*percent = '\0';
1145+
}
1146+
1147+
if (addr_pton(output, &n) == -1)
1148+
fatal("%s is not a valid IPv6 address", output);
1149+
1150+
/*
1151+
* The only caller already checks for there being a ':' in the address,
1152+
* and ':' after the first '%' must be %-encoded. Therefore, this must
1153+
* be an IPv6 address, not an IPv4 address. Nevertheless, check for
1154+
* AF_INET6 as defense in depth.
1155+
*/
1156+
if (n.af != AF_INET6)
1157+
fatal("internal error: %s is an IP address, but not an IPv6 address",
1158+
output);
1159+
1160+
/* Restore the zone identifier if present. */
1161+
if (percent != NULL)
1162+
*percent = '%';
1163+
1164+
return output;
1165+
}
1166+
10841167
/*
10851168
* Parse an (scp|ssh|sftp)://[user@]host[:port][/path] URI.
10861169
* See https://tools.ietf.org/html/draft-ietf-secsh-scp-sftp-ssh-uri-04
@@ -1097,7 +1180,7 @@ int
10971180
parse_uri(const char *scheme, const char *uri, char **userp, char **hostp,
10981181
int *portp, char **pathp)
10991182
{
1100-
char *uridup, *cp, *tmp, ch;
1183+
char *uridup, *cp, *tmp_host, *tmp, ch;
11011184
char *user = NULL, *host = NULL, *path = NULL;
11021185
int port = -1, ret = -1;
11031186
size_t len;
@@ -1140,9 +1223,16 @@ parse_uri(const char *scheme, const char *uri, char **userp, char **hostp,
11401223
/* Extract mandatory hostname */
11411224
if ((cp = hpdelim2(&tmp, &ch)) == NULL || *cp == '\0')
11421225
goto out;
1143-
host = xstrdup(cleanhostname(cp));
1144-
if (!valid_domain(host, 0, NULL))
1145-
goto out;
1226+
tmp_host = cleanhostname(cp);
1227+
if (strchr(tmp_host, ':') != NULL) {
1228+
/* Validate the IPv6 address and URL-decode the zone ID. */
1229+
if ((host = decode_ipv6(tmp_host)) == NULL)
1230+
goto out;
1231+
} else {
1232+
if (!valid_domain(tmp_host, 0, NULL))
1233+
goto out;
1234+
host = xstrdup(tmp_host);
1235+
}
11461236

11471237
if (tmp != NULL && *tmp != '\0') {
11481238
if (ch == ':') {

regress/connect-uri.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,36 @@ ${SSH} -F $OBJ/ssh_config "ssh://${USER}@somehost:${PORT}/${DATA}" true \
2727
if [ $? -eq 0 ]; then
2828
fail "ssh connection succeeded, expected failure"
2929
fi
30+
31+
# Run ssh against the URL in $2 and record a test failure, labelled with the
# description in $1, when the command exits non-zero.  Only stdout is
# discarded, so ssh's stderr stays visible.
check_v6_pass () {
	_tag="$1" _url="$2"
	verbose "$tid: Valid IPv6 URL $_tag"
	if ${SSH} -F $OBJ/ssh_config "$_url" true >/dev/null; then
		:
	else
		fail "IPv6 URL connection failed: $_tag: $_url"
	fi
}
38+
39+
# Run ssh against the URL in $2, which is expected to be rejected, and record
# a test failure, labelled with the description in $1, when the command
# exits zero.  Both stdout and stderr are discarded.
check_v6_fail () {
	_tag="$1" _url="$2"
	verbose "$tid: Invalid IPv6 URL $_tag"
	# A non-zero exit is the expected outcome: nothing to report.
	${SSH} -F $OBJ/ssh_config "$_url" true >/dev/null 2>&1 || return 0
	fail "IPv6 URL connection succeeded: $_tag: $_url"
}
47+
48+
check_v6_pass loopback "ssh://${USER}@[::1]:${PORT}/"
49+
check_v6_pass 'leading double colon' "ssh://${USER}@[::a:a:a:a:a:a]:${PORT}/"
50+
check_v6_pass 'trailing double colon' "ssh://${USER}@[a:a:a:a:a:a::]:${PORT}/"
51+
check_v6_pass 'no double colon' "ssh://${USER}@[a:a:a:a:a:a:a:a]:${PORT}/"
52+
check_v6_pass 'good zone ID' "ssh://${USER}@[fe80::1%25lo5]:${PORT}/"
53+
check_v6_pass 'good encoded zone ID' "ssh://${USER}@[fe80::1%25l%6f]:${PORT}/"
54+
check_v6_fail 'bad zone ID: % not %-encoded' "ssh://${USER}@[::1%2]:${PORT}/"
55+
check_v6_fail 'bad zone ID: non-unreserved character' "ssh://${USER}@[::1%25/]:${PORT}/"
56+
check_v6_fail 'bad zone ID: bad % encoding' "ssh://${USER}@[::1%25a%2]:${PORT}/"
57+
check_v6_fail 'bad zone ID: NUL byte' "ssh://${USER}@[::1%25a%00]:${PORT}/"
58+
check_v6_fail 'too many hex digits' "ssh://${USER}@[::aaaaa]:${PORT}/"
59+
check_v6_fail 'single leading colon' "ssh://${USER}@[:0:0:0:0:0:0:0]:${PORT}/"
60+
check_v6_fail 'single trailing colon' "ssh://${USER}@[0:0:0:0:0:0:0:]:${PORT}/"
61+
check_v6_fail 'too many components' "ssh://${USER}@[0:0:0:0:0:0:0:0:0]:${PORT}/"
62+
check_v6_fail 'leading and trailing double colon' "ssh://${USER}@[::0::]:${PORT}/"

0 commit comments

Comments
 (0)