Skip to content

Commit 3dffcf1

Browse files
committed
Support IPv6 addresses in URLs
URLs didn't distinguish between IPv6 addresses and domain names, so a bracketed IPv6 address was treated as a syntax error. Fix this by parsing them properly. The parser also handles percent-encoded zone IDs and properly percent-decodes them. Full regression tests are included.
1 parent 832a770 commit 3dffcf1

File tree

2 files changed

+121
-4
lines changed

2 files changed

+121
-4
lines changed

misc.c

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
#endif
6161

6262
#include "xmalloc.h"
63+
#include "addr.h"
6364
#include "misc.h"
6465
#include "log.h"
6566
#include "ssh.h"
@@ -1082,6 +1083,82 @@ urldecode(const char *src)
10821083
return ret;
10831084
}
10841085

1086+
static char *
1087+
decode_ipv6(const char *address)
1088+
{
1089+
const char *percent = strchr(address, '%');
1090+
const char *tmp_percent;
1091+
struct xaddr n;
1092+
char *output;
1093+
1094+
if (percent != NULL) {
1095+
/*
1096+
* Only the fragment specifier is allowed to be %-encoded.
1097+
* Therefore, the first %-encoded octet must itself be %,
1098+
* which is encoded as %25.
1099+
*/
1100+
if (percent[1] != '2' || percent[2] != '5')
1101+
fatal("%s has first %% not part of %%25", address);
1102+
1103+
/*
1104+
* RFC6874 says that non-unreserved characters MUST be percent-encoded.
1105+
* Validate that here. urldecode() will check for bad characters after
1106+
* the '%', as well as for %00.
1107+
*/
1108+
for (tmp_percent = percent; *tmp_percent != '\0'; tmp_percent++) {
1109+
if (!(isalnum((unsigned char)*tmp_percent) ||
1110+
*tmp_percent == '-' ||
1111+
*tmp_percent == '.' ||
1112+
*tmp_percent == '_' ||
1113+
*tmp_percent == '~' ||
1114+
*tmp_percent == '%'))
1115+
fatal("Non-unreserved, non-%% character in encoded IPv6 zone ID %s",
1116+
address);
1117+
}
1118+
}
1119+
1120+
/* URL-decode the fragment portion. */
1121+
output = urldecode(address);
1122+
if (output == NULL)
1123+
fatal("Invalid %%-encoding character in IPv6 address %s",
1124+
address);
1125+
1126+
/*
1127+
* glibc getaddrinfo() validates that zone IDs are only found on
1128+
* link-scope addresses and correspond to links that actually
1129+
* exist. However, a parsing function should not depend on what
1130+
* links are present. Therefore, only pass the part of the
1131+
* string before the zone ID to getaddrinfo().
1132+
*/
1133+
if (percent != NULL) {
1134+
/*
1135+
* "%25" decodes to "%", so the output buffer and the
1136+
* address have their first "%" at the same offset from
1137+
* the start of the string.
1138+
*/
1139+
output[percent - address] = '\0';
1140+
}
1141+
1142+
if (addr_pton(output, &n) == -1)
1143+
fatal("%s is not a valid IPv6 address", output);
1144+
1145+
/*
1146+
* The only caller already checks for there being a ':' in the address,
1147+
* and ':' after the first '%' must be %-encoded. Therefore, this must
1148+
* be an IPv6 address, not an IPv4 address. Nevertheless, check for
1149+
* AF_INET6 as defense in depth.
1150+
*/
1151+
if (n.af != AF_INET6)
1152+
fatal("internal error: %s is an IP address, but not an IPv6 address",
1153+
output);
1154+
1155+
/* Restore the zone identifier if present. */
1156+
if (percent != NULL)
1157+
output[percent - address] = '%';
1158+
1159+
return output;
1160+
}
1161+
10851162
/*
10861163
* Parse an (scp|ssh|sftp)://[user@]host[:port][/path] URI.
10871164
* See https://tools.ietf.org/html/draft-ietf-secsh-scp-sftp-ssh-uri-04
@@ -1098,7 +1175,7 @@ int
10981175
parse_uri(const char *scheme, const char *uri, char **userp, char **hostp,
10991176
int *portp, char **pathp)
11001177
{
1101-
char *uridup, *cp, *tmp, ch;
1178+
char *uridup, *cp, *tmp_host, *tmp, ch;
11021179
char *user = NULL, *host = NULL, *path = NULL;
11031180
int port = -1, ret = -1;
11041181
size_t len;
@@ -1141,9 +1218,16 @@ parse_uri(const char *scheme, const char *uri, char **userp, char **hostp,
11411218
/* Extract mandatory hostname */
11421219
if ((cp = hpdelim2(&tmp, &ch)) == NULL || *cp == '\0')
11431220
goto out;
1144-
host = xstrdup(cleanhostname(cp));
1145-
if (!valid_domain(host, 0, NULL))
1146-
goto out;
1221+
tmp_host = cleanhostname(cp);
1222+
if (strchr(tmp_host, ':') != NULL) {
1223+
/* Validate the IPv6 address and URL-decode the zone ID. */
1224+
if ((host = decode_ipv6(tmp_host)) == NULL)
1225+
goto out;
1226+
} else {
1227+
if (!valid_domain(tmp_host, 0, NULL))
1228+
goto out;
1229+
host = xstrdup(tmp_host);
1230+
}
11471231

11481232
if (tmp != NULL && *tmp != '\0') {
11491233
if (ch == ':') {

regress/connect-uri.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,36 @@ ${SSH} -F $OBJ/ssh_config "ssh://${USER}@somehost:${PORT}/${DATA}" true \
2727
if [ $? -eq 0 ]; then
2828
fail "ssh connection succeeded, expected failure"
2929
fi
30+
31+
# check_v6_pass tag url
# Run ssh against an IPv6 URL that is expected to be accepted, and record
# a test failure (naming the tag and URL) if ssh exits non-zero.
# Only stdout is discarded, so ssh's own diagnostics remain visible.
check_v6_pass () {
	_tag="$1"
	_url="$2"
	verbose "$tid: Valid IPv6 URL $_tag"
	if ${SSH} -F $OBJ/ssh_config "$_url" true >/dev/null; then
		:
	else
		fail "IPv6 URL connection failed: $_tag: $_url"
	fi
}
38+
39+
# check_v6_fail tag url
# Run ssh against an IPv6 URL that is expected to be rejected, and record
# a test failure (naming the tag and URL) if ssh exits with status 0.
# Both stdout and stderr are discarded because an error is the expected
# outcome here.
check_v6_fail () {
	_tag="$1"
	_url="$2"
	verbose "$tid: Invalid IPv6 URL $_tag"
	${SSH} -F $OBJ/ssh_config "$_url" true >/dev/null 2>&1
	if [ $? -eq 0 ]; then
		fail "IPv6 URL connection succeeded: $_tag: $_url"
	fi
}
47+
48+
check_v6_pass loopback "ssh://${USER}@[::1]:${PORT}/"
49+
check_v6_pass 'leading double colon' "ssh://${USER}@[::a:a:a:a:a:a]:${PORT}/"
50+
check_v6_pass 'trailing double colon' "ssh://${USER}@[a:a:a:a:a:a::]:${PORT}/"
51+
check_v6_pass 'no double colon' "ssh://${USER}@[a:a:a:a:a:a:a:a]:${PORT}/"
52+
check_v6_pass 'good zone ID' "ssh://${USER}@[fe80::1%25lo5]:${PORT}/"
53+
check_v6_pass 'good encoded zone ID' "ssh://${USER}@[fe80::1%25l%6f]:${PORT}/"
54+
check_v6_fail 'bad zone ID: % not %-encoded' "ssh://${USER}@[::1%2]:${PORT}/"
55+
check_v6_fail 'bad zone ID: non-unreserved character' "ssh://${USER}@[::1%25/]:${PORT}/"
56+
check_v6_fail 'bad zone ID: bad % encoding' "ssh://${USER}@[::1%25a%2]:${PORT}/"
57+
check_v6_fail 'bad zone ID: NUL byte' "ssh://${USER}@[::1%25a%00]:${PORT}/"
58+
check_v6_fail 'too many hex digits' "ssh://${USER}@[::aaaaa]:${PORT}/"
59+
check_v6_fail 'single leading colon' "ssh://${USER}@[:0:0:0:0:0:0:0]:${PORT}/"
60+
check_v6_fail 'single trailing colon' "ssh://${USER}@[0:0:0:0:0:0:0:]:${PORT}/"
61+
check_v6_fail 'too many components' "ssh://${USER}@[0:0:0:0:0:0:0:0:0]:${PORT}/"
62+
check_v6_fail 'leading and trailing double colon' "ssh://${USER}@[::0::]:${PORT}/"

0 commit comments

Comments
 (0)