Skip to content

Commit 743134c

Browse files
committed
Support IPv6 addresses in URLs
The URI parser did not distinguish between IPv6 address literals and domain names, so a bracketed IPv6 address was rejected as a syntax error. Fix this by validating IPv6 literals separately from domain names. The parser also accepts percent-encoded zone IDs (RFC 6874) and percent-decodes them before use. Regression tests for both valid and invalid addresses are included.
1 parent a6f8f79 commit 743134c

File tree

2 files changed

+195
-4
lines changed

2 files changed

+195
-4
lines changed

misc.c

Lines changed: 102 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,95 @@ cleanhostname(char *host)
866866
return host;
867867
}
868868

869+
/*
 * Check whether "address" is a syntactically valid IPv6 address literal
 * (without surrounding brackets), optionally followed by a zone ID
 * introduced by the percent-encoded delimiter "%25" (RFC6874).
 *
 * Only the colon-separated hexadecimal form is accepted: every character
 * before the zone ID must be a ':' or a hex digit, so an embedded
 * dotted-quad IPv4 suffix is rejected.
 *
 * The zone ID, if present, must be non-empty and consist only of
 * unreserved characters and '%'. The hex digits following a '%' inside
 * the zone ID are NOT validated here; the caller is expected to
 * percent-decode the string afterwards and reject malformed escapes.
 *
 * Returns 1 if the address is acceptable, 0 otherwise.
 */
static int
valid_ipv6(const char *address)
{
	int leading_colon, trailing_colon;
	int seen_double_colon = 0;
	size_t seen_colons = 0, xdigits = 0;
	size_t len, max_colons, i;
	const char *percent = strchr(address, '%');

	if (percent != NULL) {
		/*
		 * The zone delimiter must be exactly "%25". The '||'
		 * short-circuits, so percent[2] is only read when
		 * percent[1] is '2' (i.e. not the terminating NUL).
		 */
		if (percent[1] != '2' || percent[2] != '5')
			return 0;
		len = (size_t)(percent - address);
		percent += 3;
		/*
		 * RFC6874: ZoneID = 1*( unreserved / pct-encoded ).
		 * An empty zone ID ("::1%25") is therefore invalid.
		 */
		if (*percent == '\0')
			return 0;
	} else {
		len = strlen(address);
	}

	/* The shortest valid address is "::". */
	if (len < 2)
		return 0;

	leading_colon = address[0] == ':';
	trailing_colon = address[len - 1] == ':';

	/* A colon at either end is only legal as part of a "::". */
	if ((leading_colon && address[1] != ':') ||
	    (trailing_colon && address[len - 2] != ':'))
		return 0;

	/*
	 * With a "::" at the start or end, one extra colon appears that
	 * does not separate two groups, so allow one more per end.
	 */
	max_colons = 7 + leading_colon + trailing_colon;

	/*
	 * Count the number of colons. Check if there is a "::".
	 * Ensure that everything that is not a colon is a hex digit.
	 * Ensure there are no more than 4 hex digits in a row.
	 * address[len] is '\0' or '%' and in-bounds, so reading
	 * address[i + 1] is always safe.
	 */
	for (i = 0; i < len; i++) {
		if (address[i] == ':') {
			xdigits = 0;
			if (address[i + 1] == ':') {
				/*
				 * Only one :: is allowed. This also
				 * rejects ":::" since the second and
				 * third colons form another "::".
				 */
				if (seen_double_colon)
					return 0;
				seen_double_colon = 1;
			}
			seen_colons++;
			continue;
		}
		if (isxdigit((unsigned char)address[i])) {
			xdigits++;
			if (xdigits > 4)
				return 0;
			continue;
		}
		return 0;
	}

	/*
	 * Check that there are the exact number of components if there
	 * is no ::, or not too many components if there is a ::.
	 */
	if (seen_double_colon ? max_colons < seen_colons : seen_colons != 7)
		return 0;

	/* See if there is a zone ID to check. */
	if (percent == NULL)
		return 1;

	/*
	 * RFC6874 says that non-unreserved characters MUST be
	 * percent-encoded. Validate that here: only unreserved characters
	 * and the escape introducer '%' may appear.
	 */
	for (; *percent != '\0'; percent++) {
		if (!(isalnum((unsigned char)*percent) ||
		    *percent == '-' ||
		    *percent == '.' ||
		    *percent == '_' ||
		    *percent == '~' ||
		    *percent == '%'))
			return 0;
	}
	return 1;
}
957+
869958
char *
870959
colon(char *cp)
871960
{
@@ -1098,7 +1187,7 @@ int
10981187
parse_uri(const char *scheme, const char *uri, char **userp, char **hostp,
10991188
int *portp, char **pathp)
11001189
{
1101-
char *uridup, *cp, *tmp, ch;
1190+
char *uridup, *cp, *tmp_host, *tmp, ch;
11021191
char *user = NULL, *host = NULL, *path = NULL;
11031192
int port = -1, ret = -1;
11041193
size_t len;
@@ -1141,9 +1230,18 @@ parse_uri(const char *scheme, const char *uri, char **userp, char **hostp,
11411230
/* Extract mandatory hostname */
11421231
if ((cp = hpdelim2(&tmp, &ch)) == NULL || *cp == '\0')
11431232
goto out;
1144-
host = xstrdup(cleanhostname(cp));
1145-
if (!valid_domain(host, 0, NULL))
1146-
goto out;
1233+
tmp_host = cleanhostname(cp);
1234+
if (strchr(tmp_host, ':') != NULL) {
1235+
if (!valid_ipv6(tmp_host))
1236+
goto out;
1237+
/* Must URL-decode the zone ID */
1238+
if ((host = urldecode(tmp_host)) == NULL)
1239+
goto out;
1240+
} else {
1241+
if (!valid_domain(tmp_host, 0, NULL))
1242+
goto out;
1243+
host = xstrdup(tmp_host);
1244+
}
11471245

11481246
if (tmp != NULL && *tmp != '\0') {
11491247
if (ch == ':') {

regress/connect-uri.sh

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,99 @@ if [ $? -ne 0 ]; then
2121
fail "ssh connection failed"
2222
fi
2323

24+
verbose "$tid: IPv6 address"
25+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1]:${PORT}/" true
26+
if [ $? -ne 0 ]; then
27+
fail "ssh connection failed"
28+
fi
29+
30+
verbose "$tid: IPv6 address 2"
31+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::a:a:a:a:a:a]:${PORT}/" true
32+
if [ $? -ne 0 ]; then
33+
fail "ssh connection failed"
34+
fi
35+
36+
# Uncompressed form (no "::"): exercises the branch of the IPv6 validator
# that requires exactly seven colons. The previous version of this case
# was an exact duplicate of "IPv6 address 2".
verbose "$tid: IPv6 address 3"
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[0:0:0:0:0:0:0:1]:${PORT}/" true
if [ $? -ne 0 ]; then
	fail "ssh connection failed"
fi
41+
42+
verbose "$tid: IPv6 address with good zone ID"
43+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1%25abc]:${PORT}/" true
44+
if [ $? -ne 0 ]; then
45+
fail "ssh connection failed"
46+
fi
47+
48+
verbose "$tid: IPv6 address with good encoded zone ID"
49+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1%25ab%25]:${PORT}/" true
50+
if [ $? -ne 0 ]; then
51+
fail "ssh connection failed"
52+
fi
53+
54+
verbose "$tid: IPv6 address bad zone ID"
55+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1%2]:${PORT}/" true \
56+
> /dev/null 2>&1
57+
if [ $? -eq 0 ]; then
58+
fail "ssh connection succeeded, expected failure"
59+
fi
60+
61+
verbose "$tid: IPv6 address bad zone ID: non-unreserved character"
62+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1%25/]:${PORT}/" true \
63+
> /dev/null 2>&1
64+
if [ $? -eq 0 ]; then
65+
fail "ssh connection succeeded, expected failure"
66+
fi
67+
68+
verbose "$tid: IPv6 address bad zone ID: bad % encoding"
69+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1%25a%2]:${PORT}/" true \
70+
> /dev/null 2>&1
71+
if [ $? -eq 0 ]; then
72+
fail "ssh connection succeeded, expected failure"
73+
fi
74+
75+
verbose "$tid: IPv6 address bad zone ID: NUL byte"
76+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::1%25a%00]:${PORT}/" true \
77+
> /dev/null 2>&1
78+
if [ $? -eq 0 ]; then
79+
fail "ssh connection succeeded, expected failure"
80+
fi
81+
82+
verbose "$tid: IPv6 address too many hex digits"
83+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::aaaaa]:${PORT}/" true \
84+
> /dev/null 2>&1
85+
if [ $? -eq 0 ]; then
86+
fail "ssh connection succeeded, expected failure"
87+
fi
88+
89+
verbose "$tid: IPv6 address single leading colon"
90+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[:0:0:0:0:0:0:0]:${PORT}/" true \
91+
> /dev/null 2>&1
92+
if [ $? -eq 0 ]; then
93+
fail "ssh connection succeeded, expected failure"
94+
fi
95+
96+
verbose "$tid: IPv6 address single trailing colon"
97+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[0:0:0:0:0:0:0:]:${PORT}/" true \
98+
> /dev/null 2>&1
99+
if [ $? -eq 0 ]; then
100+
fail "ssh connection succeeded, expected failure"
101+
fi
102+
103+
verbose "$tid: IPv6 address too many components"
104+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[0:0:0:0:0:0:0:0:0]:${PORT}/" true \
105+
> /dev/null 2>&1
106+
if [ $? -eq 0 ]; then
107+
fail "ssh connection succeeded, expected failure"
108+
fi
109+
110+
verbose "$tid: IPv6 address leading and trailing double colon"
111+
${SSH} -F $OBJ/ssh_config "ssh://${USER}@[::0::]:${PORT}/" true \
112+
> /dev/null 2>&1
113+
if [ $? -eq 0 ]; then
114+
fail "ssh connection succeeded, expected failure"
115+
fi
116+
24117
verbose "$tid: with path name"
25118
${SSH} -F $OBJ/ssh_config "ssh://${USER}@somehost:${PORT}/${DATA}" true \
26119
> /dev/null 2>&1

0 commit comments

Comments
 (0)