Skip to content

Commit 6c86c17

Browse files
Merge commit from fork
* fix: reject malformed authority and port normalization

* test: add regression tests for encoded authority delimiters in host

  Covers %40, %3A, %2F, %23, %3F in host across normalize, parse, equal, resolve, and serialize. Includes cross-scheme and layered encoding vectors.

* fix: re-escape gen-delims in host after unescape to prevent authority change

  unescape() decodes all percent-encoded characters including reserved authority delimiters (%40->@, %3A->:, %2F->/, %3F->?, %23->#). Per RFC 3986 these must stay encoded in the host component. Adds reescapeHostDelimiters() and applies it at the three call sites where unescape() is used on the host: parse(), normalizeComponentEncoding(), and recomposeAuthority().

* refactor: track malformed authority status during parse

* refactor: centralize malformed input handling

---------

Co-authored-by: Ulises Gascon <ulisesgascongonzalez@gmail.com>
1 parent a95158a commit 6c86c17

4 files changed

Lines changed: 240 additions & 19 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Dependency-free RFC 3986 URI toolbox.
1212

1313
All of the above functions can accept an additional options argument that is an object that can contain one or more of the following properties:
1414

15+
Malformed authorities and out-of-range ports are reported through the parsed component's `error` field. `normalize()` leaves malformed string inputs unchanged, and `equal()` returns `false` when either string input is malformed.
16+
1517
* `scheme` (string)
1618
Indicates the scheme that the URI should be treated as, overriding the URI's normal scheme parsing behavior.
1719

index.js

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use strict'
22

3-
const { normalizeIPv6, removeDotSegments, recomposeAuthority, normalizePercentEncoding, normalizePathEncoding, escapePreservingEscapes, isIPv4, nonSimpleDomain } = require('./lib/utils')
3+
const { normalizeIPv6, removeDotSegments, recomposeAuthority, normalizePercentEncoding, normalizePathEncoding, escapePreservingEscapes, reescapeHostDelimiters, isIPv4, nonSimpleDomain } = require('./lib/utils')
44
const { SCHEMES, getSchemeHandler } = require('./lib/schemes')
55

66
/**
@@ -11,7 +11,7 @@ const { SCHEMES, getSchemeHandler } = require('./lib/schemes')
1111
*/
1212
function normalize (uri, options) {
1313
if (typeof uri === 'string') {
14-
uri = /** @type {T} */ (serialize(parse(uri, options), options))
14+
uri = /** @type {T} */ (normalizeString(uri, options))
1515
} else if (typeof uri === 'object') {
1616
uri = /** @type {T} */ (parse(serialize(uri, options), options))
1717
}
@@ -106,19 +106,10 @@ function resolveComponent (base, relative, options, skipNormalization) {
106106
* @returns {boolean}
107107
*/
108108
function equal (uriA, uriB, options) {
109-
if (typeof uriA === 'string') {
110-
uriA = serialize(parse(uriA, options), options)
111-
} else if (typeof uriA === 'object') {
112-
uriA = serialize(uriA, options)
113-
}
114-
115-
if (typeof uriB === 'string') {
116-
uriB = serialize(parse(uriB, options), options)
117-
} else if (typeof uriB === 'object') {
118-
uriB = serialize(uriB, options)
119-
}
109+
const normalizedA = normalizeComparableURI(uriA, options)
110+
const normalizedB = normalizeComparableURI(uriB, options)
120111

121-
return uriA.toLowerCase() === uriB.toLowerCase()
112+
return normalizedA !== undefined && normalizedB !== undefined && normalizedA.toLowerCase() === normalizedB.toLowerCase()
122113
}
123114

124115
/**
@@ -211,12 +202,29 @@ function serialize (cmpts, opts) {
211202

212203
const URI_PARSE = /^(?:([^#/:?]+):)?(?:\/\/((?:([^#/?@]*)@)?(\[[^#/?\]]+\]|[^#/:?]*)(?::(\d*))?))?([^#?]*)(?:\?([^#]*))?(?:#((?:.|[\n\r])*))?/u
213204

205+
/**
 * Inspects an already-populated parse result for authority and port problems.
 *
 * @param {import('./types/index').URIComponent} parsed - components extracted from the URI
 * @param {RegExpMatchArray} matches - URI_PARSE match result; only group 2 (the authority) is read
 * @returns {string|undefined} an error message, or `undefined` when no problem was found
 */
function getParseError (parsed, matches) {
  const hasAuthority = matches[2] !== undefined

  // Once an authority was matched, a non-empty path has to begin with "/".
  if (hasAuthority && parsed.path && parsed.path[0] !== '/') {
    return 'URI path must start with "/" when authority is present.'
  }

  // Only numeric ports are validated; ports kept as strings are left untouched.
  // Number.isInteger also rejects NaN and fractional values, which a plain
  // `< 0 || > 65535` comparison would silently accept.
  if (typeof parsed.port === 'number') {
    const inRange = Number.isInteger(parsed.port) && parsed.port >= 0 && parsed.port <= 65535
    if (!inRange) {
      return 'URI port is malformed.'
    }
  }

  return undefined
}
221+
214222
/**
215223
* @param {string} uri
216224
* @param {import('./types/index').Options} [opts]
217-
* @returns
225+
* @returns {{ parsed: import('./types/index').URIComponent, malformedAuthorityOrPort: boolean }}
218226
*/
219-
function parse (uri, opts) {
227+
function parseWithStatus (uri, opts) {
220228
const options = Object.assign({}, opts)
221229
/** @type {import('./types/index').URIComponent} */
222230
const parsed = {
@@ -229,6 +237,8 @@ function parse (uri, opts) {
229237
fragment: undefined
230238
}
231239

240+
let malformedAuthorityOrPort = false
241+
232242
let isIP = false
233243
if (options.reference === 'suffix') {
234244
if (options.scheme) {
@@ -254,6 +264,13 @@ function parse (uri, opts) {
254264
if (isNaN(parsed.port)) {
255265
parsed.port = matches[5]
256266
}
267+
268+
const parseError = getParseError(parsed, matches)
269+
if (parseError !== undefined) {
270+
parsed.error = parsed.error || parseError
271+
malformedAuthorityOrPort = true
272+
}
273+
257274
if (parsed.host) {
258275
const ipv4result = isIPv4(parsed.host)
259276
if (ipv4result === false) {
@@ -302,7 +319,7 @@ function parse (uri, opts) {
302319
parsed.scheme = unescape(parsed.scheme)
303320
}
304321
if (parsed.host !== undefined) {
305-
parsed.host = unescape(parsed.host)
322+
parsed.host = reescapeHostDelimiters(unescape(parsed.host), isIP)
306323
}
307324
}
308325
if (parsed.path) {
@@ -324,9 +341,57 @@ function parse (uri, opts) {
324341
} else {
325342
parsed.error = parsed.error || 'URI can not be parsed.'
326343
}
327-
return parsed
344+
return { parsed, malformedAuthorityOrPort }
345+
}
346+
347+
/**
 * Parses a URI string into its components.
 *
 * Delegates to `parseWithStatus` and hands back only its `parsed` member;
 * the accompanying `malformedAuthorityOrPort` flag is dropped here.
 *
 * @param {string} uri
 * @param {import('./types/index').Options} [opts]
 * @returns
 */
function parse (uri, opts) {
  const { parsed } = parseWithStatus(uri, opts)
  return parsed
}
329355

356+
/**
 * String-only normalization: returns the `normalized` member produced by
 * `normalizeStringWithStatus`, without the malformed-input flag.
 *
 * @param {string} uri
 * @param {import('./types/index').Options} [opts]
 * @returns {string}
 */
function normalizeString (uri, opts) {
  const { normalized } = normalizeStringWithStatus(uri, opts)
  return normalized
}
364+
365+
/**
 * Normalizes a URI string and reports whether parsing flagged a malformed
 * authority or port. Flagged input is returned exactly as given; anything
 * else is the serialization of its parsed components.
 *
 * @param {string} uri
 * @param {import('./types/index').Options} [opts]
 * @returns {{ normalized: string, malformedAuthorityOrPort: boolean }}
 */
function normalizeStringWithStatus (uri, opts) {
  const status = parseWithStatus(uri, opts)
  const malformedAuthorityOrPort = status.malformedAuthorityOrPort

  // Malformed input is passed through untouched instead of being re-serialized.
  if (malformedAuthorityOrPort) {
    return { normalized: uri, malformedAuthorityOrPort }
  }

  return { normalized: serialize(status.parsed, opts), malformedAuthorityOrPort }
}
377+
378+
/**
 * Produces the string form of a URI used for comparison.
 *
 * Objects are serialized directly. Strings are normalized, and yield
 * `undefined` when the input was flagged as having a malformed authority or
 * port. Any other input type also yields `undefined`.
 *
 * @param {import ('./types/index').URIComponent|string} uri
 * @param {import('./types/index').Options} [opts]
 * @returns {string|undefined}
 */
function normalizeComparableURI (uri, opts) {
  if (typeof uri === 'object') {
    return serialize(uri, opts)
  }

  if (typeof uri !== 'string') {
    return undefined
  }

  const result = normalizeStringWithStatus(uri, opts)
  return result.malformedAuthorityOrPort ? undefined : result.normalized
}
393+
394+
330395
const fastUri = {
331396
SCHEMES,
332397
normalize,

lib/utils.js

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,26 @@ function removeDotSegments (path) {
272272
return output.join('')
273273
}
274274

275+
// Percent-encoded replacement for each delimiter that reescapeHostDelimiters restores.
const HOST_DELIMS = { '@': '%40', '/': '%2F', '?': '%3F', '#': '%23', ':': '%3A' }
// All five delimiters (used for non-IP hosts).
const HOST_DELIM_RE = /[@/?#:]/g
// Same set without ':' (used for IP hosts, where colons stay literal).
const HOST_DELIM_NO_COLON_RE = /[@/?#]/g

/**
 * Re-escape RFC 3986 gen-delims that must not appear literally in the host.
 * After the URI regex parses, these characters cannot be literal in the host
 * field, so any that appear after decoding came from percent-encoding and
 * must be restored to prevent authority structure changes.
 *
 * @param {string} host
 * @param {boolean} isIP - true for IPv4/IPv6 hosts (skip colon re-escaping)
 * @returns {string} host with `@`, `/`, `?`, `#` (and `:` unless isIP) percent-encoded
 */
function reescapeHostDelimiters (host, isIP) {
  const re = isIP ? HOST_DELIM_NO_COLON_RE : HOST_DELIM_RE
  // String.prototype.replace resets lastIndex on a global regex before
  // matching, so the shared module-level patterns need no manual reset.
  return host.replace(re, (ch) => HOST_DELIMS[ch])
}
294+
275295
/**
276296
* Normalizes percent escapes and optionally decodes only unreserved ASCII bytes.
277297
* Reserved delimiters such as `%2F` and `%2E` stay escaped.
@@ -394,7 +414,7 @@ function recomposeAuthority (component) {
394414
if (ipV6res.isIPV6 === true) {
395415
host = `[${ipV6res.escapedHost}]`
396416
} else {
397-
host = component.host
417+
host = reescapeHostDelimiters(host, false)
398418
}
399419
}
400420
uriTokens.push(host)
@@ -411,6 +431,7 @@ function recomposeAuthority (component) {
411431
module.exports = {
412432
nonSimpleDomain,
413433
recomposeAuthority,
434+
reescapeHostDelimiters,
414435
normalizePercentEncoding,
415436
normalizePathEncoding,
416437
escapePreservingEscapes,

test/security.test.js

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
'use strict'

// Regression tests for malformed authorities/ports and for percent-encoded
// authority delimiters inside the host component.

const test = require('tape')
const fastURI = require('..')

test('parse marks malformed authority and port inputs as errors', (t) => {
  // [input, error message expected on the parsed component]
  const malformedCases = [
    ['http://[::1]foo', 'URI path must start with "/" when authority is present.'],
    ['http://[::1]:80abc/path', 'URI path must start with "/" when authority is present.'],
    ['http://example.com:80abc/path', 'URI path must start with "/" when authority is present.'],
    ['http://[::1]:65536', 'URI port is malformed.']
  ]

  t.plan(malformedCases.length)

  for (const [input, expectedError] of malformedCases) {
    t.equal(fastURI.parse(input).error, expectedError, input)
  }
})

test('normalize does not canonicalize malformed URLs into different valid URLs', (t) => {
  const malformedCases = [
    'http://[::1]foo',
    'http://[::1]:80abc/path',
    'http://example.com:80abc/path',
    'http://[::1]:65536'
  ]

  t.plan(malformedCases.length)

  // Malformed input must come back unchanged.
  for (const input of malformedCases) {
    t.equal(fastURI.normalize(input), input, input)
  }
})

test('equal returns false when either side is malformed', (t) => {
  // [malformed input, well-formed URI it must not compare equal to]
  const malformedPairs = [
    ['http://[::1]foo', 'http://[::1]/foo'],
    ['http://[::1]:80abc/path', 'http://[::1]/abc/path'],
    ['http://example.com:80abc/path', 'http://example.com/abc/path'],
    ['http://[::1]:65536', 'http://[::1]:65536/']
  ]

  t.plan(malformedPairs.length)

  for (const [left, right] of malformedPairs) {
    t.equal(fastURI.equal(left, right), false, `${left} != ${right}`)
  }
})

test('normalize preserves encoded authority delimiters in host', (t) => {
  // [input, expected normalized output]
  const cases = [
    ['http://trusted.com%40evil.com/', 'http://trusted.com%40evil.com/'],
    ['http://example.com%3A8080/', 'http://example.com%3A8080/'],
    ['http://example.com%2Fevil.com/path', 'http://example.com%2Fevil.com/path'],
    ['http://example.com%23fragment/path', 'http://example.com%23fragment/path'],
    ['http://example.com%3Fq=evil/path', 'http://example.com%3Fq=evil/path'],
    ['http://user%3Apass%40evil.com/', 'http://user%3Apass%40evil.com/'],
    ['http://user@trusted.com%40evil.com/', 'http://user@trusted.com%40evil.com/'],
    ['https://trusted.com%40evil.com/', 'https://trusted.com%40evil.com/'],
    ['ws://trusted.com%40evil.com/chat', 'ws://trusted.com%40evil.com/chat'],
    ['wss://trusted.com%40evil.com/chat', 'wss://trusted.com%40evil.com/chat']
  ]

  t.plan(cases.length)

  for (const [input, expected] of cases) {
    t.equal(fastURI.normalize(input), expected, input)
  }
})

test('parse preserves encoded authority delimiters in host', (t) => {
  // [input, expected parsed host]
  const cases = [
    ['http://trusted.com%40evil.com/', 'trusted.com%40evil.com'],
    ['http://example.com%3A8080/', 'example.com%3A8080'],
    ['http://user%3Apass%40evil.com/', 'user%3Apass%40evil.com']
  ]

  t.plan(cases.length)

  for (const [input, expectedHost] of cases) {
    t.equal(fastURI.parse(input).host, expectedHost, input)
  }
})

test('equal returns false when encoded delimiters differ from live delimiters', (t) => {
  // [URI with an encoded delimiter, URI with the same delimiter literal]
  const pairs = [
    ['http://trusted.com%40evil.com/', 'http://trusted.com@evil.com/'],
    ['http://example.com%3A8080/', 'http://example.com:8080/']
  ]

  t.plan(pairs.length)

  for (const [left, right] of pairs) {
    t.equal(fastURI.equal(left, right, {}), false, `${left} != ${right}`)
  }
})

test('resolve preserves encoded authority delimiters', (t) => {
  const resolved = fastURI.resolve('http://base.com/', '//trusted.com%40evil.com/path')
  const reparsed = fastURI.parse(resolved)

  t.plan(1)
  t.notEqual(reparsed.host, 'evil.com', '//trusted.com%40evil.com/path')
})

test('serialize escapes authority delimiters in host field', (t) => {
  const serialized = fastURI.serialize({ scheme: 'http', host: 'trusted.com@evil.com', path: '/' })
  const reparsed = fastURI.parse(serialized)

  t.plan(1)
  t.notEqual(reparsed.host, 'evil.com', 'host: trusted.com@evil.com')
})

test('normalize does not double-decode %2540 into a live @', (t) => {
  const normalized = fastURI.normalize('http://trusted.com%2540evil.com/')
  const reparsed = fastURI.parse(normalized)

  t.plan(1)
  t.notEqual(reparsed.host, 'trusted.com@evil.com', 'http://trusted.com%2540evil.com/')
})

0 commit comments

Comments
 (0)