Skip to content
This repository was archived by the owner on Nov 6, 2022. It is now read-only.

Commit 7d5c99d

Browse files
committed
Support multi-coding Transfer-Encoding
`Transfer-Encoding` header might have multiple codings in it. Even though llhttp cares only about `chunked`, it must check that `chunked` is the last coding (if present). ABNF from RFC 7230: ``` Transfer-Encoding = *( "," OWS ) transfer-coding *( OWS "," [ OWS transfer-coding ] ) transfer-coding = "chunked" / "compress" / "deflate" / "gzip" / transfer-extension transfer-extension = token *( OWS ";" OWS transfer-parameter ) transfer-parameter = token BWS "=" BWS ( token / quoted-string ) ``` However, if `chunked` is not last - llhttp must assume that the encoding and size of the body is unknown (according to 3.3.3 of RFC 7230) and read the response until EOF. For request - the error must be raised for an unknown `Transfer-Encoding`. Furthermore, 3.3.3 of RFC 7230 explicitly states that presence of both `Transfer-Encoding` and `Content-Length` indicates the smuggling attack and "ought to be handled as an error". For the lenient mode: * Unknown `Transfer-Encoding` in requests is not an error and request body is simply read until EOF (end of connection) * Only `Transfer-Encoding: chunked` together with `Content-Length` would result an error (just like before the patch) PR-URL: nodejs-private/http-parser-private#4 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Sam Roberts <vieuxtech@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 28f3c35 commit 7d5c99d

File tree

3 files changed

+165
-14
lines changed

3 files changed

+165
-14
lines changed

http_parser.c

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,10 @@ enum header_states
381381
, h_transfer_encoding
382382
, h_upgrade
383383

384+
, h_matching_transfer_encoding_token_start
384385
, h_matching_transfer_encoding_chunked
386+
, h_matching_transfer_encoding_token
387+
385388
, h_matching_connection_token_start
386389
, h_matching_connection_keep_alive
387390
, h_matching_connection_close
@@ -1335,6 +1338,7 @@ size_t http_parser_execute (http_parser *parser,
13351338
parser->header_state = h_general;
13361339
} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
13371340
parser->header_state = h_transfer_encoding;
1341+
parser->flags |= F_TRANSFER_ENCODING;
13381342
}
13391343
break;
13401344

@@ -1416,10 +1420,14 @@ size_t http_parser_execute (http_parser *parser,
14161420
if ('c' == c) {
14171421
parser->header_state = h_matching_transfer_encoding_chunked;
14181422
} else {
1419-
parser->header_state = h_general;
1423+
parser->header_state = h_matching_transfer_encoding_token;
14201424
}
14211425
break;
14221426

1427+
/* Multi-value `Transfer-Encoding` header */
1428+
case h_matching_transfer_encoding_token_start:
1429+
break;
1430+
14231431
case h_content_length:
14241432
if (UNLIKELY(!IS_NUM(ch))) {
14251433
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
@@ -1563,16 +1571,41 @@ size_t http_parser_execute (http_parser *parser,
15631571
goto error;
15641572

15651573
/* Transfer-Encoding: chunked */
1574+
case h_matching_transfer_encoding_token_start:
1575+
/* looking for 'Transfer-Encoding: chunked' */
1576+
if ('c' == c) {
1577+
h_state = h_matching_transfer_encoding_chunked;
1578+
} else if (STRICT_TOKEN(c)) {
1579+
/* TODO(indutny): similar code below does this, but why?
1580+
* At the very least it seems to be inconsistent given that
1581+
* h_matching_transfer_encoding_token does not check for
1582+
* `STRICT_TOKEN`
1583+
*/
1584+
h_state = h_matching_transfer_encoding_token;
1585+
} else if (c == ' ' || c == '\t') {
1586+
/* Skip lws */
1587+
} else {
1588+
h_state = h_general;
1589+
}
1590+
break;
1591+
15661592
case h_matching_transfer_encoding_chunked:
15671593
parser->index++;
15681594
if (parser->index > sizeof(CHUNKED)-1
15691595
|| c != CHUNKED[parser->index]) {
1570-
h_state = h_general;
1596+
h_state = h_matching_transfer_encoding_token;
15711597
} else if (parser->index == sizeof(CHUNKED)-2) {
15721598
h_state = h_transfer_encoding_chunked;
15731599
}
15741600
break;
15751601

1602+
case h_matching_transfer_encoding_token:
1603+
if (ch == ',') {
1604+
h_state = h_matching_transfer_encoding_token_start;
1605+
parser->index = 0;
1606+
}
1607+
break;
1608+
15761609
case h_matching_connection_token_start:
15771610
/* looking for 'Connection: keep-alive' */
15781611
if (c == 'k') {
@@ -1631,7 +1664,7 @@ size_t http_parser_execute (http_parser *parser,
16311664
break;
16321665

16331666
case h_transfer_encoding_chunked:
1634-
if (ch != ' ') h_state = h_general;
1667+
if (ch != ' ') h_state = h_matching_transfer_encoding_token;
16351668
break;
16361669

16371670
case h_connection_keep_alive:
@@ -1765,12 +1798,17 @@ size_t http_parser_execute (http_parser *parser,
17651798
REEXECUTE();
17661799
}
17671800

1768-
/* Cannot use chunked encoding and a content-length header together
1769-
per the HTTP specification. */
1770-
if ((parser->flags & F_CHUNKED) &&
1801+
/* Cannot us transfer-encoding and a content-length header together
1802+
per the HTTP specification. (RFC 7230 Section 3.3.3) */
1803+
if ((parser->flags & F_TRANSFER_ENCODING) &&
17711804
(parser->flags & F_CONTENTLENGTH)) {
1772-
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1773-
goto error;
1805+
/* Allow it for lenient parsing as long as `Transfer-Encoding` is
1806+
* not `chunked`
1807+
*/
1808+
if (!lenient || (parser->flags & F_CHUNKED)) {
1809+
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1810+
goto error;
1811+
}
17741812
}
17751813

17761814
UPDATE_STATE(s_headers_done);
@@ -1845,8 +1883,31 @@ size_t http_parser_execute (http_parser *parser,
18451883
UPDATE_STATE(NEW_MESSAGE());
18461884
CALLBACK_NOTIFY(message_complete);
18471885
} else if (parser->flags & F_CHUNKED) {
1848-
/* chunked encoding - ignore Content-Length header */
1886+
/* chunked encoding - ignore Content-Length header,
1887+
* prepare for a chunk */
18491888
UPDATE_STATE(s_chunk_size_start);
1889+
} else if (parser->flags & F_TRANSFER_ENCODING) {
1890+
if (parser->type == HTTP_REQUEST && !lenient) {
1891+
/* RFC 7230 3.3.3 */
1892+
1893+
/* If a Transfer-Encoding header field
1894+
* is present in a request and the chunked transfer coding is not
1895+
* the final encoding, the message body length cannot be determined
1896+
* reliably; the server MUST respond with the 400 (Bad Request)
1897+
* status code and then close the connection.
1898+
*/
1899+
SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1900+
RETURN(p - data); /* Error */
1901+
} else {
1902+
/* RFC 7230 3.3.3 */
1903+
1904+
/* If a Transfer-Encoding header field is present in a response and
1905+
* the chunked transfer coding is not the final encoding, the
1906+
* message body length is determined by reading the connection until
1907+
* it is closed by the server.
1908+
*/
1909+
UPDATE_STATE(s_body_identity_eof);
1910+
}
18501911
} else {
18511912
if (parser->content_length == 0) {
18521913
/* Content-Length header given but zero: Content-Length: 0\r\n */
@@ -2100,6 +2161,12 @@ http_message_needs_eof (const http_parser *parser)
21002161
return 0;
21012162
}
21022163

2164+
/* RFC 7230 3.3.3, see `s_headers_almost_done` */
2165+
if ((parser->flags & F_TRANSFER_ENCODING) &&
2166+
(parser->flags & F_CHUNKED) == 0) {
2167+
return 1;
2168+
}
2169+
21032170
if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
21042171
return 0;
21052172
}

http_parser.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ enum flags
225225
, F_UPGRADE = 1 << 5
226226
, F_SKIPBODY = 1 << 6
227227
, F_CONTENTLENGTH = 1 << 7
228+
, F_TRANSFER_ENCODING = 1 << 8
228229
};
229230

230231

@@ -271,6 +272,8 @@ enum flags
271272
"unexpected content-length header") \
272273
XX(INVALID_CHUNK_SIZE, \
273274
"invalid character in chunk size header") \
275+
XX(INVALID_TRANSFER_ENCODING, \
276+
"request has invalid transfer-encoding") \
274277
XX(INVALID_CONSTANT, "invalid constant string") \
275278
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
276279
XX(STRICT, "strict mode assertion failed") \
@@ -293,11 +296,11 @@ enum http_errno {
293296
struct http_parser {
294297
/** PRIVATE **/
295298
unsigned int type : 2; /* enum http_parser_type */
296-
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
297299
unsigned int state : 7; /* enum state from http_parser.c */
298300
unsigned int header_state : 7; /* enum header_state from http_parser.c */
299301
unsigned int index : 7; /* index into current matcher */
300302
unsigned int lenient_http_headers : 1;
303+
unsigned int flags : 16; /* F_* values from 'flags' enum; semi-public */
301304

302305
uint32_t nread; /* # bytes read in various scenarios */
303306
uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */

test.c

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,6 @@ const struct message requests[] =
262262
,.type= HTTP_REQUEST
263263
,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n"
264264
"Accept: */*\r\n"
265-
"Transfer-Encoding: identity\r\n"
266265
"Content-Length: 5\r\n"
267266
"\r\n"
268267
"World"
@@ -275,10 +274,9 @@ const struct message requests[] =
275274
,.fragment= "hey"
276275
,.request_path= "/post_identity_body_world"
277276
,.request_url= "/post_identity_body_world?q=search#hey"
278-
,.num_headers= 3
277+
,.num_headers= 2
279278
,.headers=
280279
{ { "Accept", "*/*" }
281-
, { "Transfer-Encoding", "identity" }
282280
, { "Content-Length", "5" }
283281
}
284282
,.body= "World"
@@ -1193,6 +1191,61 @@ const struct message requests[] =
11931191
,.headers= { { "Host", "example.com" } }
11941192
,.body= ""
11951193
}
1194+
1195+
#define POST_MULTI_TE_LAST_CHUNKED 43
1196+
, {.name= "post - multi coding transfer-encoding chunked body"
1197+
,.type= HTTP_REQUEST
1198+
,.raw= "POST / HTTP/1.1\r\n"
1199+
"Transfer-Encoding: deflate, chunked\r\n"
1200+
"\r\n"
1201+
"1e\r\nall your base are belong to us\r\n"
1202+
"0\r\n"
1203+
"\r\n"
1204+
,.should_keep_alive= TRUE
1205+
,.message_complete_on_eof= FALSE
1206+
,.http_major= 1
1207+
,.http_minor= 1
1208+
,.method= HTTP_POST
1209+
,.query_string= ""
1210+
,.fragment= ""
1211+
,.request_path= "/"
1212+
,.request_url= "/"
1213+
,.num_headers= 1
1214+
,.headers=
1215+
{ { "Transfer-Encoding" , "deflate, chunked" }
1216+
}
1217+
,.body= "all your base are belong to us"
1218+
,.num_chunks_complete= 2
1219+
,.chunk_lengths= { 0x1e }
1220+
}
1221+
1222+
#define POST_MULTI_LINE_TE_LAST_CHUNKED 43
1223+
, {.name= "post - multi coding transfer-encoding chunked body"
1224+
,.type= HTTP_REQUEST
1225+
,.raw= "POST / HTTP/1.1\r\n"
1226+
"Transfer-Encoding: deflate,\r\n"
1227+
" chunked\r\n"
1228+
"\r\n"
1229+
"1e\r\nall your base are belong to us\r\n"
1230+
"0\r\n"
1231+
"\r\n"
1232+
,.should_keep_alive= TRUE
1233+
,.message_complete_on_eof= FALSE
1234+
,.http_major= 1
1235+
,.http_minor= 1
1236+
,.method= HTTP_POST
1237+
,.query_string= ""
1238+
,.fragment= ""
1239+
,.request_path= "/"
1240+
,.request_url= "/"
1241+
,.num_headers= 1
1242+
,.headers=
1243+
{ { "Transfer-Encoding" , "deflate, chunked" }
1244+
}
1245+
,.body= "all your base are belong to us"
1246+
,.num_chunks_complete= 2
1247+
,.chunk_lengths= { 0x1e }
1248+
}
11961249
};
11971250

11981251
/* * R E S P O N S E S * */
@@ -1970,6 +2023,28 @@ const struct message responses[] =
19702023
,.num_chunks_complete= 3
19712024
,.chunk_lengths= { 2, 2 }
19722025
}
2026+
#define HTTP_200_MULTI_TE_NOT_LAST_CHUNKED 28
2027+
, {.name= "HTTP 200 response with `chunked` being *not last* Transfer-Encoding"
2028+
,.type= HTTP_RESPONSE
2029+
,.raw= "HTTP/1.1 200 OK\r\n"
2030+
"Transfer-Encoding: chunked, identity\r\n"
2031+
"\r\n"
2032+
"2\r\n"
2033+
"OK\r\n"
2034+
"0\r\n"
2035+
"\r\n"
2036+
,.should_keep_alive= FALSE
2037+
,.message_complete_on_eof= TRUE
2038+
,.http_major= 1
2039+
,.http_minor= 1
2040+
,.status_code= 200
2041+
,.response_status= "OK"
2042+
,.num_headers= 1
2043+
,.headers= { { "Transfer-Encoding", "chunked, identity" }
2044+
}
2045+
,.body= "2\r\nOK\r\n0\r\n\r\n"
2046+
,.num_chunks_complete= 0
2047+
}
19732048
};
19742049

19752050
/* strnlen() is a POSIX.2008 addition. Can't rely on it being available so
@@ -3663,7 +3738,7 @@ test_chunked_content_length_error (int req)
36633738
parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf));
36643739
assert(parsed == strlen(buf));
36653740

3666-
buf = "Transfer-Encoding: chunked\r\nContent-Length: 1\r\n\r\n";
3741+
buf = "Transfer-Encoding: anything\r\nContent-Length: 1\r\n\r\n";
36673742
size_t buflen = strlen(buf);
36683743

36693744
parsed = http_parser_execute(&parser, &settings_null, buf, buflen);
@@ -4332,6 +4407,12 @@ main (void)
43324407
"fooba",
43334408
HPE_OK);
43344409

4410+
// Unknown Transfer-Encoding in request
4411+
test_simple("GET / HTTP/1.1\r\n"
4412+
"Transfer-Encoding: unknown\r\n"
4413+
"\r\n",
4414+
HPE_INVALID_TRANSFER_ENCODING);
4415+
43354416
static const char *all_methods[] = {
43364417
"DELETE",
43374418
"GET",

0 commit comments

Comments
 (0)