Skip to content

Commit 5077768

Browse files
Normalize list-style Connection and Transfer-Encoding header parsing
1 parent 2602b71 commit 5077768

File tree

4 files changed

+103
-9
lines changed

4 files changed

+103
-9
lines changed

CHANGES/12253.bugfix.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Normalized parsing of list-style ``Connection`` and ``Transfer-Encoding``
2+
headers so repeated field lines and comma-joined values are handled
3+
consistently in the HTTP parser, without changing ``CIMultiDict``
4+
storage semantics.
5+
-- by :user:`rodrigobnogueira`.

aiohttp/helpers.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,54 @@ def parse_content_type(raw: str) -> tuple[str, MappingProxyType[str, str]]:
398398
return content_type, MappingProxyType(content_dict)
399399

400400

401+
def parse_http_list_values(header_values: Iterable[str]) -> tuple[str, ...]:
402+
"""Parse comma-separated HTTP field values from one or more field lines.
403+
404+
This normalizes equivalent list-style representations:
405+
- ``Foo: 1`` + ``Foo: 2``
406+
- ``Foo: 1, 2``
407+
408+
Quoted substrings are respected, so commas inside quoted values do not
409+
split the value.
410+
"""
411+
values: list[str] = []
412+
for header_value in header_values:
413+
values.extend(_parse_http_list_value(header_value))
414+
return tuple(values)
415+
416+
417+
def _parse_http_list_value(header_value: str) -> list[str]:
418+
values: list[str] = []
419+
start = 0
420+
in_quotes = False
421+
escaped = False
422+
423+
for idx, ch in enumerate(header_value):
424+
if escaped:
425+
escaped = False
426+
continue
427+
428+
if ch == "\\" and in_quotes:
429+
escaped = True
430+
continue
431+
432+
if ch == '"':
433+
in_quotes = not in_quotes
434+
continue
435+
436+
if ch == "," and not in_quotes:
437+
value = header_value[start:idx].strip(" \t")
438+
if value:
439+
values.append(value)
440+
start = idx + 1
441+
442+
value = header_value[start:].strip(" \t")
443+
if value:
444+
values.append(value)
445+
446+
return values
447+
448+
401449
def guess_filename(obj: Any, default: str | None = None) -> str | None:
402450
name = getattr(obj, "name", None)
403451
if name and isinstance(name, str) and name[0] != "<" and name[-1] != ">":

aiohttp/http_parser.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
EMPTY_BODY_STATUS_CODES,
2828
NO_EXTENSIONS,
2929
BaseTimerContext,
30+
parse_http_list_values,
3031
set_exception,
3132
)
3233
from .http_exceptions import (
@@ -522,14 +523,9 @@ def parse_headers(
522523

523524
# keep-alive and protocol switching
524525
# RFC 9110 section 7.6.1 defines Connection as a comma-separated list.
525-
conn_values = headers.getall(hdrs.CONNECTION, ())
526+
conn_values = parse_http_list_values(headers.getall(hdrs.CONNECTION, ()))
526527
if conn_values:
527-
conn_tokens = {
528-
token.lower()
529-
for conn_value in conn_values
530-
for token in (part.strip(" \t") for part in conn_value.split(","))
531-
if token and token.isascii()
532-
}
528+
conn_tokens = {token.lower() for token in conn_values if token.isascii()}
533529

534530
if "close" in conn_tokens:
535531
close_conn = True
@@ -658,7 +654,9 @@ def _is_chunked_te(self, te: str) -> bool:
658654
# https://www.rfc-editor.org/rfc/rfc9112#section-7.1-3
659655
# "A sender MUST NOT apply the chunked transfer coding more
660656
# than once to a message body"
661-
parts = [p.strip(" \t") for p in te.split(",")]
657+
parts = list(parse_http_list_values((te,)))
658+
if not parts:
659+
raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
662660
chunked_count = sum(1 for p in parts if p.isascii() and p.lower() == "chunked")
663661
if chunked_count > 1:
664662
raise BadHttpMessage("Request has duplicate `chunked` Transfer-Encoding")
@@ -751,7 +749,11 @@ def parse_message(self, lines: list[bytes]) -> RawResponseMessage:
751749

752750
def _is_chunked_te(self, te: str) -> bool:
    # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
    # For responses, only the *final* transfer coding decides whether the
    # body uses chunked framing.
    tokens = parse_http_list_values((te,))
    if tokens:
        final = tokens[-1]
        return final.isascii() and final.lower() == "chunked"
    return False
755757

756758

757759
class HttpPayloadParser:

tests/test_helpers.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,45 @@ def test_parse_content_type(
106106
assert result == expected
107107

108108

109+
def test_cimultidict_list_header_representations_differ() -> None:
    """Characterization: CIMultiDict keeps the wire form of list headers.

    A header sent as two field lines and one sent as a single comma-joined
    line are equivalent on the wire, yet ``getall()`` exposes them
    differently; ``parse_http_list_values()`` exists to normalize exactly
    this difference.
    """
    two_lines = CIMultiDict([("Foo", "1"), ("Foo", "2")])
    one_line = CIMultiDict([("Foo", "1, 2")])

    assert two_lines.getall("Foo") == ["1", "2"]
    assert one_line.getall("Foo") == ["1, 2"]
118+
119+
120+
@pytest.mark.parametrize(
    ("header_values", "expected"),
    [
        # Repeated field lines pass through untouched.
        (("1", "2"), ("1", "2")),
        # A comma-joined line is split into individual values.
        (("1, 2",), ("1", "2")),
        # Commas inside a quoted string are data, not separators.
        (
            ('"http://example.com/a.html,foo", apples',),
            ('"http://example.com/a.html,foo"', "apples"),
        ),
        # Escaped quotes do not terminate the quoted string.
        (('"foo\\"bar", baz',), ('"foo\\"bar"', "baz")),
        # Optional whitespace around values is trimmed.
        ((" spam , eggs ",), ("spam", "eggs")),
        # Empty elements are dropped entirely.
        ((", , ",), ()),
    ],
)
def test_parse_http_list_values(
    header_values: tuple[str, ...], expected: tuple[str, ...]
) -> None:
    result = helpers.parse_http_list_values(header_values)
    assert result == expected
138+
139+
140+
def test_parse_http_list_values_normalizes_equivalent_field_representations() -> None:
    """Both wire forms of a list header collapse to the same token tuple."""
    expected = ("1", "2")
    for headers in (
        CIMultiDict([("Foo", "1"), ("Foo", "2")]),
        CIMultiDict([("Foo", "1, 2")]),
    ):
        assert helpers.parse_http_list_values(headers.getall("Foo")) == expected
146+
147+
109148
# ------------------- guess_filename ----------------------------------
110149

111150

0 commit comments

Comments
 (0)