From 0670799a53ed51259df58a4f3db5be63db8cec3f Mon Sep 17 00:00:00 2001 From: lichuang9890-star Date: Mon, 6 Apr 2026 22:16:27 +0400 Subject: [PATCH 1/3] fix: recover latin-1 encoded Location headers on redirects (Fixes #10047) --- CHANGES/10047.bugfix.rst | 1 + aiohttp/client.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 CHANGES/10047.bugfix.rst diff --git a/CHANGES/10047.bugfix.rst b/CHANGES/10047.bugfix.rst new file mode 100644 index 00000000000..da63dc0f224 --- /dev/null +++ b/CHANGES/10047.bugfix.rst @@ -0,0 +1 @@ +Fixed redirect following when the server sends a ``Location`` header containing raw latin-1 encoded bytes (e.g. ``\xf8`` for ``ø``). Previously, these were decoded via UTF-8 surrogateescape, producing lone surrogates that broke URL parsing and caused 404 errors. The redirect URL is now recovered by round-tripping through latin-1 -- by :user:`lichuang9890-star`. diff --git a/aiohttp/client.py b/aiohttp/client.py index c3e874e650d..63ae99bd85c 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -847,6 +847,21 @@ async def _connect_and_send_request( # response is forbidden resp.release() + # Some servers send Location headers with raw + # latin-1 bytes (e.g. \xf8 for ø). The HTTP + # parser decodes them via utf-8/surrogateescape, + # producing lone surrogates (\udcf8) that break + # URL parsing. Recover by round-tripping back + # to bytes and decoding as latin-1. (See #10047) + try: + r_url.encode("utf-8") + except (UnicodeEncodeError, UnicodeDecodeError): + try: + raw = r_url.encode("utf-8", "surrogateescape") + r_url = raw.decode("latin-1") + except (UnicodeDecodeError, UnicodeEncodeError): + pass + try: parsed_redirect_url = URL( r_url, encoded=not self._requote_redirect_url From a440222e791f4c8338b8fd8cb1a043b4067cba13 Mon Sep 17 00:00:00 2001 From: zhanlong9890 Date: Tue, 7 Apr 2026 12:25:51 +0400 Subject: [PATCH 2/3] fix(client): recover redirect Location via surrogateescape with utf-8 first, latin-1 fallback --- aiohttp/client.py | 25 +++++++++++-------------- tests/test_client_functional.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/aiohttp/client.py b/aiohttp/client.py index 63ae99bd85c..6d538481bad 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -268,6 +268,16 @@ def __post_init__(self) -> None: _CharsetResolver = Callable[[ClientResponse, bytes], str] +def _recover_redirect_location(r_url: str) -> str: + if not any("\udc80" <= ch <= "\udcff" for ch in r_url): + return r_url + raw = r_url.encode("utf-8", "surrogateescape") + try: + return raw.decode("utf-8") + except UnicodeDecodeError: + return raw.decode("latin-1") + + @final class ClientSession: """First-class interface for making HTTP requests.""" @@ -847,20 +857,7 @@ async def _connect_and_send_request( # response is forbidden resp.release() - # Some servers send Location headers with raw - # latin-1 bytes (e.g. \xf8 for ø). The HTTP - # parser decodes them via utf-8/surrogateescape, - # producing lone surrogates (\udcf8) that break - # URL parsing. Recover by round-tripping back - # to bytes and decoding as latin-1. (See #10047) - try: - r_url.encode("utf-8") - except (UnicodeEncodeError, UnicodeDecodeError): - try: - raw = r_url.encode("utf-8", "surrogateescape") - r_url = raw.decode("latin-1") - except (UnicodeDecodeError, UnicodeEncodeError): - pass + r_url = _recover_redirect_location(r_url) try: parsed_redirect_url = URL( diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 8ee45330bb5..3e713bf1425 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -51,6 +51,7 @@ SocketTimeoutError, TooManyRedirects, ) +from aiohttp.client import _recover_redirect_location from aiohttp.client_reqrep import ClientRequest from aiohttp.compression_utils import DEFAULT_MAX_DECOMPRESS_SIZE from aiohttp.http_exceptions import DecompressSizeError @@ -3016,6 +3017,18 @@ async def handler_redirect(request: web.Request) -> web.Response: assert data == body +@pytest.mark.parametrize( + ("raw_location", "expected"), + ( + ("https://cornelius-k.dk/synspr\udcf8ve", "https://cornelius-k.dk/synsprøve"), + ("https://cornelius-k.dk/synspr\udcc3\udcb8ve", "https://cornelius-k.dk/synsprøve"), + ("https://cornelius-k.dk/synspr%C3%B8ve", "https://cornelius-k.dk/synspr%C3%B8ve"), + ), +) +def test_recover_redirect_location(raw_location: str, expected: str) -> None: + assert _recover_redirect_location(raw_location) == expected + + INVALID_URL_WITH_ERROR_MESSAGE_YARL_NEW = ( # yarl.URL.__new__ raises ValueError ("http://:/", "http://:/"), From 5ea52e569089350f47aa8f6f3703caf4457df790 Mon Sep 17 00:00:00 2001 From: zhanlong9890 Date: Tue, 7 Apr 2026 12:45:38 +0400 Subject: [PATCH 3/3] style(tests): apply isort+black formatting for pre-commit --- tests/test_client_functional.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 3e713bf1425..8eeb1093162 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -41,6 +41,7 @@ import aiohttp from aiohttp import Fingerprint, ServerFingerprintMismatch, hdrs, payload, web from aiohttp.abc import AbstractResolver, ResolveResult +from aiohttp.client import _recover_redirect_location from aiohttp.client_exceptions import ( ClientResponseError, InvalidURL, @@ -51,7 +52,6 @@ SocketTimeoutError, TooManyRedirects, ) -from aiohttp.client import _recover_redirect_location from aiohttp.client_reqrep import ClientRequest from aiohttp.compression_utils import DEFAULT_MAX_DECOMPRESS_SIZE from aiohttp.http_exceptions import DecompressSizeError @@ -3021,8 +3021,14 @@ async def handler_redirect(request: web.Request) -> web.Response: ("raw_location", "expected"), ( ("https://cornelius-k.dk/synspr\udcf8ve", "https://cornelius-k.dk/synsprøve"), - ("https://cornelius-k.dk/synspr\udcc3\udcb8ve", "https://cornelius-k.dk/synsprøve"), - ("https://cornelius-k.dk/synspr%C3%B8ve", "https://cornelius-k.dk/synspr%C3%B8ve"), + ( + "https://cornelius-k.dk/synspr\udcc3\udcb8ve", + "https://cornelius-k.dk/synsprøve", + ), + ( + "https://cornelius-k.dk/synspr%C3%B8ve", + "https://cornelius-k.dk/synspr%C3%B8ve", + ), ), ) def test_recover_redirect_location(raw_location: str, expected: str) -> None: