Skip to content

Commit 69a3e55

Browse files
Wrap handshake-time ProtocolError as DqliteConnectionError for retry
A wire-decode failure during the handshake (peer mid-restart producing a torn frame, in-flight leader flip yielding a partial reply, etc.) previously surfaced as ProtocolError. The connect-retry tuple in cluster.py is (DqliteConnectionError, ClusterError, OSError) — so a real transient failure during the first attempt would NOT match and the operator's 3-attempt retry budget did not fire. Rewrap as DqliteConnectionError so the retry classifier sees the right shape; preserve the original via __cause__ so structured- error capture surfaces both the rewrap class and the underlying wire diagnostic. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 25416de commit 69a3e55

2 files changed

Lines changed: 89 additions & 0 deletions

File tree

src/dqliteclient/connection.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,23 @@ async def _connect_impl(self) -> None:
12711271
raw_message=e.raw_message,
12721272
) from e
12731273
raise
1274+
except ProtocolError as e:
1275+
# Wire-decode failures during the handshake are
1276+
# transport-class transient: a peer mid-restart
1277+
# responding with a torn frame, an in-flight
1278+
# leader flip producing a partial reply, etc.
1279+
# Without rewrapping, ``ProtocolError`` would not
1280+
# match the ``connect()`` retry tuple at
1281+
# ``cluster.py:_DEFAULT_RETRYABLE_EXCEPTIONS``
1282+
# (DqliteConnectionError, ClusterError, OSError),
1283+
# so the operator's 3-attempt retry budget would
1284+
# not fire on a real transient failure. Surface
1285+
# as DqliteConnectionError so the retry classifier
1286+
# sees the right shape.
1287+
await self._abort_protocol()
1288+
raise DqliteConnectionError(
1289+
f"Wire decode failed during handshake to {self._safe_address}: {e}"
1290+
) from e
12741291
except BaseException:
12751292
await self._abort_protocol()
12761293
raise
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""A wire-decode failure during the handshake (``ProtocolError``)
2+
must surface as ``DqliteConnectionError`` so the connect-retry
3+
loop's classifier matches it. Without the rewrap, a peer mid-restart
4+
producing a torn frame on first request would abandon on the first
5+
attempt instead of using the 3-attempt retry budget.
6+
7+
The retry tuple in ``cluster.py`` is
8+
``(DqliteConnectionError, ClusterError, OSError)``.
9+
"""
10+
11+
import asyncio
12+
from unittest.mock import AsyncMock, patch
13+
14+
import pytest
15+
16+
from dqliteclient.connection import DqliteConnection
17+
from dqliteclient.exceptions import DqliteConnectionError, ProtocolError
18+
19+
20+
@pytest.mark.asyncio
21+
async def test_handshake_protocolerror_wrapped_as_dqlite_connection_error() -> None:
22+
"""A ``ProtocolError`` raised by ``protocol.handshake()`` is
23+
rewrapped as ``DqliteConnectionError`` so the connect-retry
24+
classifier sees a transient transport failure."""
25+
conn = DqliteConnection("127.0.0.1:9001", database="db", timeout=2.0)
26+
27+
fake_streams = (AsyncMock(spec=asyncio.StreamReader), AsyncMock(spec=asyncio.StreamWriter))
28+
fake_streams[1].close = lambda: None
29+
fake_streams[1].wait_closed = AsyncMock()
30+
31+
async def stub_open(*_args: object, **_kwargs: object):
32+
return fake_streams
33+
34+
with (
35+
patch("dqliteclient._dial.open_connection_with_keepalive", new=stub_open),
36+
patch(
37+
"dqliteclient.connection.DqliteProtocol.handshake",
38+
new=AsyncMock(side_effect=ProtocolError("simulated torn frame")),
39+
),
40+
pytest.raises(DqliteConnectionError, match="Wire decode failed"),
41+
):
42+
await conn.connect()
43+
44+
45+
@pytest.mark.asyncio
46+
async def test_handshake_protocolerror_preserves_cause() -> None:
47+
"""The original ProtocolError remains as ``__cause__`` so
48+
structured-error capture surfaces both the rewrap class and the
49+
underlying wire diagnostic."""
50+
conn = DqliteConnection("127.0.0.1:9001", database="db", timeout=2.0)
51+
52+
fake_streams = (AsyncMock(spec=asyncio.StreamReader), AsyncMock(spec=asyncio.StreamWriter))
53+
fake_streams[1].close = lambda: None
54+
fake_streams[1].wait_closed = AsyncMock()
55+
56+
async def stub_open(*_args: object, **_kwargs: object):
57+
return fake_streams
58+
59+
original = ProtocolError("simulated torn frame")
60+
with (
61+
patch("dqliteclient._dial.open_connection_with_keepalive", new=stub_open),
62+
patch(
63+
"dqliteclient.connection.DqliteProtocol.handshake",
64+
new=AsyncMock(side_effect=original),
65+
),
66+
):
67+
try:
68+
await conn.connect()
69+
except DqliteConnectionError as e:
70+
assert e.__cause__ is original
71+
else:
72+
pytest.fail("expected DqliteConnectionError")

0 commit comments

Comments
 (0)