Skip to content

Commit 14b172d

Browse files
Chain prior-attempt failures in retry_with_backoff exhaustion
The exhaustion raise previously carried only the last error; diagnostic context for "node A went VOTER → STANDBY between attempts 1 and 3" was lost. Chain via _bounded_group (the same helper used for find_leader / pool.initialize aggregates) so every attempt is preserved on __cause__ as a BaseExceptionGroup. Single-attempt path stays unchanged so simple cases keep their existing exception shape. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c2bb9c9 commit 14b172d

2 files changed

Lines changed: 79 additions & 0 deletions

File tree

src/dqliteclient/retry.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ async def retry_with_backoff[T](
136136
deadline = None if max_elapsed_seconds is None else loop.time() + max_elapsed_seconds
137137

138138
last_error: BaseException | None = None
139+
history: list[BaseException] = []
139140

140141
for attempt in range(max_attempts):
141142
# Deadline re-check BEFORE the next func() call: a previous
@@ -153,6 +154,7 @@ async def retry_with_backoff[T](
153154
raise
154155
except retryable_exceptions as e:
155156
last_error = e
157+
history.append(e)
156158

157159
if attempt == max_attempts - 1:
158160
break
@@ -182,4 +184,17 @@ async def retry_with_backoff[T](
182184
# would still leave the invariant intact at this point because the
183185
# break path always sets last_error first).
184186
assert last_error is not None
187+
if len(history) > 1:
188+
# Chain prior-attempt failures so a forensic walker can see
189+
# the full timeline rather than only the last error. Mirrors
190+
# the discipline ``_find_leader_impl`` and
191+
# ``ConnectionPool.initialize`` apply for per-node aggregates.
192+
# ``_bounded_group`` caps children so the chain stays
193+
# picklable for cross-process error capture. Local import to
194+
# avoid the retry <-> cluster import cycle at module load.
195+
from dqliteclient.cluster import _bounded_group
196+
197+
raise last_error from _bounded_group(
198+
f"retry exhausted after {len(history)} attempts", history
199+
)
185200
raise last_error
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
"""``retry_with_backoff``'s exhaustion path chains prior-attempt
2+
failures via ``BaseExceptionGroup`` so the diagnostic for an
3+
exhausted retry loop carries every attempt's error, not just the
4+
last one. Mirrors the per-node aggregate discipline in
5+
``_find_leader_impl`` and ``ConnectionPool.initialize``.
6+
"""
7+
8+
import pytest
9+
10+
from dqliteclient.exceptions import DqliteConnectionError
11+
from dqliteclient.retry import retry_with_backoff
12+
13+
14+
@pytest.mark.asyncio
async def test_retry_exhaustion_chains_via_bounded_group() -> None:
    """Exhausting a multi-attempt retry chains every attempt's failure.

    Drives ``retry_with_backoff`` through three failing attempts and
    checks that the raised error's ``__cause__`` is a
    ``BaseExceptionGroup`` with one child per attempt.
    """
    attempts = 0

    async def always_fail() -> None:
        # Number each failure so the children can be told apart below.
        nonlocal attempts
        attempts += 1
        raise DqliteConnectionError(f"attempt {attempts}")

    # Zero delays and zero jitter keep the retry loop from sleeping.
    with pytest.raises(DqliteConnectionError) as caught:
        await retry_with_backoff(
            always_fail,
            max_attempts=3,
            retryable_exceptions=(DqliteConnectionError,),
            base_delay=0.0,
            max_delay=0.0,
            jitter=0.0,
        )

    group = caught.value.__cause__
    assert isinstance(group, BaseExceptionGroup)

    children = group.exceptions
    assert len(children) == 3

    # Every attempt's message must show up in some child of the group.
    msgs = [str(child) for child in children]
    for i in (1, 2, 3):
        assert any(f"attempt {i}" in m for m in msgs)
43+
44+
@pytest.mark.asyncio
45+
async def test_single_attempt_path_no_group_wrap() -> None:
46+
"""When max_attempts=1, the single-attempt failure raises
47+
directly without a chain group — preserves the simple-case shape
48+
callers are used to."""
49+
50+
async def fail_once() -> None:
51+
raise DqliteConnectionError("only attempt")
52+
53+
with pytest.raises(DqliteConnectionError, match="only attempt") as excinfo:
54+
await retry_with_backoff(
55+
fail_once,
56+
max_attempts=1,
57+
retryable_exceptions=(DqliteConnectionError,),
58+
base_delay=0.0,
59+
max_delay=0.0,
60+
jitter=0.0,
61+
)
62+
63+
# No chain group wrap on single attempt.
64+
assert excinfo.value.__cause__ is None

0 commit comments

Comments
 (0)