Skip to content

Commit f588375

Browse files
test(client): pin dqlite-specific cluster shape contracts against the real cluster
Two integration tests for behaviors that are unique to dqlite (no analog in postgres / mysql / single-node sqlite client libraries) and were previously only covered with mocked wire responses:

1. Connect-to-follower returns DqliteConnectionError("...leader...") end-to-end. The 3-node cluster has exactly one leader at any time; direct connect (bypassing find_leader) yields 1 success + N-1 leader-class errors. Tolerates the brief no-leader window during Raft re-election by retrying the probe up to 3× when 0 successes are observed (a (1, N-1) split is steady-state; any other split is a real bug). The unit test at test_connection.py:854 mocks the wire byte stream; this catches wire-format drift between the C dqlite server and the Python FailureResponse decoder.

2. ClusterClient.find_leader skips an unreachable first address and falls through to remaining nodes. The unit tests patch asyncio.open_connection to raise OSError; this pins the kernel's actual ConnectionRefusedError shape against an unbound port — catching a future narrowing of the `except OSError` clause that would pass unit tests but break in production.

Also adds a new ``cluster_node_addresses`` integration fixture (and DQLITE_TEST_CLUSTER_NODES env var, exported by run-tests.sh) so multi-node tests resolve the configurable host-mapped ports correctly. The pre-existing ``cluster_addresses`` fixture is preserved (load-bearing for the shared dqlite-test-cluster/pytest_fixtures.py shape).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 03bb783 commit f588375

2 files changed

Lines changed: 131 additions & 0 deletions

File tree

tests/integration/conftest.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111

1212
# Skip integration tests if cluster not available
1313
DQLITE_TEST_CLUSTER = os.getenv("DQLITE_TEST_CLUSTER", "localhost:9001")

# Comma-separated list of every cluster node address (host-mapped ports).
# Set the ``DQLITE_TEST_CLUSTER_NODES`` env var to override it; when the
# variable is unset the value falls back to the dev fixed-port cluster
# (host ports 19001-19003).
DQLITE_TEST_CLUSTER_NODES = os.getenv(
    "DQLITE_TEST_CLUSTER_NODES",
    "localhost:19001,localhost:19002,localhost:19003",
)
1421

1522

1623
def pytest_configure(config: pytest.Config) -> None:
@@ -27,3 +34,14 @@ def cluster_address() -> str:
2734
def cluster_addresses() -> list[str]:
2835
"""Get all test cluster addresses."""
2936
return ["localhost:9001", "localhost:9002", "localhost:9003"]
37+
38+
39+
@pytest.fixture
def cluster_node_addresses() -> list[str]:
    """Return every host-mapped cluster node address.

    The list is parsed from the comma-separated
    ``DQLITE_TEST_CLUSTER_NODES`` module constant (itself read from the
    env var of the same name): entries are whitespace-trimmed and empty
    entries are dropped. Use this fixture for multi-node tests that need
    every node reachable; the older ``cluster_addresses`` fixture is
    hardcoded and kept for backwards-compatibility with the shared
    ``dqlite-test-cluster/pytest_fixtures.py`` shape.
    """
    trimmed = (entry.strip() for entry in DQLITE_TEST_CLUSTER_NODES.split(","))
    return [address for address in trimmed if address]
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""Integration tests for dqlite-specific cluster-shape behaviors.
2+
3+
dqlite is a Raft-replicated distributed SQLite. Two cluster-shape
4+
contracts are unique to dqlite (no analog in postgres / mysql / single-
5+
node sqlite client libraries) and have unit-test coverage with mocked
6+
wire responses but no end-to-end pin against the real cluster:
7+
8+
1. **Connect-to-follower returns a transport-level error.**
9+
Connecting directly (no leader-discovery) to any of the cluster
10+
nodes yields exactly one success (the leader) and a
11+
``DqliteConnectionError("...no longer leader...")`` from each
12+
follower. The follower rejects ``OPEN_DATABASE`` with the
13+
``SQLITE_IOERR_NOT_LEADER`` code, which the client translates to
14+
``DqliteConnectionError`` so the pool's ``_invalidate``+rotate
15+
path can fire (a SQL-level error wouldn't trigger failover).
16+
Unit test ``test_connection.py:854`` mocks the wire byte stream;
17+
this pins the contract against the real C dqlite server's reply.
18+
19+
2. **``find_leader`` skips an unreachable first address.**
20+
The probe loop tolerates an immediate ``ECONNREFUSED`` from one
21+
node and falls through to the remaining addresses. Unit tests
22+
patch ``asyncio.open_connection`` to raise ``OSError``; this pins
23+
the kernel's actual ``ConnectionRefusedError`` shape against an
24+
unbound TCP port — catching a future narrowing of the ``except
25+
OSError`` clause that would pass unit tests but break in
26+
production.
27+
28+
The ``cluster_node_addresses`` fixture comes from
29+
``conftest.py`` and is configurable via ``DQLITE_TEST_CLUSTER_NODES``.
30+
"""
31+
32+
from __future__ import annotations
33+
34+
import asyncio
35+
36+
import pytest
37+
38+
from dqliteclient import connect
39+
from dqliteclient.cluster import ClusterClient
40+
from dqliteclient.exceptions import DqliteConnectionError
41+
from dqliteclient.node_store import MemoryNodeStore
42+
43+
44+
@pytest.mark.integration
class TestConnectToFollowerReturnsLeaderError:
    """Direct connects to every node: one success, leader-class errors from the rest."""

    async def test_connect_to_each_node_exactly_one_is_leader(
        self, cluster_node_addresses: list[str]
    ) -> None:
        """Connect to every node directly and require exactly one leader.

        Each address in ``cluster_node_addresses`` is dialed with
        ``connect`` (bypassing ``ClusterClient.find_leader``). A
        successful connect counts as the leader; a
        ``DqliteConnectionError`` must mention "leader" in its message
        and counts as a follower rejection, so the expected shape is
        1 success + N-1 ``DqliteConnectionError("…leader…")``. Any other
        exception propagates and fails the test.

        The whole probe is retried — up to three attempts, 0.5 s apart —
        only when *no* node accepted the connection (the brief no-leader
        window during a Raft re-election). A round with at least one
        success is final: a (1, N-1) split is the steady-state shape and
        passes, while any other split is treated as a real bug and fails
        immediately instead of being retried away.
        """
        # An empty address list would otherwise surface as a misleading
        # "expected exactly 1 leader" failure after pointless retries.
        assert cluster_node_addresses, (
            "cluster_node_addresses fixture returned no addresses"
        )

        max_attempts = 3
        successes: list[str] = []
        leader_errors: list[tuple[str, str]] = []
        for attempt in range(max_attempts):
            # Each attempt is a fresh observation of the whole cluster.
            successes = []
            leader_errors = []
            for addr in cluster_node_addresses:
                conn = None
                try:
                    conn = await connect(addr, timeout=5.0)
                    successes.append(addr)
                except DqliteConnectionError as e:
                    msg = str(e).lower()
                    assert "leader" in msg, (
                        f"connect to {addr} raised DqliteConnectionError "
                        f"without 'leader' in the message: {e!r}"
                    )
                    leader_errors.append((addr, str(e)))
                finally:
                    # Never leave a connection open, whatever happened above.
                    if conn is not None:
                        await conn.close()
            if successes:
                # At least one node accepted: this observation is final.
                # Two or more successes must reach the assertions below
                # rather than being masked by another probe round.
                break
            # Mid-election window: all nodes returned NOT_LEADER. Retry,
            # but do not sleep after the last attempt.
            if attempt < max_attempts - 1:
                await asyncio.sleep(0.5)

        assert len(successes) == 1, (
            f"expected exactly 1 leader, got successes={successes} "
            f"errors={leader_errors} (after retry)"
        )
        assert len(leader_errors) == len(cluster_node_addresses) - 1, (
            f"expected {len(cluster_node_addresses) - 1} follower errors, got {leader_errors}"
        )
89+
90+
91+
@pytest.mark.integration
class TestFindLeaderSkipsUnreachableNode:
    async def test_find_leader_falls_through_unreachable_first_address(
        self, cluster_address: str
    ) -> None:
        """Leader discovery must survive a dead node listed first.

        The node store is seeded with an unreachable address ahead of
        the real ``cluster_address``, and ``ClusterClient.find_leader``
        is expected to tolerate the per-node failure and move on to the
        next address (it probes in order after a stable shuffle). This
        pins the real-network ``ECONNREFUSED`` shape against an unbound
        port — the unit tests use a mocked ``OSError``, which would not
        catch a future narrowing of the ``except`` clause to
        ``BrokenPipeError``-only or similar.

        Port 1 is the IANA-reserved tcpmux port and is reliably unbound
        on a developer workstation.
        """
        dead_address = "localhost:1"
        node_store = MemoryNodeStore([dead_address, cluster_address])
        client = ClusterClient(node_store, timeout=5.0)

        leader = await client.find_leader()

        # Only non-emptiness is checked: the reported leader address may
        # be the container-internal ``0.0.0.0:9001`` (a separate fixture
        # bug tracked elsewhere). The point is that ``find_leader``
        # returned an address at all instead of raising on the dead
        # first node.
        assert leader, f"find_leader returned empty leader address: {leader!r}"

0 commit comments

Comments
 (0)