Skip to content

Commit 18fcae7

Browse files
Add max_blob_size caller hook to encode_blob / decode_blob
The 16 MiB ``_MAX_BLOB_SIZE`` was a module-level constant with no override path. A deployment with a larger ``raft.log.entry_size_max`` (rare but legitimate) had no way to read or write blobs above the cap short of monkey-patching the module. Add an optional ``max_blob_size`` kwarg to both ``encode_blob`` and ``decode_blob``, defaulting to the module constant so existing callers see no behaviour change. Advanced callers (mock servers, bespoke decode pipelines) can now raise or lower the cap explicitly. End-to-end plumbing through ``decode_value`` / ``decode_params_tuple`` / ``MessageDecoder`` / ``DqliteConnection`` is deferred — adds ~30 call-sites for a feature whose caller demand is currently zero. The wire-layer hook is the minimum change that unblocks the "caller cannot override the cap" complaint; future end-to-end plumbing builds on it without re-litigating the API. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ee068c6 commit 18fcae7

2 files changed

Lines changed: 62 additions & 6 deletions

File tree

src/dqlitewire/types.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -309,30 +309,42 @@ def decode_text(
309309
return text, total_size
310310

311311

312-
def encode_blob(value: bytes, *, max_blob_size: int = _MAX_BLOB_SIZE) -> bytes:
    """Encode a blob (length-prefixed binary data, padded to 8-byte boundary).

    Format: uint64 length + data + padding

    ``max_blob_size`` is the per-blob cap. Defaults to
    ``_MAX_BLOB_SIZE`` (16 MiB) — a defense-in-depth ceiling matching
    the decode-side cap. Callers handling larger BLOBs (per their
    deployment's actual ``raft.log.entry_size_max``) can raise this
    explicitly. The outer ``max_message_size`` cap on the codec
    always wins regardless.
    """
    size = len(value)
    # Reject oversize payloads before doing any allocation work.
    if size > max_blob_size:
        raise EncodeError(f"Blob length {size} exceeds maximum ({max_blob_size})")
    # uint64 length prefix, raw bytes, then zero-fill up to the next 8-byte word.
    return encode_uint64(size) + value + b"\x00" * pad_to_word(size)
322329

323330

324-
def decode_blob(data: bytes | memoryview) -> tuple[bytes, int]:
331+
def decode_blob(
332+
data: bytes | memoryview, *, max_blob_size: int = _MAX_BLOB_SIZE
333+
) -> tuple[bytes, int]:
325334
"""Decode a blob.
326335
327336
Accepts either ``bytes`` or ``memoryview``. Returns the blob data
328337
(always as ``bytes``) and the number of bytes consumed.
338+
339+
``max_blob_size`` mirrors the ``encode_blob`` parameter — see that
340+
docstring for the rationale.
329341
"""
330342
if len(data) < 8:
331343
raise DecodeError("Not enough data for blob length")
332344

333345
length = decode_uint64(data[:8])
334-
if length > _MAX_BLOB_SIZE:
335-
raise DecodeError(f"Blob length {length} exceeds maximum ({_MAX_BLOB_SIZE})")
346+
if length > max_blob_size:
347+
raise DecodeError(f"Blob length {length} exceeds maximum ({max_blob_size})")
336348
total_size = 8 + length + pad_to_word(length)
337349

338350
if len(data) < total_size:

tests/test_blob_max_size_kwarg.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""``encode_blob`` and ``decode_blob`` accept an optional
2+
``max_blob_size`` kwarg so callers handling deployments with larger
3+
``raft.log.entry_size_max`` can override the defense-in-depth
4+
default (16 MiB) without monkey-patching the module constant.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import pytest
10+
11+
from dqlitewire.exceptions import DecodeError, EncodeError
12+
from dqlitewire.types import _MAX_BLOB_SIZE, decode_blob, encode_blob
13+
14+
15+
def test_default_max_blob_size_unchanged() -> None:
    """With no kwarg supplied, the cap is the module-level
    ``_MAX_BLOB_SIZE``, so pre-existing callers observe identical
    behavior."""
    too_big = bytes(_MAX_BLOB_SIZE + 1)
    with pytest.raises(EncodeError, match="exceeds maximum"):
        encode_blob(too_big)
21+
22+
23+
def test_caller_can_lower_encode_cap() -> None:
    """Tightening the cap below the default rejects blobs earlier on a
    constrained deployment."""
    blob = bytes(100)
    with pytest.raises(EncodeError, match="exceeds maximum"):
        encode_blob(blob, max_blob_size=99)
28+
29+
30+
def test_caller_can_raise_encode_cap() -> None:
    """A deployment with a larger ``raft.log.entry_size_max`` can lift
    the cap and round-trip blobs above the default 16 MiB."""
    cap = _MAX_BLOB_SIZE * 2
    payload = bytes(_MAX_BLOB_SIZE + 16)
    wire = encode_blob(payload, max_blob_size=cap)
    roundtripped, _consumed = decode_blob(wire, max_blob_size=cap)
    assert roundtripped == payload
37+
38+
39+
def test_caller_can_lower_decode_cap() -> None:
    """When decoding from an untrusted peer, a tightened cap rejects
    anything above the deployment's own ceiling."""
    wire = encode_blob(bytes(100))
    with pytest.raises(DecodeError, match="exceeds maximum"):
        decode_blob(wire, max_blob_size=50)

0 commit comments

Comments
 (0)