Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion headroom/proxy/handlers/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,10 +966,17 @@ async def _finalize_pre_upstream() -> None:
f"(frozen prefix={frozen_message_count}) to preserve cache"
)
inject_system_instructions = False
inject_tool = self.config.ccr_inject_tool
if inject_tool and frozen_message_count > 0:
logger.info(
f"[{request_id}] CCR: deferring tool injection "
f"(frozen prefix={frozen_message_count}) to preserve cache"
)
inject_tool = False
# Create fresh injector to avoid state leakage between requests
injector = CCRToolInjector(
provider="anthropic",
inject_tool=self.config.ccr_inject_tool,
inject_tool=inject_tool,
inject_system_instructions=inject_system_instructions,
)
optimized_messages, tools, was_injected = injector.process_request(
Expand Down
64 changes: 64 additions & 0 deletions tests/test_proxy_anthropic_cache_stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,70 @@ async def _fake_retry(method, url, headers, body, stream=False): # noqa: ANN001
assert captured["inject_system"] is False


def test_ccr_tool_injection_disabled_when_prefix_frozen(monkeypatch) -> None:
    """Check that CCR tool injection is turned off when the prefix is frozen.

    The proxy is configured with ``ccr_inject_tool = True`` and a fake prefix
    tracker built with ``frozen_count=1``. ``CCRToolInjector`` is replaced by a
    stand-in that records the ``inject_tool`` argument it is constructed with,
    and the upstream call is stubbed with a canned 200 response. After posting
    to ``/v1/messages`` the recorded ``inject_tool`` value must be ``False``.
    """
    # Starts as None so the final ``is False`` check also fails if the
    # injector is never constructed at all.
    captured = {"inject_tool": None}

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy

        # Only the CCR tool-injection switch is enabled for this request.
        proxy.config.optimize = False
        proxy.config.image_optimize = False
        proxy.config.ccr_inject_tool = True
        proxy.config.ccr_inject_system_instructions = False

        fake_tracker = _FakePrefixTracker(frozen_count=1)

        def _stable_session_id(request, model, messages):  # noqa: ANN001
            return "stable-session"

        def _tracker_for(session_id, provider):  # noqa: ANN001
            return fake_tracker

        # Route every session lookup to the fake tracker created above.
        proxy.session_tracker_store.compute_session_id = _stable_session_id
        proxy.session_tracker_store.get_or_create = _tracker_for

        class _FakeInjector:
            """Stand-in injector that records how it was configured."""

            def __init__(
                self,
                provider,  # noqa: ANN001
                inject_tool,  # noqa: ANN001
                inject_system_instructions,  # noqa: ANN001
            ):
                captured["inject_tool"] = inject_tool
                self.has_compressed_content = False
                self.detected_hashes = []

            def process_request(self, messages, tools):  # noqa: ANN001
                # Pass the request through unchanged and report "not injected".
                return messages, tools, False

        monkeypatch.setattr("headroom.ccr.CCRToolInjector", _FakeInjector)

        async def _fake_retry(method, url, headers, body, stream=False):  # noqa: ANN001
            # Canned upstream reply so no network call is made.
            usage = {
                "input_tokens": 20,
                "output_tokens": 3,
                "cache_read_input_tokens": 0,
                "cache_creation_input_tokens": 0,
            }
            payload = {
                "id": "msg_ccr_tool_1",
                "type": "message",
                "role": "assistant",
                "content": [{"type": "text", "text": "ok"}],
                "usage": usage,
            }
            return httpx.Response(200, json=payload)

        proxy._retry_request = _fake_retry

        request_headers = {"x-api-key": "test-key", "anthropic-version": "2023-06-01"}
        request_body = {
            "model": "claude-sonnet-4-6",
            "max_tokens": 64,
            "messages": [{"role": "user", "content": "hello"}],
        }
        response = client.post(
            "/v1/messages",
            headers=request_headers,
            json=request_body,
        )

    assert response.status_code == 200
    assert captured["inject_tool"] is False


def test_previous_turns_always_frozen_only_final_turn_mutable() -> None:
captured = {}
with _make_proxy_client() as client:
Expand Down
Loading