CCR: defer tool injection while a frozen message prefix exists

Summary of this patch (free text ahead of the first "diff --git" line is
ignored by patch tools):

* headroom/proxy/handlers/anthropic.py
  The value passed to CCRToolInjector(inject_tool=...) is no longer read
  straight from self.config.ccr_inject_tool. It is first copied into a local
  `inject_tool`; when that value is truthy and frozen_message_count > 0, an
  info-level message is logged and the local is forced to False. This follows
  the same shape as the inject_system_instructions handling visible in the
  hunk's leading context lines.

* tests/test_proxy_anthropic_cache_stability.py
  Adds test_ccr_tool_injection_disabled_when_prefix_frozen. The test enables
  proxy.config.ccr_inject_tool, installs a _FakePrefixTracker(frozen_count=1),
  replaces headroom.ccr.CCRToolInjector with a fake that records the
  inject_tool constructor argument, stubs proxy._retry_request with a canned
  HTTP 200 response, posts to /v1/messages, and asserts that the response
  status is 200 and the recorded inject_tool value is False.

NOTE(review): line breaks and indentation in this copy of the patch were
reconstructed from a whitespace-collapsed source. Context-line indentation
(especially in the anthropic.py hunk) must be verified against the target
files before applying.

NOTE(review): presumably frozen_count=1 on the fake tracker is what makes
frozen_message_count > 0 in the handler; _FakePrefixTracker is defined outside
this patch, so confirm.

NOTE(review): if the retrieval tool can be injected on an earlier turn (no
frozen prefix) and is then withheld on later turns, the tools array would
differ between turns. Confirm that this cannot happen, or that it does not
affect the cache this change is meant to preserve.

diff --git a/headroom/proxy/handlers/anthropic.py b/headroom/proxy/handlers/anthropic.py
index 1c024f9a..f1531442 100644
--- a/headroom/proxy/handlers/anthropic.py
+++ b/headroom/proxy/handlers/anthropic.py
@@ -966,10 +966,17 @@ async def _finalize_pre_upstream() -> None:
                         f"(frozen prefix={frozen_message_count}) to preserve cache"
                     )
                     inject_system_instructions = False
+                inject_tool = self.config.ccr_inject_tool
+                if inject_tool and frozen_message_count > 0:
+                    logger.info(
+                        f"[{request_id}] CCR: deferring tool injection "
+                        f"(frozen prefix={frozen_message_count}) to preserve cache"
+                    )
+                    inject_tool = False
                 # Create fresh injector to avoid state leakage between requests
                 injector = CCRToolInjector(
                     provider="anthropic",
-                    inject_tool=self.config.ccr_inject_tool,
+                    inject_tool=inject_tool,
                     inject_system_instructions=inject_system_instructions,
                 )
                 optimized_messages, tools, was_injected = injector.process_request(
diff --git a/tests/test_proxy_anthropic_cache_stability.py b/tests/test_proxy_anthropic_cache_stability.py
index 534ba7f0..ce5d6e7e 100644
--- a/tests/test_proxy_anthropic_cache_stability.py
+++ b/tests/test_proxy_anthropic_cache_stability.py
@@ -494,6 +494,70 @@ async def _fake_retry(method, url, headers, body, stream=False):  # noqa: ANN001
         assert captured["inject_system"] is False
 
 
+def test_ccr_tool_injection_disabled_when_prefix_frozen(monkeypatch) -> None:
+    captured = {"inject_tool": None}
+    with _make_proxy_client() as client:
+        proxy = client.app.state.proxy
+        proxy.config.optimize = False
+        proxy.config.image_optimize = False
+        proxy.config.ccr_inject_tool = True
+        proxy.config.ccr_inject_system_instructions = False
+
+        fake_tracker = _FakePrefixTracker(frozen_count=1)
+        proxy.session_tracker_store.compute_session_id = lambda request, model, messages: (
+            "stable-session"
+        )
+        proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker
+
+        class _FakeInjector:
+            def __init__(
+                self,
+                provider,  # noqa: ANN001
+                inject_tool,  # noqa: ANN001
+                inject_system_instructions,  # noqa: ANN001
+            ):
+                captured["inject_tool"] = inject_tool
+                self.has_compressed_content = False
+                self.detected_hashes = []
+
+            def process_request(self, messages, tools):  # noqa: ANN001
+                return messages, tools, False
+
+        monkeypatch.setattr("headroom.ccr.CCRToolInjector", _FakeInjector)
+
+        async def _fake_retry(method, url, headers, body, stream=False):  # noqa: ANN001
+            return httpx.Response(
+                200,
+                json={
+                    "id": "msg_ccr_tool_1",
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "text", "text": "ok"}],
+                    "usage": {
+                        "input_tokens": 20,
+                        "output_tokens": 3,
+                        "cache_read_input_tokens": 0,
+                        "cache_creation_input_tokens": 0,
+                    },
+                },
+            )
+
+        proxy._retry_request = _fake_retry
+
+        response = client.post(
+            "/v1/messages",
+            headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
+            json={
+                "model": "claude-sonnet-4-6",
+                "max_tokens": 64,
+                "messages": [{"role": "user", "content": "hello"}],
+            },
+        )
+
+        assert response.status_code == 200
+        assert captured["inject_tool"] is False
+
+
 def test_previous_turns_always_frozen_only_final_turn_mutable() -> None:
     captured = {}
     with _make_proxy_client() as client: