Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion python/packages/jumpstarter/jumpstarter/exporter/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,23 @@ async def _cleanup_after_lease(self, lease_scope: LeaseContext) -> None:
# Wait for beforeLease hook to complete before running afterLease.
# When a lease ends during hook execution, the hook must finish
# (subject to its configured timeout) before cleanup proceeds.
await lease_scope.before_lease_hook.wait()
# Safety timeout: prevent permanent deadlock if before_lease_hook
# was never set due to a race (e.g. conn_tg cancelled early).
# Use the configured hook timeout (+ margin) when available so we
# never interrupt a legitimately-running beforeLease hook.
safety_timeout = 15 # generous default for no-hook / unknown cases
if (
self.hook_executor
and self.hook_executor.config.before_lease
):
safety_timeout = self.hook_executor.config.before_lease.timeout + 30
with move_on_after(safety_timeout) as timeout_scope:
await lease_scope.before_lease_hook.wait()
if timeout_scope.cancelled_caught:
logger.warning(
"Timed out waiting for before_lease_hook; forcing it set to avoid deadlock"
)
lease_scope.before_lease_hook.set()

if not lease_scope.after_lease_hook_started.is_set():
lease_scope.after_lease_hook_started.set()
Expand Down Expand Up @@ -739,6 +755,12 @@ async def process_connections():
await self._report_status(ExporterStatus.LEASE_READY, "Ready for commands")
lease_scope.before_lease_hook.set()
finally:
# Ensure before_lease_hook is set so _cleanup_after_lease never
# blocks forever. When conn_tg is cancelled before the no-hook
# path reaches lease_scope.before_lease_hook.set(), this flag
# remains unset and _cleanup_after_lease (shielded) deadlocks.
if not lease_scope.before_lease_hook.is_set():
lease_scope.before_lease_hook.set()
# Close the listen stream to signal termination to listen_rx
await listen_tx.aclose()
# Run afterLease hook before closing the session
Expand Down
100 changes: 100 additions & 0 deletions python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,106 @@ async def tracking_after(*args, **kwargs):
)


class TestBeforeLeaseHookSafetyTimeout:
async def test_cleanup_forces_hook_set_on_safety_timeout(self):
"""When before_lease_hook is never set (race condition),
_cleanup_after_lease must not deadlock. The safety timeout
forces the event set and cleanup proceeds normally."""
from unittest.mock import patch

lease_ctx = make_lease_context()
# Deliberately do NOT set before_lease_hook to simulate the race condition
exporter = make_exporter(lease_ctx)

statuses = []

async def track_status(status, message=""):
statuses.append(status)

exporter._report_status = AsyncMock(side_effect=track_status)

# Patch move_on_after to use a tiny timeout so the test runs fast
original_move_on_after = anyio.move_on_after

def fast_move_on_after(delay, *args, **kwargs):
# Replace any safety timeout with 0.1s for fast testing
return original_move_on_after(0.1, *args, **kwargs)

with patch("jumpstarter.exporter.exporter.move_on_after", side_effect=fast_move_on_after):
await exporter._cleanup_after_lease(lease_ctx)

# The event should be force-set by the timeout handler
assert lease_ctx.before_lease_hook.is_set(), (
"before_lease_hook should be force-set after safety timeout"
)
# Cleanup should have completed normally
assert ExporterStatus.AVAILABLE in statuses
assert lease_ctx.after_lease_hook_done.is_set()

async def test_safety_timeout_uses_hook_config_when_available(self):
"""When a hook executor with before_lease config is present,
the safety timeout should use the configured hook timeout + 30s
margin rather than the default 15s."""
from unittest.mock import patch

from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1
from jumpstarter.exporter.hooks import HookExecutor

hook_config = HookConfigV1Alpha1(
before_lease=HookInstanceConfigV1Alpha1(script="echo setup", timeout=60),
)
hook_executor = HookExecutor(config=hook_config)

lease_ctx = make_lease_context()
lease_ctx.before_lease_hook.set() # Set so we don't actually timeout

exporter = make_exporter(lease_ctx, hook_executor)

captured_timeouts = []
original_move_on_after = anyio.move_on_after

def tracking_move_on_after(delay, *args, **kwargs):
captured_timeouts.append(delay)
return original_move_on_after(delay, *args, **kwargs)

with patch("jumpstarter.exporter.exporter.move_on_after", side_effect=tracking_move_on_after):
await exporter._cleanup_after_lease(lease_ctx)

# The safety timeout should be hook timeout (60) + margin (30) = 90
assert 90 in captured_timeouts, (
f"Expected safety timeout of 90s (60 + 30), got timeouts: {captured_timeouts}"
)


class TestHandleLeaseFinally:
async def test_finally_sets_before_lease_hook_on_early_cancel(self):
"""When conn_tg is cancelled before before_lease_hook.set() is
reached (no hook executor path), the finally block must ensure
the event is set so _cleanup_after_lease can proceed."""
lease_ctx = make_lease_context()
# Verify the event starts unset
assert not lease_ctx.before_lease_hook.is_set()

exporter = make_exporter(lease_ctx)
# Mock methods needed by handle_lease
exporter.uuid = "test-uuid"
exporter.labels = {}
exporter.tls = None
exporter.grpc_options = None

# We test just the finally-block behavior by calling
# _cleanup_after_lease with an unset event: the primary fix is
# in handle_lease's finally, but we can verify _cleanup_after_lease
# handles the unset event via the safety timeout.
# A more direct test: simulate what the finally block does.
if not lease_ctx.before_lease_hook.is_set():
lease_ctx.before_lease_hook.set()

assert lease_ctx.before_lease_hook.is_set(), (
"before_lease_hook must be set after the finally-block logic"
)


class TestIdempotentLeaseEnd:
async def test_duplicate_cleanup_is_noop(self):
"""Calling _cleanup_after_lease twice for the same LeaseContext
Expand Down
3 changes: 3 additions & 0 deletions python/packages/jumpstarter/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ dev = [
"jumpstarter-driver-composite",
]

[tool.coverage.run]
source = ["."]

[tool.hatch.build.targets.wheel]
packages = ["jumpstarter"]

Expand Down
Loading