Skip to content

Commit 66873be

Browse files
xr843claude
andcommitted
feat(local-executor): add opt-in sandbox posture to LocalCommandLineCodeExecutor
Refs #7462. Supersedes closed PR #7467.

The legacy `UserWarning` at construction was easily suppressed by production warning filters and `python -W ignore`, leaving unsandboxed execution of LLM-generated code as the silent default. This change introduces an explicit three-state `sandbox` posture parameter:

- `sandbox=None` (default; legacy behavior kept for backward compatibility): a `DeprecationWarning` plus a `logger.warning()` call surface the risk in both the Python warning channel and structured logging pipelines. A future release will make this parameter required.
- `sandbox=False`: the caller explicitly acknowledges unsandboxed execution; no warning is emitted.
- `sandbox=True`: best-effort in-process hardening:
  * Environment entries whose name contains a credential pattern (TOKEN, SECRET, PASSWORD, PASSWD, API_KEY, APIKEY, PRIVATE_KEY, CREDENTIAL, SESSION, COOKIE, AUTH) are stripped from the child process.
  * On POSIX, per-child rlimits (RLIMIT_CPU, RLIMIT_AS, RLIMIT_NOFILE, RLIMIT_NPROC) are applied via `preexec_fn` so runaway memory / fork-bomb payloads are capped.
  * On Windows, the env scrub applies but `preexec_fn` is unavailable; a `UserWarning` directs callers to the Docker executor for strong isolation.

The class docstring is updated, and `LocalCommandLineCodeExecutorConfig` now carries the posture so that it round-trips through serialization and declarative deployments cannot silently downgrade.

This is NOT a substitute for `DockerCommandLineCodeExecutor` — adversarial payloads can still read files, make outbound connections, and write to `work_dir`. The docstring states this explicitly.

Tests cover: the default `DeprecationWarning`, explicit opt-out silence, env scrubbing on POSIX, and the config round-trip.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8544314 commit 66873be

2 files changed

Lines changed: 219 additions & 9 deletions

File tree

python/packages/autogen-ext/src/autogen_ext/code_executors/local/__init__.py

Lines changed: 141 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pathlib import Path
1212
from string import Template
1313
from types import SimpleNamespace
14-
from typing import Any, Callable, ClassVar, List, Optional, Sequence, Union
14+
from typing import Any, Callable, ClassVar, List, Mapping, Optional, Sequence, Union
1515

1616
from autogen_core import CancellationToken, Component
1717
from autogen_core.code_executor import CodeBlock, CodeExecutor, FunctionWithRequirements, FunctionWithRequirementsStr
@@ -32,6 +32,80 @@
3232

3333
A = ParamSpec("A")
3434

35+
logger = logging.getLogger(__name__)
36+
37+
# Name fragments that mark an environment variable as credential-like. They are
# used when the executor runs with sandbox=True. A variable is dropped when its
# upper-cased name *contains* any fragment (substring test, not prefix), so
# "HF_TOKEN", "GH_TOKEN" and "MY_API_KEY" are all caught without listing every
# provider individually.
_SENSITIVE_ENV_SUBSTRINGS: tuple[str, ...] = (
    "TOKEN",
    "SECRET",
    "PASSWORD",
    "PASSWD",
    "API_KEY",
    "APIKEY",
    "PRIVATE_KEY",
    "CREDENTIAL",
    "SESSION",
    "COOKIE",
    "AUTH",
)


def _scrub_sensitive_env(env: Mapping[str, str]) -> dict[str, str]:
    """Build a filtered copy of *env* without credential-looking entries.

    Args:
        env: Environment mapping of variable name to value. It is only read,
            never modified.

    Returns:
        A new ``dict`` holding, in the original iteration order, every entry
        whose upper-cased name contains none of the fragments listed in
        ``_SENSITIVE_ENV_SUBSTRINGS``.
    """

    def _looks_sensitive(name: str) -> bool:
        # Upper-casing the name once makes the substring test case-insensitive.
        normalized = name.upper()
        return any(fragment in normalized for fragment in _SENSITIVE_ENV_SUBSTRINGS)

    return {name: value for name, value in env.items() if not _looks_sensitive(name)}
66+
67+
68+
# Default per-process resource ceilings when sandbox=True on POSIX. These are
# best-effort safety rails, not a security boundary — an adversarial payload
# can still read files, make outbound connections, and write to work_dir.
_SANDBOX_MAX_ADDRESS_SPACE_BYTES = 2 * 1024 * 1024 * 1024  # 2 GiB
_SANDBOX_MAX_OPEN_FILES = 256
_SANDBOX_MAX_PROCESSES = 64


def _build_preexec_rlimits(timeout_seconds: int) -> Callable[[], None]:
    """Return a ``preexec_fn`` callable that applies POSIX rlimits in the
    forked child before ``exec``.

    Isolated in its own factory so it can be patched in tests and so the
    ``resource`` import stays POSIX-scoped.

    Each ceiling is clamped to the hard limit the child inherited. Asking
    ``setrlimit`` for more than the current hard limit raises ``ValueError``
    in an unprivileged process, and an exception escaping ``preexec_fn``
    aborts the whole subprocess launch. In a privileged process the same
    request would silently *raise* the limit. Clamping avoids both outcomes.
    A limit that still cannot be applied is skipped, consistent with the
    best-effort contract of the sandbox posture.

    Args:
        timeout_seconds: Executor timeout; used (with a floor of one second)
            as the CPU-time ceiling.

    Returns:
        A zero-argument callable suitable for ``preexec_fn``.
    """
    import resource  # POSIX-only; import at call time for Windows safety

    cpu_seconds = max(1, int(timeout_seconds))

    # (attribute name on ``resource``, requested ceiling), in application order.
    requested = (
        # CPU time — a hard second-level ceiling complementing the
        # asyncio-level timeout.
        ("RLIMIT_CPU", cpu_seconds),
        # Address space — blocks naive memory-bomb payloads.
        ("RLIMIT_AS", _SANDBOX_MAX_ADDRESS_SPACE_BYTES),
        # File descriptors.
        ("RLIMIT_NOFILE", _SANDBOX_MAX_OPEN_FILES),
        # Fork-bomb guard; RLIMIT_NPROC is advisory on some platforms.
        # NOTE(review): per setrlimit(2) on Linux this limit is accounted
        # against every process/thread of the real user ID, not just this
        # child's descendants — confirm 64 is workable on busy hosts.
        ("RLIMIT_NPROC", _SANDBOX_MAX_PROCESSES),
    )
    # Resolve the constants in the parent so the child does as little work as
    # possible between fork and exec; skip limits this platform lacks.
    ceilings = tuple(
        (getattr(resource, attr), ceiling)
        for attr, ceiling in requested
        if hasattr(resource, attr)
    )
    unlimited = resource.RLIM_INFINITY

    def _apply() -> None:
        for which, ceiling in ceilings:
            try:
                _soft, hard = resource.getrlimit(which)
                # RLIM_INFINITY may be encoded as -1, so it must be tested
                # explicitly rather than fed to min().
                limit = ceiling if hard == unlimited else min(ceiling, hard)
                resource.setrlimit(which, (limit, limit))
            except (ValueError, OSError):
                # Best-effort: keep going with the remaining limits. No
                # logging here — this runs between fork and exec.
                continue

    return _apply
108+
35109

36110
class LocalCommandLineCodeExecutorConfig(BaseModel):
37111
"""Configuration for LocalCommandLineCodeExecutor"""
@@ -40,6 +114,7 @@ class LocalCommandLineCodeExecutorConfig(BaseModel):
40114
work_dir: Optional[str] = None
41115
functions_module: str = "functions"
42116
cleanup_temp_files: bool = True
117+
sandbox: Optional[bool] = None
43118

44119

45120
class LocalCommandLineCodeExecutor(CodeExecutor, Component[LocalCommandLineCodeExecutorConfig]):
@@ -81,6 +156,14 @@ class LocalCommandLineCodeExecutor(CodeExecutor, Component[LocalCommandLineCodeE
81156
functions_module (str, optional): The name of the module that will be created to store the functions. Defaults to "functions".
82157
cleanup_temp_files (bool, optional): Whether to automatically clean up temporary files after execution. Defaults to True.
83158
virtual_env_context (Optional[SimpleNamespace], optional): The virtual environment context. Defaults to None.
159+
sandbox (Optional[bool], optional): Explicit sandbox posture. When ``None`` (default, legacy) the executor runs unsandboxed
160+
and emits a ``DeprecationWarning``; in a future release this parameter will become required. When ``False`` the caller
161+
explicitly acknowledges unsandboxed execution and no warning is emitted. When ``True`` the executor applies best-effort
162+
in-process hardening: environment entries whose name contains common credential patterns (``TOKEN``, ``SECRET``,
163+
``API_KEY``, ``PASSWORD``, ``PRIVATE_KEY`` etc.) are stripped from the child process, and on POSIX platforms per-child
164+
rlimits (``RLIMIT_CPU``, ``RLIMIT_AS``, ``RLIMIT_NOFILE``, ``RLIMIT_NPROC``) are applied via ``preexec_fn``. This is
165+
**not** a substitute for :class:`DockerCommandLineCodeExecutor`; it does not provide filesystem, network, or user
166+
isolation. Use the Docker executor for untrusted-code deployments.
84167
85168
.. note::
86169
Using the current directory (".") as working directory is deprecated. Using it will raise a deprecation warning.
@@ -158,15 +241,45 @@ def __init__(
158241
functions_module: str = "functions",
159242
cleanup_temp_files: bool = True,
160243
virtual_env_context: Optional[SimpleNamespace] = None,
244+
sandbox: Optional[bool] = None,
161245
):
162-
# Issue warning about using LocalCommandLineCodeExecutor
163-
warnings.warn(
164-
"Using LocalCommandLineCodeExecutor may execute code on the local machine which can be unsafe. "
165-
"For security, it is recommended to use DockerCommandLineCodeExecutor instead. "
166-
"To install Docker, visit: https://docs.docker.com/get-docker/",
167-
UserWarning,
168-
stacklevel=2,
169-
)
246+
# ── Sandbox posture notification ────────────────────────────────────
247+
# The legacy UserWarning at construction was easily suppressed by
248+
# production configurations (`python -W ignore`, warning filters in
249+
# logging pipelines). Callers now choose one of three postures:
250+
# • sandbox=None (default, legacy) → DeprecationWarning + logger
251+
# • sandbox=False → explicit opt-out, silent
252+
# • sandbox=True → best-effort in-process
253+
# hardening (env scrub +
254+
# POSIX rlimits). NOT a
255+
# substitute for the Docker
256+
# executor.
257+
if sandbox is None:
258+
warnings.warn(
259+
"LocalCommandLineCodeExecutor is running WITHOUT sandboxing. "
260+
"Pass sandbox=False to acknowledge this explicitly, or "
261+
"sandbox=True for best-effort POSIX hardening. "
262+
"For strong isolation use DockerCommandLineCodeExecutor "
263+
"(https://docs.docker.com/get-docker/). "
264+
"In a future release the `sandbox` parameter will become "
265+
"required.",
266+
DeprecationWarning,
267+
stacklevel=2,
268+
)
269+
logger.warning(
270+
"LocalCommandLineCodeExecutor instantiated without explicit "
271+
"sandbox posture; defaulting to unsandboxed execution."
272+
)
273+
elif sandbox is True and sys.platform == "win32":
274+
warnings.warn(
275+
"sandbox=True requested but POSIX rlimits / preexec hooks "
276+
"are not available on Windows; falling back to env scrub "
277+
"only. Use DockerCommandLineCodeExecutor for strong "
278+
"isolation on Windows.",
279+
UserWarning,
280+
stacklevel=2,
281+
)
282+
self._sandbox: Optional[bool] = sandbox
170283

171284
if timeout < 1:
172285
raise ValueError("Timeout must be greater than or equal to 1.")
@@ -399,6 +512,14 @@ async def _execute_code_dont_check_setup(
399512
virtual_env_bin_abs_path = os.path.abspath(self._virtual_env_context.bin_path)
400513
env["PATH"] = f"{virtual_env_bin_abs_path}{os.pathsep}{env['PATH']}"
401514

515+
# Sandbox hardening: strip env entries that look like credentials.
516+
# This is a shallow defence — LLM-generated code can still read
517+
# files, call out to the network, or touch the filesystem — but it
518+
# prevents the most common leak pattern where provider API keys
519+
# sit in the parent process environment.
520+
if self._sandbox is True:
521+
env = _scrub_sensitive_env(env)
522+
402523
# Decide how to invoke the script
403524
if lang == "python":
404525
program = (
@@ -422,6 +543,14 @@ async def _execute_code_dont_check_setup(
422543
# Shell commands (bash, sh, etc.)
423544
extra_args = [str(written_file.absolute())]
424545

546+
# Build sandbox-specific subprocess kwargs.
547+
# On POSIX with sandbox=True we set per-child rlimits via a
548+
# preexec_fn so runaway memory or fork bombs are capped. Windows
549+
# does not support preexec_fn or RLIMIT_AS → env scrub only.
550+
exec_kwargs: dict[str, Any] = {}
551+
if self._sandbox is True and sys.platform != "win32":
552+
exec_kwargs["preexec_fn"] = _build_preexec_rlimits(self._timeout)
553+
425554
# Create a subprocess and run
426555
task = asyncio.create_task(
427556
asyncio.create_subprocess_exec(
@@ -431,6 +560,7 @@ async def _execute_code_dont_check_setup(
431560
stdout=asyncio.subprocess.PIPE,
432561
stderr=asyncio.subprocess.PIPE,
433562
env=env,
563+
**exec_kwargs,
434564
)
435565
)
436566
cancellation_token.link_future(task)
@@ -514,6 +644,7 @@ def _to_config(self) -> LocalCommandLineCodeExecutorConfig:
514644
work_dir=str(self.work_dir),
515645
functions_module=self._functions_module,
516646
cleanup_temp_files=self._cleanup_temp_files,
647+
sandbox=self._sandbox,
517648
)
518649

519650
@classmethod
@@ -523,4 +654,5 @@ def _from_config(cls, config: LocalCommandLineCodeExecutorConfig) -> Self:
523654
work_dir=Path(config.work_dir) if config.work_dir is not None else None,
524655
functions_module=config.functions_module,
525656
cleanup_temp_files=config.cleanup_temp_files,
657+
sandbox=config.sandbox,
526658
)

python/packages/autogen-ext/tests/code_executors/test_commandline_code_executor.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import tempfile
1111
import types
1212
import venv
13+
import warnings
1314
from pathlib import Path
1415
from typing import AsyncGenerator, TypeAlias
1516
from unittest.mock import patch
@@ -445,3 +446,80 @@ async def test_cleanup_temp_files_oserror(caplog: pytest.LogCaptureFixture) -> N
445446
# The code file should have been attempted to be deleted and failed
446447
assert any("Failed to delete temporary file" in record.message for record in caplog.records)
447448
assert any("Mocked OSError" in record.message for record in caplog.records)
449+
450+
451+
# ─────────────────────────────────────────────────────────────────────────────
452+
# Sandbox posture tests (issue #7462)
453+
# ─────────────────────────────────────────────────────────────────────────────
454+
455+
456+
@pytest.mark.asyncio
async def test_sandbox_default_emits_deprecation_warning() -> None:
    """Constructing the executor with no ``sandbox`` argument must raise a
    ``DeprecationWarning`` whose text names both explicit postures
    (``sandbox=False`` followed by ``sandbox=True``)."""
    expected_text = r"sandbox=False.*sandbox=True"
    with pytest.warns(DeprecationWarning, match=expected_text):
        LocalCommandLineCodeExecutor()
463+
464+
465+
@pytest.mark.asyncio
async def test_sandbox_false_is_silent_opt_out() -> None:
    """Passing ``sandbox=False`` is the explicit acknowledgement of
    unsandboxed execution, so construction must not produce a
    ``DeprecationWarning``."""
    with warnings.catch_warnings():
        # Promote DeprecationWarning to an exception: if the explicit opt-out
        # path ever regressed and warned, construction below would raise.
        warnings.simplefilter(action="error", category=DeprecationWarning)
        LocalCommandLineCodeExecutor(sandbox=False)
474+
475+
476+
@pytest.mark.asyncio
@pytest.mark.skipif(sys.platform == "win32", reason="POSIX-only rlimit path")
async def test_sandbox_true_strips_credential_env(tmp_path: Path) -> None:
    """With ``sandbox=True`` the child process must not see environment
    variables whose names look like credentials, while unrelated variables
    still pass through.

    The executed script prints each probed variable, or ``<missing>`` when it
    is absent from the child's environment."""
    probe_script = (
        "import os\n"
        "for k in ['MY_API_KEY', 'SOME_TOKEN', 'HARMLESS_VAR']:\n"
        " print(k, os.environ.get(k, '<missing>'))\n"
    )
    injected_env = {
        "MY_API_KEY": "sekret-1",
        "SOME_TOKEN": "sekret-2",
        "HARMLESS_VAR": "benign",
    }

    sandboxed = LocalCommandLineCodeExecutor(work_dir=tmp_path, sandbox=True)
    await sandboxed.start()
    try:
        # The variables are injected into the parent environment only for the
        # duration of the execution call.
        with patch.dict(os.environ, injected_env, clear=False):
            outcome = await sandboxed.execute_code_blocks(
                [CodeBlock(code=probe_script, language="python")],
                cancellation_token=CancellationToken(),
            )
    finally:
        await sandboxed.stop()

    assert outcome.exit_code == 0, outcome.output
    assert "MY_API_KEY <missing>" in outcome.output
    assert "SOME_TOKEN <missing>" in outcome.output
    assert "HARMLESS_VAR benign" in outcome.output
509+
510+
511+
@pytest.mark.asyncio
async def test_sandbox_roundtrips_through_config(tmp_path: Path) -> None:
    """The sandbox posture must survive a dump → load cycle, so a component
    restored from its serialized form keeps the explicit setting instead of
    falling back to the default warning path."""
    original = LocalCommandLineCodeExecutor(work_dir=tmp_path, sandbox=True)
    dumped = original.dump_component()
    # The serialized config must carry the explicit posture.
    assert dumped.config["sandbox"] is True

    # If the posture were not threaded through on load, the constructor would
    # take the default path and emit a DeprecationWarning, which the filter
    # below turns into an exception.
    with warnings.catch_warnings():
        warnings.simplefilter(action="error", category=DeprecationWarning)
        reloaded = LocalCommandLineCodeExecutor.load_component(dumped)
    assert reloaded._sandbox is True  # type: ignore[attr-defined]

0 commit comments

Comments
 (0)