@@ -82,13 +82,15 @@ def __init__(
8282 config : GatewayControllerConfig ,
8383 scheduler : Scheduler ,
8484 ) -> None :
85- from areal .api .alloc_mode import ModelAllocation
86-
8785 self .config = config
8886 self .scheduler = scheduler
8987
90- # Parse allocation from config.backend
91- self .rollout_alloc = ModelAllocation .from_str (config .backend )
88+ if config .external_api_url is not None :
89+ self .rollout_alloc = None
90+ else :
91+ from areal .api .alloc_mode import ModelAllocation
92+
93+ self .rollout_alloc = ModelAllocation .from_str (config .backend )
9294
9395 # Worker management
9496 self .workers : list [Worker ] = []
@@ -191,6 +193,15 @@ def initialize(
191193
192194 logger .info ("GatewayInferenceController initialized (role=%s)" , role )
193195
196+ if self .config .external_api_url :
197+ self ._register_external_model ()
198+ logger .info (
199+ "External model mode: url=%s, model=%s, name=%s" ,
200+ self .config .external_api_url ,
201+ self .config .external_api_model ,
202+ self .config .external_model_name ,
203+ )
204+
194205 async def _async_initialize (
195206 self ,
196207 server_args : dict [str , Any ] | None ,
@@ -208,6 +219,8 @@ async def _async_initialize(
208219 * **server_infos is not None** — SGLang servers already exist so
209220 we only fork data proxy on every worker; fork router + gateway
210221 on worker 0.
222+ * **external_mode** — skip inference servers entirely; data proxies
223+ start with an empty ``--backend-addr``.
211224 """
212225 from dataclasses import asdict
213226
@@ -216,30 +229,40 @@ async def _async_initialize(
216229 from areal .api .cli_args import SchedulingSpec , SchedulingStrategy
217230 from areal .api .scheduler_api import Job
218231
219- alloc = self .rollout_alloc
220- dp_size = alloc .parallel .dp_size
221232 cfg = self .config
222233 admin_api_key = self .config .openai .admin_api_key
223234
224- inf_backend = alloc .backend
235+ if self .external_mode :
236+ dp_size = 1
237+ inf_backend = None
238+ else :
239+ alloc = self .rollout_alloc
240+ dp_size = alloc .parallel .dp_size
241+ inf_backend = alloc .backend
225242
226243 # ==================================================================
227244 # Step 0: Always create dp_size RPCGuard workers
228245 # ==================================================================
229- inf_spec = SchedulingSpec (** asdict (cfg .scheduling_spec [0 ]))
230- instance_size = alloc .parallel .tp_size * alloc .parallel .pp_size
231- if server_infos is not None :
232- # Pre-existing inference servers — RPCGuard workers only host
233- # CPU services (data proxy, router, gateway), no GPUs needed.
234- inf_spec .gpu = 0
246+ if self .external_mode :
247+ inf_spec = SchedulingSpec (
248+ task_type = "worker" ,
249+ port_count = 2 ,
250+ gpu = 0 ,
251+ mem = 8 ,
252+ cmd = "python -m areal.experimental.inference_service.guard" ,
253+ )
235254 else :
236- inf_spec .cpu *= instance_size
237- inf_spec .mem *= instance_size
238- if inf_spec .gpu > 0 :
239- inf_spec .gpu = instance_size
240-
241- # Override cmd to launch RPCGuard instead of RPC server
242- inf_spec .cmd = "python -m areal.experimental.inference_service.guard"
255+ inf_spec = SchedulingSpec (** asdict (cfg .scheduling_spec [0 ]))
256+ instance_size = alloc .parallel .tp_size * alloc .parallel .pp_size
257+ if server_infos is not None :
258+ inf_spec .gpu = 0
259+ else :
260+ inf_spec .cpu *= instance_size
261+ inf_spec .mem *= instance_size
262+ if inf_spec .gpu > 0 :
263+ inf_spec .gpu = instance_size
264+ # Override cmd to launch RPCGuard instead of RPC server
265+ inf_spec .cmd = "python -m areal.experimental.inference_service.guard"
243266
244267 inf_role = f"{ self ._worker_role } { self ._INF_SUFFIX } "
245268 inf_job = Job (
@@ -256,9 +279,11 @@ async def _async_initialize(
256279 logger .info ("RPCGuard workers ready: %s" , [w .id for w in inf_workers ])
257280
258281 # ==================================================================
259- # Step 1: Launch inference servers (skip when pre-existing)
282+ # Step 1: Launch inference servers (skip in external mode or when pre-existing)
260283 # ==================================================================
261- if server_infos is not None :
284+ if self .external_mode :
285+ logger .info ("External mode — skipping inference server launch" )
286+ elif server_infos is not None :
262287 # Pre-existing servers — just record their addresses
263288 self .server_infos = server_infos
264289 self ._inf_addrs = [
@@ -327,7 +352,6 @@ def _build_launch_cmd(host: str, port: int) -> list[str]:
327352 else :
328353 raise ValueError (f"Unsupported inference backend: { inf_backend !r} " )
329354
330- # For each RPCGuard worker: alloc port, build cmd, fork server
331355 for rank , worker in enumerate (inf_workers ):
332356 guard_addr = (
333357 f"http://{ format_hostport (worker .ip , int (worker .worker_ports [0 ]))} "
@@ -447,12 +471,15 @@ def _build_launch_cmd(host: str, port: int) -> list[str]:
447471 f"http://{ format_hostport (worker .ip , int (worker .worker_ports [0 ]))} "
448472 )
449473 # Each data proxy connects to its corresponding inference server
450- data_proxy_cmd = data_proxy_base_cmd + [
451- "--backend-addr" ,
452- self ._inf_addrs [rank ],
453- "--backend-type" ,
454- inf_backend or "sglang" ,
455- ]
474+ if self .external_mode :
475+ data_proxy_cmd = data_proxy_base_cmd + ["--backend-addr" , "" ]
476+ else :
477+ data_proxy_cmd = data_proxy_base_cmd + [
478+ "--backend-addr" ,
479+ self ._inf_addrs [rank ],
480+ "--backend-type" ,
481+ inf_backend or "sglang" ,
482+ ]
456483 data_proxy_host , data_proxy_port = self ._fork_on_guard (
457484 guard_addr = guard_addr ,
458485 role = "data-proxy" ,
@@ -533,6 +560,40 @@ def _register_data_proxies_in_router(self) -> None:
533560 worker_id ,
534561 )
535562
def _register_external_model(self) -> None:
    """Register the configured external model with the gateway.

    Posts the external model's public name, upstream URL and upstream model
    identifier to ``<gateway>/register_model``, authenticating the request
    with the admin API key, then logs the registration.

    Raises:
        ValueError: If ``external_api_key`` is unset or empty.
        requests.HTTPError: If the gateway responds with an error status.
    """
    cfg = self.config

    # Reject empty strings as well as None: code in this file that picks the
    # outgoing key with ``external_api_key or admin_api_key`` treats any
    # falsy value as "missing" and would fall back to the admin key.
    if not cfg.external_api_key:
        raise ValueError(
            "external_api_key must be set when using external model mode. "
            "Without it, the internal admin API key would be leaked to the "
            "external provider."
        )

    # Imported lazily, and only once the configuration has been validated.
    import requests

    resp = requests.post(
        f"{self._gateway_addr}/register_model",
        json={
            "name": cfg.external_model_name,
            "url": cfg.external_api_url,
            "model": cfg.external_api_model,
        },
        headers={"Authorization": f"Bearer {cfg.openai.admin_api_key}"},
        timeout=cfg.request_timeout,
    )
    # Surface a failed registration immediately instead of continuing with a
    # gateway that does not know about the model.
    resp.raise_for_status()
    logger.info(
        "External model registered: name=%s url=%s model=%s "
        "(requests will be sent to %s/chat/completions)",
        cfg.external_model_name,
        cfg.external_api_url,
        cfg.external_api_model,
        cfg.external_api_url.rstrip("/"),
    )
592+
@property
def external_mode(self) -> bool:
    """Report whether an external API URL is configured.

    Returns:
        ``True`` when ``config.external_api_url`` is anything other than
        ``None`` (an empty string still counts as configured), else ``False``.
    """
    api_url = self.config.external_api_url
    if api_url is None:
        return False
    return True
596+
536597 def _start_online_callback_server (self ) -> None :
537598 """Start callback server used by the router to deliver ready trajectories."""
538599 if self ._callback_server is not None :
@@ -990,11 +1051,19 @@ async def chat_completion(
9901051 if extra_body and isinstance (extra_body , dict ):
9911052 body .update (extra_body )
9921053
993- api_key = (
994- session_api_key
995- if session_api_key is not None
996- else self .config .openai .admin_api_key
997- )
1054+ if self .external_mode :
1055+ body ["model" ] = self .config .external_model_name
1056+ api_key = (
1057+ session_api_key
1058+ if session_api_key is not None
1059+ else self .config .external_api_key or self .config .openai .admin_api_key
1060+ )
1061+ else :
1062+ api_key = (
1063+ session_api_key
1064+ if session_api_key is not None
1065+ else self .config .openai .admin_api_key
1066+ )
9981067 url = f"{ self ._gateway_addr } /chat/completions"
9991068 headers = {
10001069 "Content-Type" : "application/json" ,
@@ -1201,6 +1270,19 @@ def _resolve_workflow(
12011270 from areal .api .workflow_api import RolloutWorkflow
12021271 from areal .utils .dynamic_import import import_from_string
12031272
1273+ # External mode only supports online mode (workflow=None)
1274+ if self .external_mode and workflow is not None :
1275+ raise ValueError (
1276+ "External model mode only supports online mode (workflow=None). "
1277+ "Agent-based workflows are not supported with external models."
1278+ )
1279+
1280+ if self .external_mode and group_size > 1 :
1281+ raise ValueError (
1282+ "External model mode requires group_size=1, "
1283+ f"got group_size={ group_size } ."
1284+ )
1285+
12041286 # (a) None → online mode: create InferenceServiceWorkflow without agent
12051287 if workflow is None :
12061288 from areal .experimental .inference_service .controller .workflow import (
0 commit comments