Commit b658ff3 (1 parent: 6860e70)

fix(api): replace Literal type with str for SchedulingSpec.ray_placement_strategy

The Literal type annotation breaks omegaconf config loading: omegaconf 2.4.0.dev2 (and later dev versions) don't support Literal in structured configs. This caused a ValidationError on any config-loading path that touches SchedulingSpec, including `scheduler.type=local`, which doesn't use Ray.

Changes:
- Change the type from `Literal["shared", "separate", "deferred"]` to `str`
- Add `__post_init__` validation to ensure `ray_placement_strategy` is valid
- Remove the now-unused `Literal` import

Fixes #975

3 files changed: 103 additions & 91 deletions
areal/api/cli_args.py (11 additions & 2 deletions)

```diff
@@ -5,7 +5,7 @@
 from dataclasses import asdict, dataclass, field, fields
 from enum import Enum
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Literal
+from typing import TYPE_CHECKING, Any, ClassVar
 
 import uvloop
 import yaml
@@ -854,7 +854,7 @@ class SchedulingSpec:
     exclude: str | None = field(
         default=None, metadata={"help": "sbatch/srun's `--exclude` option for slurm."}
     )
-    ray_placement_strategy: Literal["shared", "separate", "deferred"] = field(
+    ray_placement_strategy: str = field(
         default="shared",
         metadata={
             "help": "Which placement strategy to use for Ray scheduling. "
@@ -865,6 +865,15 @@ class SchedulingSpec:
         },
     )
 
+    def __post_init__(self):
+        """Validate scheduling spec configuration."""
+        valid_strategies = {"shared", "separate", "deferred"}
+        if self.ray_placement_strategy not in valid_strategies:
+            raise ValueError(
+                f"ray_placement_strategy must be one of {valid_strategies}, "
+                f"got '{self.ray_placement_strategy}'"
+            )
+
 
 @dataclass
 class TrainEngineConfig:
```
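The pattern behind this change, replacing a static `Literal` constraint with a plain `str` field plus a runtime check in `__post_init__`, can be sketched in isolation. This is a minimal stand-in for the real `SchedulingSpec` (only the affected field is reproduced here):

```python
from dataclasses import dataclass

VALID_STRATEGIES = {"shared", "separate", "deferred"}


@dataclass
class SchedulingSpecSketch:
    # A plain `str` annotation keeps the field compatible with omegaconf
    # structured configs, which reject Literal[...] in recent dev releases.
    ray_placement_strategy: str = "shared"

    def __post_init__(self):
        # Runtime validation replaces the static Literal constraint, so an
        # invalid value still fails loudly, just at construction time.
        if self.ray_placement_strategy not in VALID_STRATEGIES:
            raise ValueError(
                f"ray_placement_strategy must be one of {VALID_STRATEGIES}, "
                f"got '{self.ray_placement_strategy}'"
            )


# Valid values (including the default) construct normally.
spec = SchedulingSpecSketch()
assert spec.ray_placement_strategy == "shared"
assert SchedulingSpecSketch("deferred").ray_placement_strategy == "deferred"

# An invalid value raises immediately instead of slipping through as a bare str.
try:
    SchedulingSpecSketch("bogus")
except ValueError as exc:
    print(exc)
```

The trade-off is that the mistake is no longer caught by static type checkers, but the error surfaces at dataclass construction, which is early enough for config loading.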

docs/en/cli_reference.md (17 additions & 16 deletions)

The hunk at `@@ -945,22 +945,23 @@` re-aligns the `SchedulingSpec` parameter table and adds a row for the new option. The resulting table:

Configuration class: SchedulingSpec

| Parameter | Type | Default | Description |
| ------------------------ | ---------------------- | -------------------------------------------- | ----------- |
| `cpu` | integer | `8` | Number of CPU cores required per GPU |
| `gpu` | integer | `0` | Number of GPU units required. Used only when allocating pods. |
| `mem` | integer | `32` | Amount of memory (GB) required per GPU |
| `port_count` | integer | `2` | Number of ports to expose |
| `image` | string | `"/storage/openpsi/images/areal-latest.sif"` | Docker/Singularity container image to use. Currently only used by Slurm. Will potentially be used by Kubernetes in the future. |
| `task_type` | string | `"worker"` | Task type (e.g., worker, engine) **Choices:** `worker`, `engine` |
| `env_vars` | `dict` | **Required** | Environment variables for the container |
| `cmd` | string \| None | `None` | Command to execute inside the container. Defaults to AReaL's RPC server. |
| `srun_additional_args` | string | `"--unbuffered --mpi=pmi2 -K --chdir $PWD"` | Additional arguments to pass to the srun command. Only used by slurm. |
| `additional_bash_cmds` | list of string \| None | `None` | Additional bash commands to set up the container before running the torchrun command. Only used by slurm. |
| `container_type` | string | `"apptainer"` | Type of containers used in slurm **Choices:** `apptainer`, `none` |
| `mount` | string | `"/storage:/storage"` | Mount path for slurm. |
| `nodelist` | string \| None | `None` | sbatch/srun's `--nodelist` option for slurm. |
| `exclude` | string \| None | `None` | sbatch/srun's `--exclude` option for slurm. |
| `ray_placement_strategy` | string | `"shared"` | Which placement strategy to use for Ray scheduling. Shared produces one placement group for all workers in the role (training). Separate produces one placement group per worker (rollout). Deferred does the same as separate but defers accelerator scheduling (multinode rollout). **Choices:** `shared`, `separate`, `deferred` |

(section-scheduling-strategy)=
