inclusionAI
diff --git a/areal/api/cli_args.py b/areal/api/cli_args.py
Lines changed: 23 additions & 0 deletions
diff --git a/areal/infra/dist_rollout.py b/areal/infra/dist_rollout.py
Lines changed: 11 additions & 3 deletions
diff --git a/areal/utils/data.py b/areal/utils/data.py
Lines changed: 17 additions & 1 deletion
@@ -27,6 +27,7 @@
     PROX_LOGP_METHODS_ALL,
 )
 from areal.utils.pkg_version import is_version_less
+from areal.utils.seqpack import PACKING_ALGORITHMS
 
 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizerFast
@@ -123,6 +124,27 @@ class MicroBatchSpec:
             "help": "Divisor for the number of micro-batches. The final number of micro-batches will be adjusted to be divisible by this value.",
         },
     )
+    # Name of the sequence-packing algorithm used for micro-batch allocation.
+    # The value is checked against PACKING_ALGORITHMS in __post_init__.
+    # NOTE(review): "choices" below is a hard-coded list, whereas validation
+    # consults PACKING_ALGORITHMS — keep the two in sync when adding algorithms.
+    packing_algorithm: str = field(
+        default="ffd",
+        metadata={
+            "help": (
+                "Sequence packing algorithm for micro-batch allocation. "
+                "Supported values: 'ffd' (First Fit Decreasing, default), "
+                "'kk' (Karmarkar-Karp, better balance but slightly slower). "
+                "KK is recommended when workload balance across DP ranks is "
+                "critical (e.g., large-scale RL training with variable-length sequences)."
+            ),
+            "choices": ["ffd", "kk"],
+        },
+    )
+
+    def __post_init__(self):
+        """Check that ``packing_algorithm`` names a known packing algorithm.
+
+        Raises:
+            ValueError: If ``packing_algorithm`` is not a member of
+                ``PACKING_ALGORITHMS``.
+        """
+        # Guard clause: nothing to do for a recognised algorithm name.
+        if self.packing_algorithm in PACKING_ALGORITHMS:
+            return
+        raise ValueError(
+            f"packing_algorithm must be one of {sorted(PACKING_ALGORITHMS)}, "
+            f"got '{self.packing_algorithm}'"
+        )
 
     @classmethod
     def new(cls, mb_spec: "MicroBatchSpec", **kwargs):
@@ -132,6 +154,7 @@ def new(cls, mb_spec: "MicroBatchSpec", **kwargs):
             granularity=mb_spec.granularity,
             max_tokens_per_mb=mb_spec.max_tokens_per_mb,
             n_mbs_divisor=mb_spec.n_mbs_divisor,
+            packing_algorithm=mb_spec.packing_algorithm,
         )
         fields.update(kwargs)
         return cls(**fields)
 
@@ -15,7 +15,7 @@
     split_and_unpad_tensor,
     tensor_container_to,
 )
-from areal.utils.seqpack import ffd_allocate
+from areal.utils.seqpack import get_allocate_fn
 
 
 @dataclass
@@ -29,6 +29,7 @@ class RedistributedData:
 def redistribute_trajectories(
     trajectories: list[dict[str, Any]],
     group=None,
+    packing_algorithm: str = "ffd",
 ) -> RedistributedData:
     """Redistribute a list of trajectory dicts across a process group.
 
@@ -43,6 +44,8 @@ def redistribute_trajectories(
         contains tensors with shape [batch_size, seqlen, ...].
     group : dist.ProcessGroup, optional
         The process group for communication. If None, uses the default group.
+    packing_algorithm : str, optional
+        Packing algorithm to use ("ffd" or "kk"). Default is "ffd".
 
     Returns
     -------
@@ -73,9 +76,10 @@ def redistribute_trajectories(
         for d in all_data
     ]
 
-    # Allocate trajectories to ranks using first-fit-decreasing
+    allocate_fn = get_allocate_fn(packing_algorithm)
+    # Allocate trajectories to ranks using the configured packing algorithm
     # No capacity limit leads to balanced partition across this group
-    group_indices = ffd_allocate(
+    group_indices = allocate_fn(
         seqlens, capacity=int(1e12), min_groups=dist.get_world_size(group)
     )
     local_indices = group_indices[dist.get_rank(group=group)]
@@ -119,9 +123,13 @@ def _broadcast_and_redistribute_trajectories(
             Redistributed and broadcast batch available on all ranks (list of trajs)
         """
         if trajectories is not None:
+            config = getattr(self.train_engine, "config", None)
+            mb_spec = getattr(config, "mb_spec", None)
+            packing_algorithm = getattr(mb_spec, "packing_algorithm", "ffd")
             redist = redistribute_trajectories(
                 trajectories,
                 group=self.train_engine.data_parallel_group,
+                packing_algorithm=packing_algorithm,
             )
             batch = redist.data
         else:
 
@@ -19,6 +19,7 @@
 from areal.infra.platforms import current_platform
 from areal.utils import logging, seqpack
 from areal.utils.math import align
+from areal.utils.seqpack import get_allocate_fn
 
 logger = logging.getLogger("DataUtils")
 
@@ -445,8 +446,23 @@ def unpack_sequence(
 
 
 def allocate_balanced_mbs(mb_spec: MicroBatchSpec, lens: list[int]) -> list[list[int]]:
+    """Allocate sequences into balanced micro-batches using the configured algorithm.
+
+    The packing algorithm is determined by ``mb_spec.packing_algorithm``:
+      - ``"ffd"`` (default): First Fit Decreasing — fast greedy heuristic.
+      - ``"kk"``: Karmarkar-Karp — produces more balanced partitions at a
+        slight computational cost.
+
+    Args:
+        mb_spec: MicroBatchSpec containing packing configuration.
+        lens: List of sequence lengths to allocate.
+
+    Returns:
+        List of lists of indices, one per micro-batch.
+    """
     assert mb_spec.max_tokens_per_mb is not None
-    group_indices = seqpack.ffd_allocate(
+    allocate_fn = get_allocate_fn(getattr(mb_spec, "packing_algorithm", "ffd"))
+    group_indices = allocate_fn(
         lens,
         mb_spec.max_tokens_per_mb,
         min_groups=mb_spec.n_mbs,