
Commit 3b869e8

fix(infra): switch nvidia-modelopt from [all] to [hf] extra
The [all] extra includes [onnx] which pulls in cppimport — a package with broken build metadata (reports version 0.0.0 instead of 26.4.17). Since AReaL only needs HuggingFace integration (accelerate, peft, transformers, deepspeed) and not ONNX export tooling, switch to [hf]. This also removes ~400MB of unnecessary deps (onnxruntime-gpu, cupy-cuda12x, onnx, etc.) and the now-unneeded cppimport dependency-metadata override.
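To see why a package that misreports its version as 0.0.0 breaks resolution, consider how a resolver checks a candidate version against a constraint. This is a minimal sketch using the `packaging` library; the `>=21.0` constraint is hypothetical, standing in for whatever lower bound a dependent package declares on cppimport.

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# Hypothetical lower-bound constraint a dependent package might declare
# on cppimport; the actual failure comes from cppimport's wheel metadata
# reporting 0.0.0 instead of its real version.
constraint = SpecifierSet(">=21.0")

print(Version("26.4.17") in constraint)  # True  — the real release satisfies it
print(Version("0.0.0") in constraint)    # False — the broken metadata does not
```

A wheel that reports 0.0.0 therefore fails every non-trivial version constraint, even though the installed code is current, which is why the override (or dropping the dependency entirely, as this commit does) is needed.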
1 parent: d770654

File tree: 4 files changed, +12 −400 lines


pyproject.toml

Lines changed: 1 addition & 6 deletions
```diff
@@ -169,7 +169,7 @@ cuda-train = [
     "areal[tms]",
     "areal[megatron]",
     "areal[kernels]",
-    "nvidia-modelopt[all]; sys_platform == 'linux' and platform_machine == 'x86_64'",
+    "nvidia-modelopt[hf]; sys_platform == 'linux' and platform_machine == 'x86_64'",
 ]
 # Full CUDA setup: training packages + SGLang inference + flash-attn
 cuda = [
@@ -251,11 +251,6 @@ override-dependencies = [
 
 # Static metadata so uv lock resolves flash-attn without downloading or building.
 # The actual pre-built wheel (variant-specific) is installed in the Dockerfile.
-[[tool.uv.dependency-metadata]]
-name = "cppimport"
-version = "26.4.17"
-requires-dist = []
-
 [[tool.uv.dependency-metadata]]
 name = "flash-attn"
 version = "2.8.3"
```
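The changed line is a standard PEP 508 requirement string combining an extra with environment markers. A quick sketch with the `packaging` library shows what the `[hf]` extra and the platform gate actually encode:

```python
from packaging.requirements import Requirement

# Parse the exact dependency string from the diff above.
req = Requirement(
    "nvidia-modelopt[hf]; sys_platform == 'linux' and platform_machine == 'x86_64'"
)

print(req.name)    # nvidia-modelopt
print(req.extras)  # {'hf'}

# The marker restricts installation to x86_64 Linux:
print(req.marker.evaluate({"sys_platform": "linux", "platform_machine": "x86_64"}))   # True
print(req.marker.evaluate({"sys_platform": "darwin", "platform_machine": "arm64"}))   # False
```

On any other platform the requirement is simply skipped, so the extra never affects macOS or ARM installs either way.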

pyproject.vllm.toml

Lines changed: 1 addition & 6 deletions
```diff
@@ -180,7 +180,7 @@ cuda-train = [
     "areal[tms]",
     "areal[megatron]",
     "areal[kernels]",
-    "nvidia-modelopt[all]; sys_platform == 'linux' and platform_machine == 'x86_64'",
+    "nvidia-modelopt[hf]; sys_platform == 'linux' and platform_machine == 'x86_64'",
 ]
 # Full CUDA setup: training packages + vLLM inference + flash-attn
 cuda = [
@@ -249,11 +249,6 @@ override-dependencies = [
 ]
 
 # flash-attn is a compiled CUDA extension — provide static metadata to avoid building.
-[[tool.uv.dependency-metadata]]
-name = "cppimport"
-version = "26.4.17"
-requires-dist = []
-
 [[tool.uv.dependency-metadata]]
 name = "flash-attn"
 version = "2.8.3"
```
