Skip to content

add top-p and top-k arg

eff4294
Select commit
Loading
Failed to load commit list.
Open

Add GPU-side Gumbel-max sampling for CUDA graph compatibility #18844

add top-p and top-k arg
eff4294
Select commit
Loading
Failed to load commit list.
PyTorch Bot / Dr.CI completed Apr 24, 2026 in 0s

Dr.CI classification results

{"FAILED":[{"workflowId":24876491282,"workflowUniqueId":198852395,"id":72834220253,"runnerName":"i-0685cbcd8c8307db3","authorEmail":"gasoonjia@icloud.com","name":"Test Metal Backend / test-metal-qwen35-moe-tiny / macos-job","jobName":"test-metal-qwen35-moe-tiny / macos-job","conclusion":"failure","completed_at":"2026-04-24T07:00:15.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491282/job/72834220253","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834220253","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["/Users/ec2-user/runner/_work/executorch/executorch/pytorch/executorch/examples/models/qwen3_5_moe/main.cpp:352:3: error: use of undeclared identifier 'cudaMemGetInfo'"],"failure_lines":["/Users/ec2-user/runner/_work/executorch/executorch/pytorch/executorch/examples/models/qwen3_5_moe/main.cpp:352:3: error: use of undeclared identifier 'cudaMemGetInfo'"],"failure_context":[],"time":"2026-04-24T06:52:14.000000000Z"},{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223474,"runnerName":"i-09e3355e80b3542ea","authorEmail":"gasoonjia@icloud.com","name":"pull / unittest / macos / macos-job","jobName":"unittest / macos / macos-job","conclusion":"failure","completed_at":"2026-04-24T08:41:39.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223474","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223474","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["backends/xnnpack/test/ops/test_conv2d.py::TestConv2d::test_fp16_conv2d"],"failure_lines":["FAILED backends/xnnpack/test/ops/test_conv2d.py::TestConv2d::test_fp16_conv2d - AssertionError: Output 0 does not match reference output."],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876492409,"workflowUniqueId":195669493,"id":72842195562,"runnerName":"i-065cb7c40ec683bcf-1002","authorEmail":"gasoonjia@icloud.com","name":"Test CUDA Builds / test-model-cuda-e2e (SocialLocalMobile, Qwen3.5-35B-A3B-HQQ-INT4, quantized-int4-tile-packed) / linux-job","jobName":"test-model-cuda-e2e (SocialLocalMobile, Qwen3.5-35B-A3B-HQQ-INT4, quantized-int4-tile-packed) / linux-job","conclusion":"failure","completed_at":"2026-04-24T08:46:01.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876492409/job/72842195562","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72842195562","head_branch":"ciflow/cuda/18844","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 802b0706f9db7e8675dbad4fc76b46a6ec211f3003fc7ddfa496fa3b1a9e81e1 /exec failed with exit code 127"],"failure_lines":["RuntimeError: Command docker exec -t 802b0706f9db7e8675dbad4fc76b46a6ec211f3003fc7ddfa496fa3b1a9e81e1 /exec failed with exit code 127"],"failure_context":[],"time":"2026-04-24T08:00:18.000000000Z"},{"workflowId":24876492428,"workflowUniqueId":216132948,"id":72834223851,"runnerName":"i-0269a1ff7b8eed2af","authorEmail":"gasoonjia@icloud.com","name":"Test CUDA Windows Export and E2E / export-model-cuda-windows-artifact (nvidia, parakeet-tdt, quantized-int4-weight-only) / linux-job","jobName":"export-model-cuda-windows-artifact (nvidia, parakeet-tdt, quantized-int4-weight-only) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:13:56.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876492428/job/72834223851","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223851","head_branch":"ciflow/cuda/18844","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["RuntimeError: Command docker exec -t af1db5990796c2666f85961a78f4e3712b62fc0213e1a2e9687faa3b1dfc2c5b /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t af1db5990796c2666f85961a78f4e3712b62fc0213e1a2e9687faa3b1dfc2c5b /exec failed with exit code 1"],"failure_context":[],"time":"2026-04-24T06:52:16.000000000Z"},{"workflowId":24876492428,"workflowUniqueId":216132948,"id":72834223854,"runnerName":"i-047a8218278e35dd5","authorEmail":"gasoonjia@icloud.com","name":"Test CUDA Windows Export and E2E / export-model-cuda-windows-artifact (nvidia, parakeet-tdt, non-quantized) / linux-job","jobName":"export-model-cuda-windows-artifact (nvidia, parakeet-tdt, non-quantized) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:14:11.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876492428/job/72834223854","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223854","head_branch":"ciflow/cuda/18844","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 2570c9deee066bbc50ce1699e388e785d449dcd68f5a25ea3c35b1aa4864c425 /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t 2570c9deee066bbc50ce1699e388e785d449dcd68f5a25ea3c35b1aa4864c425 /exec failed with exit code 1"],"failure_context":[],"time":"2026-04-24T06:52:16.000000000Z"},{"workflowId":24876492428,"workflowUniqueId":216132948,"id":72834223855,"runnerName":"i-01ac04f75cab9a157","authorEmail":"gasoonjia@icloud.com","name":"Test CUDA Windows Export and E2E / export-model-cuda-windows-artifact (facebook, dinov2-small-imagenet1k-1-layer, non-quantized) / linux-job","jobName":"export-model-cuda-windows-artifact (facebook, dinov2-small-imagenet1k-1-layer, non-quantized) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:12:28.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876492428/job/72834223855","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223855","head_branch":"ciflow/cuda/18844","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 72845024f28fbeec896ff02b30fedc8112672b255c504cb67a52c16a5dd1d737 /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t 72845024f28fbeec896ff02b30fedc8112672b255c504cb67a52c16a5dd1d737 /exec failed with exit code 1"],"failure_context":[],"time":"2026-04-24T06:52:16.000000000Z"}],"FLAKY":[{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223039,"runnerName":"i-0cdeb1429539c084b","authorEmail":"gasoonjia@icloud.com","name":"pull / test-models-linux (emformer_join, portable, linux.4xlarge.memory) / linux-job","jobName":"test-models-linux (emformer_join, portable, linux.4xlarge.memory) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:07:34.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223039","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223039","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223277,"runnerName":"i-0bd75a6e652b469e3","authorEmail":"gasoonjia@icloud.com","name":"pull / test-models-linux (emformer_join, xnnpack-quantization-delegation, linux.4xlarge.memory) / linux-job","jobName":"test-models-linux (emformer_join, xnnpack-quantization-delegation, linux.4xlarge.memory) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:07:53.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223277","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223277","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223150,"runnerName":"i-07d8a801b8db5a5b5","authorEmail":"gasoonjia@icloud.com","name":"pull / test-models-linux (ic4, portable, linux.4xlarge.memory) / linux-job","jobName":"test-models-linux (ic4, portable, linux.4xlarge.memory) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:07:42.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223150","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223150","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223275,"runnerName":"i-06658d85cb353f7f5","authorEmail":"gasoonjia@icloud.com","name":"pull / test-models-linux (ic4, xnnpack-quantization-delegation, linux.4xlarge.memory) / linux-job","jobName":"test-models-linux (ic4, xnnpack-quantization-delegation, linux.4xlarge.memory) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:06:49.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223275","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223275","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223075,"runnerName":"i-01bff797d2855bea6","authorEmail":"gasoonjia@icloud.com","name":"pull / test-models-linux (llama3_2_vision_encoder, portable, linux.4xlarge.memory) / linux-job","jobName":"test-models-linux (llama3_2_vision_encoder, portable, linux.4xlarge.memory) / linux-job","conclusion":"failure","completed_at":"2026-04-24T07:07:38.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223075","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223075","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223451,"runnerName":"i-0f9f99ff742b33975","authorEmail":"gasoonjia@icloud.com","name":"pull / unittest / windows / windows-job","jobName":"unittest / windows / windows-job","conclusion":"cancelled","completed_at":"2026-04-24T08:57:48.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223451","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223451","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["##[error]The operation was canceled."],"failure_lines":["##[error]The operation was canceled."],"failure_context":[],"time":"2026-04-24T06:52:15.000000000Z"},{"workflowId":24876491282,"workflowUniqueId":198852395,"id":72834220421,"runnerName":"i-01d3bd5778036cde5","authorEmail":"gasoonjia@icloud.com","name":"Test Metal Backend / export-model-metal-artifact (mistralai, Voxtral-Mini-4B-Realtime-2602, quantized-int4-metal) / macos-job","jobName":"export-model-metal-artifact (mistralai, Voxtral-Mini-4B-Realtime-2602, quantized-int4-metal) / macos-job","conclusion":"failure","completed_at":"2026-04-24T07:07:46.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491282/job/72834220421","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834220421","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["File doesn't exist"],"failure_lines":["##[error]File doesn't exist"],"failure_context":[],"time":"2026-04-24T06:52:14.000000000Z"}],"BROKEN_TRUNK":[{"workflowId":24876491344,"workflowUniqueId":63454257,"id":72834223558,"runnerName":"i-0f2a7ffed4cb8edbb","authorEmail":"gasoonjia@icloud.com","name":"pull / unittest-editable / windows / windows-job","jobName":"unittest-editable / windows / windows-job","conclusion":"cancelled","completed_at":"2026-04-24T08:57:48.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24876491344/job/72834223558","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/72834223558","head_branch":"cuda-graph-sampling","pr_number":18844,"head_sha":"eff4294a721887354e8e72423da1a68e6c15d225","head_sha_timestamp":"2026-04-24T06:51:16.000000000Z","failure_captures":["##[error]The operation was canceled."],"failure_lines":["##[error]The operation was canceled."],"failure_context":[],"time":"2026-04-24T06:52:16.000000000Z"}],"UNSTABLE":[],"AWAITING_APPROVAL":[]}