Skip to content

Commit 840e4a6

Browse files
authored
Merge branch 'main' into feat/auto-completions-338
2 parents bfcdfa5 + 42cfd26 commit 840e4a6

16 files changed

Lines changed: 423 additions & 31 deletions

.github/workflows/gpu-h100-conformance-test.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,17 @@ on:
4343
- 'recipes/overlays/h100-kind-conformance.yaml'
4444
- 'kwok/manifests/karpenter/**'
4545
- 'kwok/scripts/install-karpenter-kwok.sh'
46+
# Collector/snapshotter — affects GPU detection and snapshot content
47+
- 'pkg/collector/**'
48+
- 'pkg/snapshotter/**'
49+
# Validator infrastructure — affects how validator Jobs are deployed and run
50+
- 'pkg/validator/job/**'
51+
- 'pkg/validator/catalog/**'
52+
- 'pkg/defaults/timeouts.go'
53+
# Conformance validator source
54+
- 'validators/conformance/**'
55+
pull_request:
56+
types: [labeled]
4657
workflow_dispatch: {} # Allow manual runs
4758

4859
permissions:
@@ -55,6 +66,9 @@ concurrency:
5566
jobs:
5667

5768
gpu-conformance-test:
69+
if: >
70+
github.event_name != 'pull_request' ||
71+
github.event.label.name == 'run-gpu-tests'
5872
name: GPU Conformance Test (nvkind + H100 x2)
5973
runs-on: linux-amd64-gpu-h100-latest-2
6074
timeout-minutes: 60

.github/workflows/gpu-h100-inference-test.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ on:
4141
- 'recipes/overlays/h100-kind-inference-dynamo.yaml'
4242
- 'kwok/manifests/karpenter/**'
4343
- 'kwok/scripts/install-karpenter-kwok.sh'
44+
# Collector/snapshotter — affects GPU detection and snapshot content
45+
- 'pkg/collector/**'
46+
- 'pkg/snapshotter/**'
47+
- '.github/actions/gpu-snapshot-validate/**'
48+
# Validator infrastructure — affects how validator Jobs are deployed and run
49+
- 'pkg/validator/job/**'
50+
- 'pkg/validator/catalog/**'
51+
- 'pkg/defaults/timeouts.go'
52+
# Conformance validator source
53+
- 'validators/conformance/**'
54+
pull_request:
55+
types: [labeled]
4456
workflow_dispatch: {} # Allow manual runs
4557

4658
permissions:
@@ -53,6 +65,9 @@ concurrency:
5365
jobs:
5466

5567
gpu-inference-test:
68+
if: >
69+
github.event_name != 'pull_request' ||
70+
github.event.label.name == 'run-gpu-tests'
5671
name: GPU Inference Test (nvkind + H100)
5772
runs-on: linux-amd64-gpu-h100-latest-1
5873
timeout-minutes: 45

.github/workflows/gpu-h100-training-test.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,18 @@ on:
3737
- 'kwok/manifests/karpenter/**'
3838
- 'kwok/scripts/install-karpenter-kwok.sh'
3939
- 'recipes/components/prometheus-adapter/**'
40+
# Collector/snapshotter — affects GPU detection and snapshot content
41+
- 'pkg/collector/**'
42+
- 'pkg/snapshotter/**'
43+
- '.github/actions/gpu-snapshot-validate/**'
44+
# Validator infrastructure — affects how validator Jobs are deployed and run
45+
- 'pkg/validator/job/**'
46+
- 'pkg/validator/catalog/**'
47+
- 'pkg/defaults/timeouts.go'
48+
# Conformance validator source
49+
- 'validators/conformance/**'
50+
pull_request:
51+
types: [labeled]
4052
workflow_dispatch: {} # Allow manual runs
4153

4254
permissions:
@@ -49,6 +61,9 @@ concurrency:
4961
jobs:
5062

5163
gpu-training-test:
64+
if: >
65+
github.event_name != 'pull_request' ||
66+
github.event.label.name == 'run-gpu-tests'
5267
name: GPU Training Test (nvkind + H100 x2)
5368
runs-on: linux-amd64-gpu-h100-latest-2
5469
timeout-minutes: 45

.github/workflows/gpu-smoke-test.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,18 @@ on:
2727
- '.github/actions/aicr-build/**'
2828
- '.github/actions/gpu-test-cleanup/**'
2929
- '.github/actions/load-versions/**'
30+
# Collector/snapshotter — affects GPU detection and snapshot content
31+
- 'pkg/collector/**'
32+
- 'pkg/snapshotter/**'
33+
- '.github/actions/gpu-snapshot-validate/**'
34+
# Validator infrastructure — affects how validator Jobs are deployed and run
35+
- 'pkg/validator/job/**'
36+
- 'pkg/validator/catalog/**'
37+
- 'pkg/defaults/timeouts.go'
38+
# Conformance validator source
39+
- 'validators/conformance/**'
40+
pull_request:
41+
types: [labeled]
3042
workflow_dispatch: {} # Allow manual runs
3143

3244
permissions:
@@ -39,6 +51,9 @@ concurrency:
3951
jobs:
4052

4153
gpu-smoke-test:
54+
if: >
55+
github.event_name != 'pull_request' ||
56+
github.event.label.name == 'run-gpu-tests'
4257
name: GPU Smoke Test (nvkind + T4)
4358
runs-on: linux-amd64-gpu-t4-latest-1
4459
timeout-minutes: 30

.github/workflows/triage.yaml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ jobs:
2828
name: Auto-Triage Issues
2929
runs-on: ubuntu-latest
3030
permissions:
31+
contents: read
3132
issues: write
3233
timeout-minutes: 5
3334
steps:
@@ -63,3 +64,49 @@ jobs:
6364
name: 'needs-triage',
6465
});
6566
}
67+
68+
- name: Assign by area label
69+
if: >-
70+
github.event.action == 'labeled' &&
71+
startsWith(github.event.label.name, 'area/')
72+
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
73+
with:
74+
script: |
75+
// Assign the issue to the user mapped to the area label that was just added.
// Step 1: fetch .settings.yaml through the repository contents API.
// NOTE(review): no `ref` is passed — presumably this resolves to the default branch; confirm.
const { data: file } = await github.rest.repos.getContent({
76+
owner: context.repo.owner,
77+
repo: context.repo.repo,
78+
path: '.settings.yaml',
79+
});
80+
// The file body is decoded from base64 before it is scanned as text.
const content = Buffer.from(file.content, 'base64').toString('utf8');
81+
82+
// Parse area_assignees from .settings.yaml
// This is a line-based scan, not a YAML parser: it collects `area/<name>: <value>`
// pairs from indented lines that follow the top-level `area_assignees:` key.
83+
const assignees = {};
84+
let inBlock = false;
85+
for (const line of content.split('\n')) {
86+
if (line.startsWith('area_assignees:')) {
87+
inBlock = true;
88+
continue;
89+
}
90+
if (inBlock) {
91+
// The value is the first run of non-whitespace after the colon, so any quotes
// around a username would be kept as part of the captured value.
const m = line.match(/^\s+(area\/\S+):\s*(\S+)/);
92+
if (m) {
93+
assignees[m[1]] = m[2];
94+
// A line starting at column 0 ends the block; blank or indented lines that
// do not match are simply skipped.
} else if (/^\S/.test(line)) {
95+
break;
96+
}
97+
}
98+
}
99+
100+
// Step 2: look up the label carried by the triggering event payload.
const label = context.payload.label.name;
101+
const user = assignees[label];
102+
// No mapping for this label: log it and finish without assigning anyone.
if (!user) {
103+
core.info(`No assignee configured for ${label}`);
104+
return;
105+
}
106+
107+
// Step 3: add the mapped user as an assignee on the issue from the event context.
await github.rest.issues.addAssignees({
108+
owner: context.repo.owner,
109+
repo: context.repo.repo,
110+
issue_number: context.issue.number,
111+
assignees: [user],
112+
});

.settings.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,19 @@ testing_tools:
5353
yq: 'v4.52.4'
5454
karpenter: 'v1.8.0'
5555

56+
# Area Assignees (GitHub usernames assigned to issues by area label)
57+
area_assignees:
58+
area/api: cullenmcdermott
59+
area/bundler: ArangoGutierrez
60+
area/ci: mchmarny
61+
area/cli: lockwobr
62+
area/collector: ayuskauskas
63+
area/docs: dims
64+
area/infra: mchmarny
65+
area/recipes: yuanchen8911
66+
area/tests: atif1996
67+
area/validator: xdu31
68+
5669
# Quality Thresholds
5770
quality:
5871
coverage_threshold: '70'

pkg/bundler/verifier/trust.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package verifier
1616

1717
import (
1818
"fmt"
19+
"sort"
1920
"strings"
2021

2122
"github.com/NVIDIA/aicr/pkg/constraints"
@@ -72,6 +73,17 @@ func ParseTrustLevel(s string) (TrustLevel, error) {
7273
return level, nil
7374
}
7475

76+
// GetTrustLevels returns all valid trust level names sorted alphabetically.
77+
// This excludes "max" which is a meta-value for auto-detection, not a real level.
// The names are taken from the keys of trustOrder, so the "max" exclusion holds
// only while trustOrder has no such key (its definition is not shown here).
// A new slice is built and returned on every call.
78+
func GetTrustLevels() []string {
79+
levels := make([]string, 0, len(trustOrder))
80+
// NOTE(review): trustOrder is presumably a map keyed by TrustLevel — confirm.
for level := range trustOrder {
81+
levels = append(levels, string(level))
82+
}
83+
// Sort so that callers get a stable, alphabetical order.
sort.Strings(levels)
84+
return levels
85+
}
86+
7587
// VerifyResult contains the outcome of bundle verification.
7688
type VerifyResult struct {
7789
// TrustLevel is the computed trust level for the bundle.

pkg/bundler/verifier/trust_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,17 @@ func TestMaxAchievableTrustLevel(t *testing.T) {
229229
})
230230
}
231231
}
232+
233+
// TestGetTrustLevels pins the exact list returned by GetTrustLevels, including
// its order, by comparing it element by element against a fixed expectation.
func TestGetTrustLevels(t *testing.T) {
234+
levels := GetTrustLevels()
235+
236+
// Expected names, listed in alphabetical order; "max" is not among them.
expected := []string{"attested", "unknown", "unverified", "verified"}
237+
// Fail fast on a length mismatch so indexing expected[i] below stays in range.
if len(levels) != len(expected) {
238+
t.Fatalf("got %d levels, want %d", len(levels), len(expected))
239+
}
240+
// Errorf rather than Fatalf, so every mismatching position is reported.
for i, v := range levels {
241+
if v != expected[i] {
242+
t.Errorf("levels[%d] = %q, want %q", i, v, expected[i])
243+
}
244+
}
245+
}

pkg/cli/bundle.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,13 +298,13 @@ Package with explicit tag (overrides CLI version):
298298
Usage: "Estimated number of GPU nodes (written to nodeScheduling.nodeCountPaths in registry). 0 = unset.",
299299
Category: "Scheduling",
300300
},
301-
&cli.StringFlag{
301+
withCompletions(&cli.StringFlag{
302302
Name: "deployer",
303303
Aliases: []string{"d"},
304304
Value: string(config.DeployerHelm),
305305
Usage: fmt.Sprintf("Deployment method (e.g. %s)", strings.Join(config.GetDeployerTypes(), ", ")),
306306
Category: "Deployment",
307-
},
307+
}, config.GetDeployerTypes),
308308
&cli.StringFlag{
309309
Name: "repo",
310310
Value: "",

pkg/cli/bundle_verify.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,13 @@ Output as JSON:
6161
aicr verify ./my-bundle --format json
6262
`,
6363
Flags: []cli.Flag{
64-
&cli.StringFlag{
64+
withCompletions(&cli.StringFlag{
6565
Name: "min-trust-level",
6666
Value: "max",
6767
Usage: `Minimum required trust level. "max" (default) auto-detects the highest
6868
achievable level for this bundle and verifies against it.
6969
Explicit levels: verified, attested, unverified, unknown`,
70-
},
70+
}, verifier.GetTrustLevels),
7171
&cli.StringFlag{
7272
Name: "require-creator",
7373
Usage: "Require a specific creator identity (matched against bundle attestation certificate)",
@@ -83,11 +83,11 @@ Output as JSON:
8383
Usage: `Override the certificate identity pattern for binary attestation verification.
8484
Must contain "NVIDIA/aicr". Default pins to the release workflow on tag refs.`,
8585
},
86-
&cli.StringFlag{
86+
withCompletions(&cli.StringFlag{
8787
Name: "format",
8888
Value: "text",
8989
Usage: "Output format: text, json",
90-
},
90+
}, func() []string { return []string{"json", "text"} }),
9191
},
9292
Action: runBundleVerifyCmd,
9393
}

0 commit comments

Comments
 (0)