# Build and Test Docker Image #403
# Note: the page this file was captured from warned that the file contains
# hidden or bidirectional Unicode text that may be interpreted or compiled
# differently than it appears. Review it in an editor that reveals hidden
# Unicode characters.
# Workflow header: identity, trigger, concurrency, token permissions and the
# environment shared by every job defined below.
name: 'Build and Test Docker Image'

# Manual trigger only; no inputs are declared.
on:
  workflow_dispatch:

# One run per ref: a newer run on the same ref cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: 'build-docker-image-${{ github.ref }}'

# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  packages: write
  contents: read

env:
  # Image coordinates; the build job pushes <IMAGE_NAME>:<IMAGE_TAG>-<variant>.
  IMAGE_NAME: 'ghcr.io/inclusionai/areal-runtime'
  IMAGE_TAG: 'test'
  # GCP project used by the gcloud calls in the start/stop builder jobs.
  GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
  # NOTE(review): the next two values are not referenced by any job visible in
  # this file; confirm whether anything still relies on them.
  VALIDATOR_LABELS: 'gcp-docker-validator'
  RUNNER_VERSION: '2.332.0'

jobs:
# Job start-builder: makes sure the areal-docker-builder VM is up, then waits
# until its self-hosted runner reports online.
  start-builder:
    name: Start areal-docker-builder instance
    runs-on: ubuntu-latest
    outputs:
      # 'true' when the VM was already running before this workflow touched it;
      # the stop-builder job reads this to decide whether to stop the VM.
      was_running: ${{ steps.start-instance.outputs.was_running }}
    env:
      INSTANCE_NAME: areal-docker-builder
      INSTANCE_ZONE: us-central1-f
    steps:
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v3
        with:
          credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v3
      - name: Start builder instance if stopped
        id: start-instance
        run: |
          set -euo pipefail
          # Check instance status. Any failure of `describe` (missing
          # instance, auth or network error) is reported as NOT_FOUND.
          status=$(gcloud compute instances describe "$INSTANCE_NAME" \
            --project "$GCP_PROJECT_ID" \
            --zone "$INSTANCE_ZONE" \
            --format="get(status)" || echo "NOT_FOUND")
          if [ "$status" = "NOT_FOUND" ]; then
            echo "Error: Instance $INSTANCE_NAME not found in zone $INSTANCE_ZONE" >&2
            exit 1
          fi
          if [ "$status" = "RUNNING" ]; then
            echo "Instance $INSTANCE_NAME is already running."
            echo "was_running=true" >> "$GITHUB_OUTPUT"
          elif [ "$status" = "TERMINATED" ]; then
            echo "Instance $INSTANCE_NAME is $status. Starting it..."
            gcloud compute instances start "$INSTANCE_NAME" \
              --project "$GCP_PROJECT_ID" \
              --zone "$INSTANCE_ZONE"
            echo "Instance started successfully."
            echo "was_running=false" >> "$GITHUB_OUTPUT"
          elif [ "$status" = "SUSPENDED" ]; then
            # A suspended VM has to be resumed; `instances start` only applies
            # to instances that were stopped.
            echo "Instance $INSTANCE_NAME is $status. Resuming it..."
            gcloud compute instances resume "$INSTANCE_NAME" \
              --project "$GCP_PROJECT_ID" \
              --zone "$INSTANCE_ZONE"
            echo "Instance resumed successfully."
            echo "was_running=false" >> "$GITHUB_OUTPUT"
          else
            # Transitional states (e.g. STOPPING, PROVISIONING) are treated as
            # errors rather than waited on.
            echo "Instance $INSTANCE_NAME has unexpected status: $status" >&2
            exit 1
          fi
      - name: Wait for builder runner to be online
        uses: actions/github-script@v8
        env:
          INSTANCE_NAME: areal-docker-builder
          GH_PAT: ${{ secrets.GH_PAT }}
        with:
          # The client is created with the PAT instead of the default token.
          # NOTE(review): presumably because listing self-hosted runners needs
          # more access than the workflow token has - confirm.
          github-token: ${{ secrets.GH_PAT }}
          script: |
            // Polls the repository's self-hosted runners until one named
            // INSTANCE_NAME is online: up to 120 attempts, 10 s apart.
            const instanceName = process.env.INSTANCE_NAME;
            const maxAttempts = 120;
            const delayMs = 10000;
            const pat = process.env.GH_PAT;
            if (!pat) {
              core.setFailed('GH_PAT secret is not configured.');
              return;
            }
            const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
            for (let attempt = 1; attempt <= maxAttempts; attempt++) {
              // Only the first 100 runners are inspected (no pagination).
              const response = await github.rest.actions.listSelfHostedRunnersForRepo({
                owner: context.repo.owner,
                repo: context.repo.repo,
                per_page: 100,
                request: {
                  headers: {
                    authorization: `token ${pat}`,
                  },
                },
              });
              const found = response.data.runners.find((runner) => runner.name === instanceName);
              if (found && found.status === 'online') {
                core.info(`Builder runner ${instanceName} is online.`);
                return;
              }
              core.info(`Builder runner ${instanceName} not ready yet (attempt ${attempt}/${maxAttempts}).`);
              await wait(delayMs);
            }
            throw new Error(`Timed out waiting for builder runner ${instanceName} to come online.`);
# Job build-and-push-images: builds the sglang and vllm variants from
# ./Dockerfile on the self-hosted builder and pushes both to GHCR.
  build-and-push-images:
    needs:
      - start-builder
    name: Build and push Docker images
    runs-on: [self-hosted, areal-docker-builder]
    timeout-minutes: 240  # ~90 min/image x 2 + buffer for cache misses
    steps:
      - uses: actions/checkout@v6
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4
        with:
          driver: docker
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: inclusionai
          password: ${{ secrets.GHCR_TOKEN }}
      # The two variants share one Dockerfile and differ only in the VARIANT
      # build argument; they are built one after the other on the same runner.
      - name: Build and push sglang image
        uses: docker/build-push-action@v7
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-sglang
          build-args: |
            VARIANT=sglang
      - name: Build and push vllm image
        uses: docker/build-push-action@v7
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-vllm
          build-args: |
            VARIANT=vllm
      - name: Image details
        env:
          # Ref names can contain shell metacharacters, so they are handed to
          # the script as environment variables instead of being expanded into
          # the script text.
          COMMIT_SHA: ${{ github.sha }}
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
        run: |
          echo "✅ Docker images built and pushed successfully!"
          echo "Images:"
          echo " - ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-sglang"
          echo " - ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}-vllm"
          echo "Commit: $COMMIT_SHA"
          echo "Branch: $BRANCH_NAME"
# Job run-tests-sglang: calls the reusable test workflow for the sglang image
# once the build job has finished.
  run-tests-sglang:
    name: 'Run tests with sglang image'
    needs: [build-and-push-images]
    uses: ./.github/workflows/test-areal.yml
    secrets: inherit
    with:
      # Same tag prefix the build job pushes (IMAGE_TAG in the workflow env).
      image_tag: 'test'
      variant: 'sglang'
# Job run-tests-vllm: calls the reusable test workflow for the vllm image
# once the build job has finished.
  run-tests-vllm:
    name: 'Run tests with vllm image'
    needs: [build-and-push-images]
    uses: ./.github/workflows/test-areal.yml
    secrets: inherit
    with:
      # Same tag prefix the build job pushes (IMAGE_TAG in the workflow env).
      image_tag: 'test'
      variant: 'vllm'
# Job promote-images: after both test jobs succeed, re-tags each variant's
# test image as dev and pushes it (one matrix leg per variant).
  promote-images:
    name: Promote ${{ matrix.variant }} test image to dev
    needs: [run-tests-sglang, run-tests-vllm]
    runs-on: ubuntu-latest
    strategy:
      # A failure promoting one variant does not cancel the other leg.
      fail-fast: false
      matrix:
        variant:
          - sglang
          - vllm
    steps:
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v4
        with:
          registry: ghcr.io
          username: inclusionai
          password: ${{ secrets.GHCR_TOKEN }}
      - name: Pull test image and push as dev
        env:
          IMAGE_NAME: ghcr.io/inclusionai/areal-runtime
        run: |
          # Source (test) and destination (dev) references for this variant.
          src="$IMAGE_NAME:test-${{ matrix.variant }}"
          dst="$IMAGE_NAME:dev-${{ matrix.variant }}"
          docker pull "$src"
          docker tag "$src" "$dst"
          docker push "$dst"
          echo "✅ ${{ matrix.variant }} image promoted from :test-${{ matrix.variant }} to :dev-${{ matrix.variant }}"
# Job bake-gcp-image: calls the reusable bake workflow with the dev tag once
# the images have been promoted.
  bake-gcp-image:
    name: 'Bake GCP CI image with promoted Docker images'
    needs: [promote-images]
    uses: ./.github/workflows/bake-gcp-image.yml
    secrets: inherit
    with:
      image_tag: 'dev'
# Job cleanup-test-images: removes each variant's temporary test image from
# GHCR once testing and promotion have finished, whatever their outcome.
  cleanup-test-images:
    name: Delete ${{ matrix.variant }} test image from registry
    needs:
      - build-and-push-images
      - run-tests-sglang
      - run-tests-vllm
      - promote-images
    # Runs even when tests or promotion failed, but only if images were pushed.
    if: always() && needs.build-and-push-images.result == 'success'
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        variant: [sglang, vllm]
    steps:
      - name: Delete test image from GHCR
        env:
          GH_TOKEN: ${{ secrets.GHCR_TOKEN }}
        run: |
          set -euo pipefail
          PACKAGE_NAME="areal-runtime"
          TAG="test-${{ matrix.variant }}"
          VERSIONS_URL="https://api.github.com/orgs/inclusionai/packages/container/$PACKAGE_NAME/versions"

          # List package versions. -f turns an HTTP error into a non-zero exit
          # instead of handing an error document to jq.
          # Only the first 100 versions are inspected (no pagination).
          versions=$(curl -fsS -H "Authorization: Bearer $GH_TOKEN" \
            "$VERSIONS_URL?per_page=100")

          # Find the package version that carries the test tag, if any.
          match=$(jq -c --arg tag "$TAG" \
            '[.[] | select((.metadata.container.tags // []) | any(. == $tag))][0] // empty' \
            <<<"$versions")

          if [ -z "$match" ]; then
            echo "⚠️ ${{ matrix.variant }} test image not found or already deleted"
            exit 0
          fi

          PACKAGE_VERSION_ID=$(jq -r '.id' <<<"$match")
          other_tags=$(jq -r --arg tag "$TAG" \
            '(.metadata.container.tags // []) - [$tag] | join(", ")' <<<"$match")

          # Deleting a package version removes every tag that points at it.
          # When the promoted image shares this version (same digest), deleting
          # it would also remove the promoted tag, so the version is kept.
          if [ -n "$other_tags" ]; then
            echo "⚠️ ${{ matrix.variant }} test image version $PACKAGE_VERSION_ID also carries tag(s): $other_tags; skipping deletion"
            exit 0
          fi

          # -f makes a rejected DELETE fail the step instead of falling
          # through to the success message.
          curl -fsS -X DELETE -H "Authorization: Bearer $GH_TOKEN" \
            "$VERSIONS_URL/$PACKAGE_VERSION_ID"
          echo "✅ Deleted ${{ matrix.variant }} test image from registry"
# Job stop-builder: final job of the pipeline; stops the builder VM unless it
# was already running when the workflow began.
  stop-builder:
    name: 'Stop areal-docker-builder instance'
    needs:
      - start-builder
      - build-and-push-images
      - run-tests-sglang
      - run-tests-vllm
      - promote-images
      - bake-gcp-image
      - cleanup-test-images
    # always(): runs even if earlier jobs failed or were skipped. Skipped only
    # when start-builder reported that the VM was already running.
    if: always() && needs.start-builder.outputs.was_running != 'true'
    runs-on: ubuntu-latest
    env:
      INSTANCE_ZONE: us-central1-f
      INSTANCE_NAME: areal-docker-builder
    steps:
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v3
        with:
          credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v3
      - name: Stop builder instance
        run: |
          # Look up the VM state; any describe failure is reported as
          # NOT_FOUND and its error output is discarded.
          vm_state=$(gcloud compute instances describe "$INSTANCE_NAME" \
            --project "$GCP_PROJECT_ID" \
            --zone "$INSTANCE_ZONE" \
            --format="get(status)" 2>/dev/null || echo "NOT_FOUND")
          case "$vm_state" in
            NOT_FOUND)
              # Best effort: a missing instance is not treated as a failure.
              echo "Warning: Instance $INSTANCE_NAME not found in zone $INSTANCE_ZONE"
              exit 0
              ;;
            RUNNING)
              echo "Stopping instance $INSTANCE_NAME..."
              gcloud compute instances stop "$INSTANCE_NAME" \
                --project "$GCP_PROJECT_ID" \
                --zone "$INSTANCE_ZONE"
              echo "Instance stopped successfully."
              ;;
            *)
              # Any other state is left untouched.
              echo "Instance $INSTANCE_NAME is already in status: $vm_state"
              ;;
          esac