sgl-project/sglang

65 workflows · maturity 67% · 14 patterns · GitHub ↗

Security 7.69/100

Practices

✓ Matrix · ✓ Permissions · ○ Security scan · ○ AI review · ✓ Cache · ✓ Concurrency · ✓ Reusable workflows

Detected patterns

ai-code-review chaos-engineering cross-version-compat ecosystem-ci flaky-test-retry fuzz-randomized-testing hardware-matrix least-privilege-permissions multi-channel-release multi-stage-release multiple-build-systems per-sample-ci performance-tracking reusable-workflows

Security dimensions

permissions

7.7

security scan

supply chain

secret handling

harden runner

Workflows (65)

amd-aiter-scout .github/workflows/amd-aiter-scout.yml

Triggers

schedule, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest

Jobs

resolve-aiter, call-nightly-amd, call-nightly-amd-rocm720, call-pr-test-amd, call-pr-test-amd-rocm720, check-all-jobs

Commands

REF="${{ inputs.aiter_ref || 'main' }}" echo "Resolving AITER ref: ${REF}" SHA=$(git ls-remote https://github.com/ROCm/aiter.git "refs/heads/${REF}" | head -1 | cut -f1) if [ -z "$SHA" ]; then SHA=$(git ls-remote https://github.com/ROCm/aiter.git "refs/tags/${REF}" | head -1 | cut -f1) fi if [ -z "$SHA" ]; then SHA=$(git ls-remote https://github.com/ROCm/aiter.git "${REF}" | head -1 | cut -f1) fi if [ -z "$SHA" ]; then SHA="${REF}" fi echo "sha=${SHA}" >> $GITHUB_OUTPUT echo "### AITER Ref Resolution" >> $GITHUB_STEP_SUMMARY echo "- **Requested ref:** \`${REF}\`" >> $GITHUB_STEP_SUMMARY echo "- **Resolved SHA:** \`${SHA}\`" >> $GITHUB_STEP_SUMMARY echo "- **AITER commit:** https://github.com/ROCm/aiter/commit/${SHA}" >> $GITHUB_STEP_SUMMARY
FILTER="${{ inputs.job_filter || 'all' }}" echo "Job filter: ${FILTER}" if [[ "$FILTER" == "all" ]]; then echo "run_nightly_amd=true" >> $GITHUB_OUTPUT echo "run_nightly_amd_rocm720=true" >> $GITHUB_OUTPUT echo "run_pr_test_amd=true" >> $GITHUB_OUTPUT echo "run_pr_test_amd_rocm720=true" >> $GITHUB_OUTPUT else # Wrap with commas for exact substring matching (avoids "nightly-amd" matching "nightly-amd-rocm720") PADDED=",${FILTER// /}," echo "run_nightly_amd=$(echo "$PADDED" | grep -q ',nightly-amd,' && echo true || echo false)" >> $GITHUB_OUTPUT echo "run_nightly_amd_rocm720=$(echo "$PADDED" | grep -q ',nightly-amd-rocm720,' && echo true || echo false)" >> $GITHUB_OUTPUT echo "run_pr_test_amd=$(echo "$PADDED" | grep -q ',pr-test-amd,' && echo true || echo false)" >> $GITHUB_OUTPUT echo "run_pr_test_amd_rocm720=$(echo "$PADDED" | grep -q ',pr-test-amd-rocm720,' && echo true || echo false)" >> $GITHUB_OUTPUT fi echo "### Job Filter" >> $GITHUB_STEP_SUMMARY echo "- **Filter:** \`${FILTER}\`" >> $GITHUB_STEP_SUMMARY
echo "## AMD AITER Scout Results" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "- **AITER SHA:** \`${{ needs.resolve-aiter.outputs.aiter_sha }}\`" >> $GITHUB_STEP_SUMMARY echo "- **AITER commit:** https://github.com/ROCm/aiter/commit/${{ needs.resolve-aiter.outputs.aiter_sha }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "| Workflow | Result |" >> $GITHUB_STEP_SUMMARY echo "|----------|--------|" >> $GITHUB_STEP_SUMMARY echo "| Nightly AMD (AITER Latest) | \`${{ needs.call-nightly-amd.result }}\` |" >> $GITHUB_STEP_SUMMARY echo "| Nightly AMD ROCm 7.2 | \`${{ needs.call-nightly-amd-rocm720.result }}\` |" >> $GITHUB_STEP_SUMMARY echo "| PR Test AMD (AITER Latest) | \`${{ needs.call-pr-test-amd.result }}\` |" >> $GITHUB_STEP_SUMMARY echo "| PR Test AMD ROCm 7.2 | \`${{ needs.call-pr-test-amd-rocm720.result }}\` |" >> $GITHUB_STEP_SUMMARY
if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then echo "One or more workflows failed" exit 1 fi if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then echo "One or more workflows were cancelled" exit 1 fi echo "All workflows passed"

View raw YAML

name: AMD AITER Scout

# Runs on a twice-weekly schedule, or manually with an explicit AITER ref
# and/or a subset of the downstream workflows.
on:
  schedule:
    - cron: '0 20 * * 1'   # Monday 20:00 UTC
    - cron: '0 20 * * 4'   # Thursday 20:00 UTC
  workflow_dispatch:
    inputs:
      aiter_ref:
        type: string
        required: false
        default: 'main'
        description: 'AITER git ref (branch, tag, or SHA). Default: main (latest commit)'
      job_filter:
        type: string
        required: false
        default: 'all'
        description: 'Comma-separated workflows to run: nightly-amd, nightly-amd-rocm720, pr-test-amd, pr-test-amd-rocm720. Default: all'
      continue_on_error:
        type: boolean
        required: false
        default: true
        description: 'Continue running other workflows even if one fails'

# The group name embeds the run id, so every run lands in its own group.
concurrency:
  cancel-in-progress: true
  group: amd-aiter-scout-${{ github.run_id }}

jobs:
  # Resolves the requested AITER ref to a commit SHA and works out which
  # downstream workflows are selected. Both results are published as job
  # outputs for the call-* jobs.
  resolve-aiter:
    runs-on: ubuntu-latest
    outputs:
      aiter_sha: ${{ steps.resolve.outputs.sha }}
      run_nightly_amd: ${{ steps.parse.outputs.run_nightly_amd }}
      run_nightly_amd_rocm720: ${{ steps.parse.outputs.run_nightly_amd_rocm720 }}
      run_pr_test_amd: ${{ steps.parse.outputs.run_pr_test_amd }}
      run_pr_test_amd_rocm720: ${{ steps.parse.outputs.run_pr_test_amd_rocm720 }}
    steps:
      - name: Resolve AITER commit
        id: resolve
        env:
          # Handed over through the environment instead of being interpolated
          # into the script, so the input is never parsed as shell code.
          AITER_REF: ${{ inputs.aiter_ref || 'main' }}
        run: |
          REF="${AITER_REF}"
          echo "Resolving AITER ref: ${REF}"

          # Try the ref as a branch, then as a tag, then as a raw ls-remote
          # pattern; the first lookup that returns a line wins.
          SHA=""
          for pattern in "refs/heads/${REF}" "refs/tags/${REF}" "${REF}"; do
            SHA=$(git ls-remote https://github.com/ROCm/aiter.git "${pattern}" | head -1 | cut -f1)
            if [ -n "$SHA" ]; then
              break
            fi
          done
          # Nothing matched on the remote: use the input verbatim (e.g. a SHA).
          if [ -z "$SHA" ]; then
            SHA="${REF}"
          fi

          echo "sha=${SHA}" >> "$GITHUB_OUTPUT"
          {
            echo "### AITER Ref Resolution"
            echo "- **Requested ref:** \`${REF}\`"
            echo "- **Resolved SHA:** \`${SHA}\`"
            echo "- **AITER commit:** https://github.com/ROCm/aiter/commit/${SHA}"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Parse job filter
        id: parse
        env:
          # Same reason as above: keep the raw input out of the script text.
          JOB_FILTER: ${{ inputs.job_filter || 'all' }}
        run: |
          FILTER="${JOB_FILTER}"
          echo "Job filter: ${FILTER}"

          # Strip spaces and wrap with commas so each name is matched as a whole
          # list item (avoids "nightly-amd" matching "nightly-amd-rocm720").
          PADDED=",${FILTER// /},"

          # Prints "true" when workflow $1 is selected, "false" otherwise.
          selected() {
            if [[ "$FILTER" == "all" || "$PADDED" == *",$1,"* ]]; then
              echo true
            else
              echo false
            fi
          }

          {
            echo "run_nightly_amd=$(selected nightly-amd)"
            echo "run_nightly_amd_rocm720=$(selected nightly-amd-rocm720)"
            echo "run_pr_test_amd=$(selected pr-test-amd)"
            echo "run_pr_test_amd_rocm720=$(selected pr-test-amd-rocm720)"
          } >> "$GITHUB_OUTPUT"

          {
            echo "### Job Filter"
            echo "- **Filter:** \`${FILTER}\`"
          } >> "$GITHUB_STEP_SUMMARY"

  # Runs the nightly AMD workflow against the resolved AITER commit when it
  # is selected by the job filter.
  call-nightly-amd:
    if: needs.resolve-aiter.outputs.run_nightly_amd == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/nightly-test-amd.yml
    secrets: inherit
    with:
      ref: ${{ github.sha }}
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      job_filter: 'all'
      # Scheduled runs carry no inputs and default to true; manual runs use
      # the input as given. Comparing the boolean input with '' coerces both
      # sides to 0, which would turn an explicit `false` back into true.
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  # Runs the nightly AMD ROCm 7.2 workflow against the resolved AITER commit
  # when it is selected by the job filter.
  call-nightly-amd-rocm720:
    if: needs.resolve-aiter.outputs.run_nightly_amd_rocm720 == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/nightly-test-amd-rocm720.yml
    secrets: inherit
    with:
      ref: ${{ github.sha }}
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      job_filter: 'all'
      # Scheduled runs carry no inputs and default to true; manual runs use
      # the input as given. Comparing the boolean input with '' coerces both
      # sides to 0, which would turn an explicit `false` back into true.
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  # Runs the full AMD PR test workflow against the resolved AITER commit when
  # it is selected by the job filter.
  call-pr-test-amd:
    if: needs.resolve-aiter.outputs.run_pr_test_amd == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/pr-test-amd.yml
    secrets: inherit
    with:
      run_all_tests: true
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      # Scheduled runs carry no inputs and default to true; manual runs use
      # the input as given. Comparing the boolean input with '' coerces both
      # sides to 0, which would turn an explicit `false` back into true.
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  # Runs the full AMD ROCm 7.2 PR test workflow against the resolved AITER
  # commit when it is selected by the job filter.
  call-pr-test-amd-rocm720:
    if: needs.resolve-aiter.outputs.run_pr_test_amd_rocm720 == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/pr-test-amd-rocm720.yml
    secrets: inherit
    with:
      run_all_tests: true
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      # Scheduled runs carry no inputs and default to true; manual runs use
      # the input as given. Comparing the boolean input with '' coerces both
      # sides to 0, which would turn an explicit `false` back into true.
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  # Always runs last: writes a result table to the step summary and fails the
  # run if any upstream job failed or was cancelled.
  check-all-jobs:
    if: always()
    needs:
      - resolve-aiter
      - call-nightly-amd
      - call-nightly-amd-rocm720
      - call-pr-test-amd
      - call-pr-test-amd-rocm720
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        env:
          # aiter_sha can be the raw dispatch input (resolve-aiter falls back to
          # it when no remote ref matches), so it is passed as data through the
          # environment rather than pasted into the script.
          AITER_SHA: ${{ needs.resolve-aiter.outputs.aiter_sha }}
          NIGHTLY_AMD_RESULT: ${{ needs.call-nightly-amd.result }}
          NIGHTLY_AMD_ROCM720_RESULT: ${{ needs.call-nightly-amd-rocm720.result }}
          PR_TEST_AMD_RESULT: ${{ needs.call-pr-test-amd.result }}
          PR_TEST_AMD_ROCM720_RESULT: ${{ needs.call-pr-test-amd-rocm720.result }}
        run: |
          {
            echo "## AMD AITER Scout Results"
            echo ""
            echo "- **AITER SHA:** \`${AITER_SHA}\`"
            echo "- **AITER commit:** https://github.com/ROCm/aiter/commit/${AITER_SHA}"
            echo ""
            echo "| Workflow | Result |"
            echo "|----------|--------|"
            echo "| Nightly AMD (AITER Latest) | \`${NIGHTLY_AMD_RESULT}\` |"
            echo "| Nightly AMD ROCm 7.2 | \`${NIGHTLY_AMD_ROCM720_RESULT}\` |"
            echo "| PR Test AMD (AITER Latest) | \`${PR_TEST_AMD_RESULT}\` |"
            echo "| PR Test AMD ROCm 7.2 | \`${PR_TEST_AMD_ROCM720_RESULT}\` |"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Check if any job failed
        run: |
          # The expressions below render to the literal strings true/false.
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more workflows failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more workflows were cancelled"
            exit 1
          fi
          echo "All workflows passed"

amd-ci-job-monitor matrix .github/workflows/amd-ci-job-monitor.yml

Triggers

schedule, pull_request, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest

Jobs

fetch-actions-data, custom-report, parse-workflows, pr-ci-reports, nightly-reports, pr-rocm720-ci-reports, nightly-rocm720-reports, runner-fleet-report

Matrix

job_name→ ${{ fromJson(needs.parse-workflows.outputs.nightly_jobs) }}, ${{ fromJson(needs.parse-workflows.outputs.nightly_rocm720_jobs) }}, ${{ fromJson(needs.parse-workflows.outputs.pr_jobs) }}, ${{ fromJson(needs.parse-workflows.outputs.pr_rocm720_jobs) }}

Commands

pip install tabulate
if [[ -n "${{ inputs.job_filter }}" ]]; then echo "workflows=pr-test-amd.yml" >> "$GITHUB_OUTPUT" else echo "workflows=pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" >> "$GITHUB_OUTPUT" fi
python scripts/ci/utils/query_job_status.py \ --repo ${{ github.repository }} \ --workflow "${{ steps.select-workflows.outputs.workflows }}" \ --hours ${{ inputs.hours || '24' }} \ --dump-data-file actions-job-snapshot.json
pip install tabulate
python scripts/ci/utils/query_job_status.py \ --repo ${{ github.repository }} \ --job "${{ inputs.job_filter }}" \ --workflow "pr-test-amd.yml" \ --hours ${{ inputs.hours || '24' }} \ --input-data-file ci-data/actions-job-snapshot.json \ --summary
# Parse pr-test-amd.yml and extract job names (exclude utility jobs) # Excluded: call-gate, check-changes, pr-test-amd-finish, cancel, check-all-jobs pr_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/pr-test-amd.yml | \ grep -v -E '^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$' | \ jq -R -s -c 'split("\n") | map(select(length > 0))') echo "pr_jobs=$pr_jobs" >> $GITHUB_OUTPUT echo "PR jobs: $pr_jobs" # Parse nightly-test-amd.yml and extract job names (exclude utility jobs) # Excluded: check-all-jobs nightly_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/nightly-test-amd.yml | \ grep -v -E '^(check-all-jobs)$' | \ jq -R -s -c 'split("\n") | map(select(length > 0))') echo "nightly_jobs=$nightly_jobs" >> $GITHUB_OUTPUT echo "Nightly jobs: $nightly_jobs" # Parse pr-test-amd-rocm720.yml (exclude utility jobs) # Excluded: call-gate, check-changes, pr-test-amd-finish, cancel, check-all-jobs pr_rocm720_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/pr-test-amd-rocm720.yml | \ grep -v -E '^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$' | \ jq -R -s -c 'split("\n") | map(select(length > 0))') echo "pr_rocm720_jobs=$pr_rocm720_jobs" >> $GITHUB_OUTPUT echo "PR ROCm 7.2 jobs: $pr_rocm720_jobs" # Parse nightly-test-amd-rocm720.yml (exclude utility jobs) # Excluded: check-all-jobs nightly_rocm720_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/nightly-test-amd-rocm720.yml | \ grep -v -E '^(check-all-jobs)$' | \ jq -R -s -c 'split("\n") | map(select(length > 0))') echo "nightly_rocm720_jobs=$nightly_rocm720_jobs" >> $GITHUB_OUTPUT echo "Nightly ROCm 7.2 jobs: $nightly_rocm720_jobs"
pip install tabulate
python scripts/ci/utils/query_job_status.py \ --repo ${{ github.repository }} \ --job "${{ matrix.job_name }}" \ --workflow "pr-test-amd.yml" \ --hours ${{ inputs.hours || '24' }} \ --input-data-file ci-data/actions-job-snapshot.json \ --summary

View raw YAML

name: AMD CI Job Monitor

# Runs daily, on pull requests that touch this workflow or the query script,
# and on demand with an optional time window and job filter.
on:
  schedule:
    - cron: '0 0 * * *'  # Daily at midnight UTC
  pull_request:
    paths:
      - '.github/workflows/amd-ci-job-monitor.yml'
      - 'scripts/ci/utils/query_job_status.py'
  workflow_dispatch:
    inputs:
      hours:
        type: string
        required: false
        default: '24'
        description: 'Time window in hours'
      job_filter:
        type: string
        required: false
        description: 'Job name filter (leave empty for all AMD jobs)'

jobs:
  # Queries job data once and uploads it as the `actions-job-snapshot`
  # artifact that the report jobs download.
  fetch-actions-data:
    name: Fetch Actions Snapshot
    runs-on: ubuntu-latest
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Select workflows for snapshot
        id: select-workflows
        env:
          # Passed through the environment so the dispatch input is treated
          # as data and never parsed as part of the script.
          JOB_FILTER: ${{ inputs.job_filter }}
        run: |
          # A job filter narrows the snapshot to pr-test-amd.yml only.
          if [[ -n "$JOB_FILTER" ]]; then
            echo "workflows=pr-test-amd.yml" >> "$GITHUB_OUTPUT"
          else
            echo "workflows=pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" >> "$GITHUB_OUTPUT"
          fi

      - name: Fetch Actions data snapshot
        timeout-minutes: 30
        env:
          WORKFLOWS: ${{ steps.select-workflows.outputs.workflows }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --workflow "$WORKFLOWS" \
            --hours "$HOURS" \
            --dump-data-file actions-job-snapshot.json

      - name: Upload Actions data snapshot
        uses: actions/upload-artifact@v4
        with:
          name: actions-job-snapshot
          path: actions-job-snapshot.json
          if-no-files-found: error

  # Single job filter mode: runs only when a job filter is supplied and
  # reports on that one job from the downloaded snapshot.
  custom-report:
    name: Custom Job Report
    if: ${{ inputs.job_filter }}
    needs: fetch-actions-data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Custom Job Report
        timeout-minutes: 30
        env:
          # Dispatch inputs are passed as environment variables so they reach
          # the script as arguments, never as shell syntax.
          JOB_FILTER: ${{ inputs.job_filter }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_FILTER" \
            --workflow "pr-test-amd.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Reads the four AMD workflow files and publishes their job ids as JSON
  # arrays, which the report jobs use as dynamic matrices.
  parse-workflows:
    name: Parse Workflow Jobs
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    outputs:
      pr_jobs: ${{ steps.parse.outputs.pr_jobs }}
      nightly_jobs: ${{ steps.parse.outputs.nightly_jobs }}
      pr_rocm720_jobs: ${{ steps.parse.outputs.pr_rocm720_jobs }}
      nightly_rocm720_jobs: ${{ steps.parse.outputs.nightly_rocm720_jobs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Parse workflow files
        id: parse
        run: |
          # Prints the job ids of workflow file $1 as a compact JSON array,
          # leaving out ids that match the extended regex $2.
          job_ids_json() {
            yq -r '.jobs | keys | .[]' "$1" | \
              grep -v -E "$2" | \
              jq -R -s -c 'split("\n") | map(select(length > 0))'
          }

          # Utility jobs excluded from the PR workflows and the nightly workflows.
          pr_utility='^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$'
          nightly_utility='^(check-all-jobs)$'

          # pr-test-amd.yml
          pr_jobs=$(job_ids_json .github/workflows/pr-test-amd.yml "$pr_utility")
          echo "pr_jobs=$pr_jobs" >> $GITHUB_OUTPUT
          echo "PR jobs: $pr_jobs"

          # nightly-test-amd.yml
          nightly_jobs=$(job_ids_json .github/workflows/nightly-test-amd.yml "$nightly_utility")
          echo "nightly_jobs=$nightly_jobs" >> $GITHUB_OUTPUT
          echo "Nightly jobs: $nightly_jobs"

          # pr-test-amd-rocm720.yml
          pr_rocm720_jobs=$(job_ids_json .github/workflows/pr-test-amd-rocm720.yml "$pr_utility")
          echo "pr_rocm720_jobs=$pr_rocm720_jobs" >> $GITHUB_OUTPUT
          echo "PR ROCm 7.2 jobs: $pr_rocm720_jobs"

          # nightly-test-amd-rocm720.yml
          nightly_rocm720_jobs=$(job_ids_json .github/workflows/nightly-test-amd-rocm720.yml "$nightly_utility")
          echo "nightly_rocm720_jobs=$nightly_rocm720_jobs" >> $GITHUB_OUTPUT
          echo "Nightly ROCm 7.2 jobs: $nightly_rocm720_jobs"

  # PR CI reports using dynamic matrix: one report job per job id in the
  # `pr_jobs` output of parse-workflows.
  pr-ci-reports:
    name: PR - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Report
        timeout-minutes: 15
        env:
          # The job id is read from a workflow file in the checked-out ref and
          # the hours value is a dispatch input; both are passed as environment
          # variables so they are never parsed as shell syntax.
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "pr-test-amd.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Nightly AMD test reports using dynamic matrix: one report job per job id
  # in the `nightly_jobs` output of parse-workflows.
  nightly-reports:
    name: Nightly - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Nightly Report
        timeout-minutes: 15
        env:
          # The job id is read from a workflow file in the checked-out ref and
          # the hours value is a dispatch input; both are passed as environment
          # variables so they are never parsed as shell syntax.
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "nightly-test-amd.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # PR ROCm 7.2 CI reports using dynamic matrix: one report job per job id in
  # the `pr_rocm720_jobs` output of parse-workflows.
  pr-rocm720-ci-reports:
    name: PR ROCm720 - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_rocm720_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate PR ROCm 7.2 Report
        timeout-minutes: 15
        env:
          # The job id is read from a workflow file in the checked-out ref and
          # the hours value is a dispatch input; both are passed as environment
          # variables so they are never parsed as shell syntax.
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "pr-test-amd-rocm720.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Nightly ROCm 7.2 reports using dynamic matrix: one report job per job id
  # in the `nightly_rocm720_jobs` output of parse-workflows.
  nightly-rocm720-reports:
    name: Nightly ROCm720 - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_rocm720_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Nightly ROCm 7.2 Report
        timeout-minutes: 15
        env:
          # The job id is read from a workflow file in the checked-out ref and
          # the hours value is a dispatch input; both are passed as environment
          # variables so they are never parsed as shell syntax.
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "nightly-test-amd-rocm720.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Runner fleet report - cross-workflow runner analytics in a single pass
  # over the downloaded snapshot. Skipped when a job filter is supplied.
  runner-fleet-report:
    name: Runner Fleet Report
    if: ${{ !inputs.job_filter }}
    needs: fetch-actions-data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Runner Fleet Report
        timeout-minutes: 30
        env:
          # The dispatch input is passed as an environment variable so it is
          # never parsed as shell syntax.
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --runner-report \
            --workflow "pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

auto-tune .github/workflows/auto-tune.yml

Triggers: workflow_dispatch
Runs on: ubuntu-latest
Jobs: lint

View raw YAML

name: Auto tune

# Manual trigger only.
on: workflow_dispatch

jobs:
  # The job only checks out the repository; no further steps are defined.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

bot-bump-flashinfer-version perms .github/workflows/bot-bump-flashinfer-version.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

bump-flashinfer-version

Commands

pip install tomli
git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com" RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4) BRANCH_NAME="bot/bump-flashinfer-version-${{ github.event.inputs.new_version }}-${RANDOM_SUFFIX}" git checkout -b "$BRANCH_NAME" echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
python scripts/release/bump_flashinfer_version.py "${{ github.event.inputs.new_version }}"
bash scripts/release/commit_and_pr.sh "flashinfer" "${{ github.event.inputs.new_version }}" "$BRANCH_NAME"

View raw YAML

name: Bot Bump Flashinfer Version

# Manually triggered: creates a branch, runs the flashinfer version bump
# script and hands the result to the commit-and-PR script.
on:
  workflow_dispatch:
    inputs:
      new_version:
        description: 'New flashinfer version (e.g., 0.6.4)'
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-flashinfer-version:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      - name: Configure Git and branch
        env:
          # The dispatch input is passed through the environment in every step
          # that uses it, so it is handled as data and never parsed as shell
          # code. This job holds write permissions and a PAT.
          BUMP_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          # Short random suffix keeps repeated bumps to the same version on
          # distinct branch names.
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-flashinfer-version-${BUMP_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for the later steps of this job.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"

      - name: Run flashinfer version bump script
        env:
          BUMP_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          python scripts/release/bump_flashinfer_version.py "$BUMP_VERSION"

      - name: Commit and create PR
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          BUMP_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          bash scripts/release/commit_and_pr.sh "flashinfer" "$BUMP_VERSION" "$BRANCH_NAME"

bot-bump-kernel-version perms .github/workflows/bot-bump-kernel-version.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

bump-kernel-version

Commands

pip install tomli
git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com" RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4) BRANCH_NAME="bot/bump-kernel-version-${{ github.event.inputs.new_version }}-${RANDOM_SUFFIX}" git checkout -b "$BRANCH_NAME" echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
python scripts/release/bump_kernel_version.py "${{ github.event.inputs.new_version }}"
bash scripts/release/commit_and_pr.sh "sgl-kernel" "${{ github.event.inputs.new_version }}" "$BRANCH_NAME"

View raw YAML

name: Bot Bump Kernel Version

# Manually triggered: creates a branch, runs the sgl-kernel version bump
# script and hands the result to the commit-and-PR script.
on:
  workflow_dispatch:
    inputs:
      new_version:
        description: 'New sgl-kernel version (e.g., 0.3.12)'
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-kernel-version:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      - name: Configure Git and branch
        env:
          # The dispatch input is passed through the environment in every step
          # that uses it, so it is handled as data and never parsed as shell
          # code. This job holds write permissions and a PAT.
          BUMP_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          # Short random suffix keeps repeated bumps to the same version on
          # distinct branch names.
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-kernel-version-${BUMP_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for the later steps of this job.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"

      - name: Run kernel version bump script
        env:
          BUMP_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          python scripts/release/bump_kernel_version.py "$BUMP_VERSION"

      - name: Commit and create PR
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          BUMP_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          bash scripts/release/commit_and_pr.sh "sgl-kernel" "$BUMP_VERSION" "$BRANCH_NAME"

bot-bump-kernel-version-to-sglang perms .github/workflows/bot-bump-kernel-version-to-sglang.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

bump-kernel-version-to-sglang, run-nightly-tests-nvidia, run-nightly-tests-amd, run-nightly-tests-npu, run-pr-tests-xeon, run-pr-tests-xpu

Commands

pip install tomli
python scripts/release/check_kernel_version_to_sglang.py
git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com" RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4) KERNEL_VERSION="${{ steps.check_sync.outputs.kernel_version }}" BRANCH_NAME="bot/bump-kernel-version-to-sglang-${KERNEL_VERSION}-${RANDOM_SUFFIX}" git checkout -b "$BRANCH_NAME" echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT
python scripts/release/bump_kernel_version_to_sglang.py
bash scripts/release/commit_and_pr_kernel_to_sglang.sh "$KERNEL_VERSION" "$BRANCH_NAME"

View raw YAML

name: Bot Bump Kernel Version to SGLang

# Manual trigger only; takes no inputs.
on: workflow_dispatch

# Write access to contents and pull requests is granted workflow-wide.
permissions:
  pull-requests: write
  contents: write

jobs:
  # Runs the sync check and, when it reports needs_sync == 'true', creates a
  # branch, runs the bump script and the commit-and-PR script. The branch name
  # and the needs_sync flag are exposed as job outputs.
  bump-kernel-version-to-sglang:
    runs-on: ubuntu-latest
    outputs:
      branch_name: ${{ steps.set_output.outputs.branch_name }}
      needs_sync: ${{ steps.check_sync.outputs.needs_sync }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      # The `needs_sync` and `kernel_version` step outputs are read below.
      # NOTE(review): presumably written by this script; confirm.
      - name: Check if sync is needed
        id: check_sync
        run: |
          python scripts/release/check_kernel_version_to_sglang.py

      - name: Configure Git and branch
        if: steps.check_sync.outputs.needs_sync == 'true'
        id: set_output
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the value is never parsed as shell code.
          DETECTED_KERNEL_VERSION: ${{ steps.check_sync.outputs.kernel_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          # Short random suffix keeps repeated runs for the same version on
          # distinct branch names.
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          KERNEL_VERSION="${DETECTED_KERNEL_VERSION}"
          BRANCH_NAME="bot/bump-kernel-version-to-sglang-${KERNEL_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for later steps of this job.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"
          echo "KERNEL_VERSION=$KERNEL_VERSION" >> "$GITHUB_ENV"
          # Published as the job's branch_name output.
          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"

      - name: Run kernel version bump script
        if: steps.check_sync.outputs.needs_sync == 'true'
        run: |
          python scripts/release/bump_kernel_version_to_sglang.py

      - name: Commit and create PR
        if: steps.check_sync.outputs.needs_sync == 'true'
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
        run: |
          bash scripts/release/commit_and_pr_kernel_to_sglang.sh "$KERNEL_VERSION" "$BRANCH_NAME"

  run-nightly-tests-nvidia:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/nightly-test-nvidia.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-amd:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/nightly-test-amd.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-npu:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/nightly-test-npu.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xeon:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/pr-test-xeon.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xpu:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/pr-test-xpu.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

bot-bump-sglang-version perms .github/workflows/bot-bump-sglang-version.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

bump-sglang-version, run-nightly-tests-nvidia, run-nightly-tests-amd, run-nightly-tests-npu, run-pr-tests-xeon, run-pr-tests-xpu

Commands

pip install tomli
git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com" RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4) BRANCH_NAME="bot/bump-sglang-version-${{ github.event.inputs.new_version }}-${RANDOM_SUFFIX}" git checkout -b "$BRANCH_NAME" echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT
python scripts/release/bump_sglang_version.py "${{ github.event.inputs.new_version }}"
bash scripts/release/commit_and_pr.sh "SGLang" "${{ github.event.inputs.new_version }}" "$BRANCH_NAME"

View raw YAML

# Manually triggered bot workflow. It creates a bump branch for the requested
# version, runs the version bump script and the commit-and-PR script, then
# calls the reusable test workflows against the new branch.
name: Bot Bump SGLang Version

on:
  workflow_dispatch:
    inputs:
      new_version:
        description: 'New SGLang version (e.g., 0.5.3 or 0.5.3rc0)'
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-sglang-version:
    runs-on: ubuntu-latest
    env:
      # The free-form dispatch input is bound to an environment variable once
      # and only ever referenced as "$NEW_VERSION" in scripts. Expanding
      # `${{ ... }}` inside a `run:` block would splice the raw text into the
      # shell script and allow command injection.
      NEW_VERSION: ${{ github.event.inputs.new_version }}
    outputs:
      # Passed to the downstream test jobs as their `ref` input.
      branch_name: ${{ steps.set_output.outputs.branch_name }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      - name: Configure Git and branch
        id: set_output
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          # Short random suffix keeps branch names unique across re-runs.
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-sglang-version-${NEW_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Export for the later steps of this job (GITHUB_ENV) and for the
          # downstream jobs (GITHUB_OUTPUT).
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"
          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"

      - name: Run SGLang version bump script
        run: |
          python scripts/release/bump_sglang_version.py "$NEW_VERSION"

      - name: Commit and create PR
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
        run: |
          bash scripts/release/commit_and_pr.sh "SGLang" "$NEW_VERSION" "$BRANCH_NAME"

  # The jobs below call reusable test workflows on the bump branch.
  run-nightly-tests-nvidia:
    needs: bump-sglang-version
    uses: ./.github/workflows/nightly-test-nvidia.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-amd:
    needs: bump-sglang-version
    uses: ./.github/workflows/nightly-test-amd.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-npu:
    needs: bump-sglang-version
    uses: ./.github/workflows/nightly-test-npu.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xeon:
    needs: bump-sglang-version
    uses: ./.github/workflows/pr-test-xeon.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xpu:
    needs: bump-sglang-version
    uses: ./.github/workflows/pr-test-xpu.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

bot-cherry-pick perms .github/workflows/bot-cherry-pick.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

cherry-pick

Commands

if [[ ! "$TARGET_BRANCH" =~ ^release/v[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then echo "::error::Target branch must match pattern 'release/vX.Y' or 'release/vX.Y.Z' (e.g., release/v0.5.7)" exit 1 fi
git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com"
git fetch origin if ! git ls-remote --exit-code --heads origin "$TARGET_BRANCH" > /dev/null 2>&1; then echo "::error::Target branch '$TARGET_BRANCH' does not exist on remote" exit 1 fi
# Verify commit exists if ! git cat-file -t "$COMMIT_SHA_INPUT" > /dev/null 2>&1; then echo "::error::Commit SHA '$COMMIT_SHA_INPUT' does not exist" exit 1 fi # Get full SHA if short hash provided FULL_SHA=$(git rev-parse "$COMMIT_SHA_INPUT") COMMIT_TITLE=$(git log -1 --format="%s" "$FULL_SHA") SHORT_SHA=$(git rev-parse --short "$FULL_SHA") echo "full_sha=$FULL_SHA" >> $GITHUB_OUTPUT echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT # Use delimiter for multiline-safe output { echo "commit_title<<EOF" echo "$COMMIT_TITLE" echo "EOF" } >> $GITHUB_OUTPUT echo "Cherry-picking commit: $SHORT_SHA - $COMMIT_TITLE"
if [[ "$CREATE_PR" == "true" ]]; then # Create a new branch for the PR RANDOM_SUFFIX=$(head -c 4 /dev/urandom | xxd -p) NEW_BRANCH="cherry-pick/${SHORT_SHA}-to-${TARGET_BRANCH#release/}-${RANDOM_SUFFIX}" git checkout -b "$NEW_BRANCH" "origin/$TARGET_BRANCH" echo "new_branch=$NEW_BRANCH" >> $GITHUB_OUTPUT else # Checkout target branch directly git checkout "$TARGET_BRANCH" fi # Attempt cherry-pick if git cherry-pick "$FULL_SHA"; then echo "cherry_pick_success=true" >> $GITHUB_OUTPUT else echo "::error::Cherry-pick failed due to conflicts. Please resolve manually." git cherry-pick --abort || true echo "cherry_pick_success=false" >> $GITHUB_OUTPUT exit 1 fi
if [[ "$CREATE_PR" == "true" ]]; then git push origin "$NEW_BRANCH" else git push origin "$TARGET_BRANCH" fi
PR_TITLE="[Cherry-pick] ${COMMIT_TITLE} to ${TARGET_BRANCH}" gh pr create \ --title "$PR_TITLE" \ --base "$TARGET_BRANCH" \ --head "$NEW_BRANCH" \ --label "cherry-pick" \ --body-file - <<EOF Cherry-pick of commit ${FULL_SHA} to \`${TARGET_BRANCH}\` **Original commit:** ${FULL_SHA} **Original title:** ${COMMIT_TITLE} --- *This PR was automatically created by the cherry-pick workflow.* EOF
echo "## Cherry-Pick Summary" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "- **Triggered by:** @${ACTOR}" >> $GITHUB_STEP_SUMMARY echo "- **Commit:** ${FULL_SHA}" >> $GITHUB_STEP_SUMMARY echo "- **Title:** ${COMMIT_TITLE}" >> $GITHUB_STEP_SUMMARY echo "- **Target Branch:** ${TARGET_BRANCH}" >> $GITHUB_STEP_SUMMARY if [[ "$CHERRY_PICK_SUCCESS" == "true" ]]; then echo "- **Status:** ✅ Success" >> $GITHUB_STEP_SUMMARY else echo "- **Status:** ❌ Failed" >> $GITHUB_STEP_SUMMARY fi if [[ "$CREATE_PR" == "true" && "$CHERRY_PICK_SUCCESS" == "true" ]]; then echo "- **PR Branch:** ${NEW_BRANCH}" >> $GITHUB_STEP_SUMMARY fi

View raw YAML

# Manually triggered bot workflow that cherry-picks one commit onto a release
# branch. With `create_pr` set it pushes a new `cherry-pick/...` branch and
# opens a PR against the release branch; otherwise it pushes the result
# straight to the release branch.
name: Bot Cherry Pick to Release Branch

on:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Commit SHA to cherry-pick (full or short hash)'
        required: true
        type: string
      target_branch:
        description: 'Target release branch (e.g., release/v0.5.7)'
        required: true
        type: string
      create_pr:
        description: 'Create a PR instead of pushing directly'
        required: false
        type: boolean
        default: true

permissions:
  contents: write
  pull-requests: write

# One concurrency group per target branch; later runs queue up instead of
# cancelling the run in progress.
concurrency:
  group: cherry-pick-${{ github.event.inputs.target_branch }}
  cancel-in-progress: false

jobs:
  cherry-pick:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: 'prod'
    steps:
      # All dispatch inputs reach the scripts through `env`, never through
      # `${{ }}` expansion inside `run:`, so they cannot be parsed as shell code.
      - name: Validate inputs
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          if [[ ! "$TARGET_BRANCH" =~ ^release/v[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then
            echo "::error::Target branch must match pattern 'release/vX.Y' or 'release/vX.Y.Z' (e.g., release/v0.5.7)"
            exit 1
          fi

      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history is needed so the requested commit can be resolved.
          fetch-depth: 0
          token: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}

      - name: Configure Git
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"

      - name: Validate target branch exists
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          git fetch origin
          if ! git ls-remote --exit-code --heads origin "$TARGET_BRANCH" > /dev/null 2>&1; then
            echo "::error::Target branch '$TARGET_BRANCH' does not exist on remote"
            exit 1
          fi

      - name: Get commit info
        id: commit_info
        env:
          COMMIT_SHA_INPUT: ${{ github.event.inputs.commit_sha }}
        run: |
          # Resolve the input to a full commit id. The `^{commit}` suffix makes
          # git reject objects that are not commits (trees, blobs) and peel
          # annotated tags, so FULL_SHA is always the id of a real commit.
          if ! FULL_SHA=$(git rev-parse --verify --quiet "${COMMIT_SHA_INPUT}^{commit}"); then
            echo "::error::Commit SHA '$COMMIT_SHA_INPUT' does not exist"
            exit 1
          fi

          COMMIT_TITLE=$(git log -1 --format="%s" "$FULL_SHA")
          SHORT_SHA=$(git rev-parse --short "$FULL_SHA")
          echo "full_sha=$FULL_SHA" >> "$GITHUB_OUTPUT"
          echo "short_sha=$SHORT_SHA" >> "$GITHUB_OUTPUT"
          # Use delimiter for multiline-safe output
          {
            echo "commit_title<<EOF"
            echo "$COMMIT_TITLE"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"
          echo "Cherry-picking commit: $SHORT_SHA - $COMMIT_TITLE"

      - name: Cherry-pick commit
        id: cherry_pick
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          FULL_SHA: ${{ steps.commit_info.outputs.full_sha }}
          SHORT_SHA: ${{ steps.commit_info.outputs.short_sha }}
          CREATE_PR: ${{ github.event.inputs.create_pr }}
        run: |
          if [[ "$CREATE_PR" == "true" ]]; then
            # Create a new branch for the PR
            RANDOM_SUFFIX=$(head -c 4 /dev/urandom | xxd -p)
            NEW_BRANCH="cherry-pick/${SHORT_SHA}-to-${TARGET_BRANCH#release/}-${RANDOM_SUFFIX}"
            git checkout -b "$NEW_BRANCH" "origin/$TARGET_BRANCH"
            echo "new_branch=$NEW_BRANCH" >> "$GITHUB_OUTPUT"
          else
            # Checkout target branch directly
            git checkout "$TARGET_BRANCH"
          fi

          # Attempt cherry-pick; on conflict, abort so the work tree is clean
          # and record the failure for the Summary step before exiting.
          if git cherry-pick "$FULL_SHA"; then
            echo "cherry_pick_success=true" >> "$GITHUB_OUTPUT"
          else
            echo "::error::Cherry-pick failed due to conflicts. Please resolve manually."
            git cherry-pick --abort || true
            echo "cherry_pick_success=false" >> "$GITHUB_OUTPUT"
            exit 1
          fi

      - name: Push changes
        if: steps.cherry_pick.outputs.cherry_pick_success == 'true'
        env:
          CREATE_PR: ${{ github.event.inputs.create_pr }}
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          NEW_BRANCH: ${{ steps.cherry_pick.outputs.new_branch }}
        run: |
          if [[ "$CREATE_PR" == "true" ]]; then
            git push origin "$NEW_BRANCH"
          else
            git push origin "$TARGET_BRANCH"
          fi

      - name: Create Pull Request
        if: steps.cherry_pick.outputs.cherry_pick_success == 'true' && github.event.inputs.create_pr == 'true'
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          SHORT_SHA: ${{ steps.commit_info.outputs.short_sha }}
          COMMIT_TITLE: ${{ steps.commit_info.outputs.commit_title }}
          FULL_SHA: ${{ steps.commit_info.outputs.full_sha }}
          NEW_BRANCH: ${{ steps.cherry_pick.outputs.new_branch }}
        run: |
          PR_TITLE="[Cherry-pick] ${COMMIT_TITLE} to ${TARGET_BRANCH}"

          # The PR body is fed to `gh` on stdin via the here-document below.
          gh pr create \
            --title "$PR_TITLE" \
            --base "$TARGET_BRANCH" \
            --head "$NEW_BRANCH" \
            --label "cherry-pick" \
            --body-file - <<EOF
          Cherry-pick of commit ${FULL_SHA} to \`${TARGET_BRANCH}\`

          **Original commit:** ${FULL_SHA}
          **Original title:** ${COMMIT_TITLE}

          ---
          *This PR was automatically created by the cherry-pick workflow.*
          EOF

      # Runs even when an earlier step failed so the outcome is always reported.
      - name: Summary
        if: always()
        env:
          FULL_SHA: ${{ steps.commit_info.outputs.full_sha }}
          COMMIT_TITLE: ${{ steps.commit_info.outputs.commit_title }}
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          CHERRY_PICK_SUCCESS: ${{ steps.cherry_pick.outputs.cherry_pick_success }}
          CREATE_PR: ${{ github.event.inputs.create_pr }}
          NEW_BRANCH: ${{ steps.cherry_pick.outputs.new_branch }}
          ACTOR: ${{ github.actor }}
        run: |
          echo "## Cherry-Pick Summary" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Triggered by:** @${ACTOR}" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Commit:** ${FULL_SHA}" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Title:** ${COMMIT_TITLE}" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Target Branch:** ${TARGET_BRANCH}" >> "$GITHUB_STEP_SUMMARY"
          if [[ "$CHERRY_PICK_SUCCESS" == "true" ]]; then
            echo "- **Status:** ✅ Success" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "- **Status:** ❌ Failed" >> "$GITHUB_STEP_SUMMARY"
          fi
          if [[ "$CREATE_PR" == "true" && "$CHERRY_PICK_SUCCESS" == "true" ]]; then
            echo "- **PR Branch:** ${NEW_BRANCH}" >> "$GITHUB_STEP_SUMMARY"
          fi

cancel-pr-workflow-on-merge perms .github/workflows/cancel-pr-workflow-on-merge.yml

Triggers: pull_request_target
Runs on: ubuntu-latest
Jobs: cancel
Actions: styfle/cancel-workflow-action

View raw YAML

# When a pull request is closed as merged, hand its number to
# styfle/cancel-workflow-action with `workflow_id: all`.
name: Cancel PR Workflows on Merge

on:
  pull_request_target:
    types: [closed]

# The action is given GITHUB_TOKEN, which is limited to `actions: write` here.
permissions:
  actions: write

jobs:
  cancel:
    runs-on: ubuntu-latest
    # `closed` also fires for PRs closed without merging; those are ignored.
    if: ${{ github.event.pull_request.merged == true }}
    steps:
      - name: Cancel Previous Runs
        uses: styfle/cancel-workflow-action@0.12.1
        with:
          access_token: ${{ secrets.GITHUB_TOKEN }}
          pr_number: ${{ github.event.pull_request.number }}
          workflow_id: all
          ignore_sha: true

cancel-unfinished-pr-tests perms .github/workflows/cancel-unfinished-pr-tests.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

cancel-unfinished-pr-runs

Commands

sudo apt-get install -y gh jq
set -euo pipefail # Read the space-separated string from the input into a bash array read -r -a WORKFLOW_FILES <<< "${WORKFLOWS}" echo "Targeting ${#WORKFLOW_FILES[@]} workflow(s): ${WORKFLOWS}" echo "" for workflow_file in "${WORKFLOW_FILES[@]}"; do echo "=========================================" echo "Workflow: $workflow_file" echo "=========================================" # Get all unfinished runs all_runs=$(gh run list \ --repo "$REPO" \ --workflow "$workflow_file" \ --json databaseId,status,event,url,createdAt \ --limit 1000 \ | jq -c '.[] | select(.status=="queued" or .status=="waiting" or .status=="in_progress")') if [ -z "$all_runs" ]; then echo "✅ No unfinished runs found" echo "" continue fi # Count runs by event type total_runs=$(echo "$all_runs" | wc -l) pr_runs=$(echo "$all_runs" | jq -s '[.[] | select(.event=="pull_request")] | length') other_runs=$(echo "$all_runs" | jq -s '[.[] | select(.event!="pull_request")] | length') echo "📊 Summary: $total_runs unfinished runs ($pr_runs PR-related, $other_runs other)" echo "" # Process non-PR runs first if [ "$other_runs" -gt 0 ]; then echo "--- Non-PR Runs ---" echo "$all_runs" | jq -c 'select(.event!="pull_request")' | while read -r run; do run_url=$(echo "$run" | jq -r '.url') run_event=$(echo "$run" | jq -r '.event') run_status=$(echo "$run" | jq -r '.status') echo " • $run_event ($run_status): $run_url" done echo "" fi # Process PR runs if [ "$pr_runs" -gt 0 ]; then echo "--- PR Runs (checking for cancellation) ---" echo "$all_runs" | jq -c 'select(.event=="pull_request")' | while read -r run; do run_id=$(echo "$run" | jq -r '.databaseId') run_url=$(echo "$run" | jq -r '.url') run_status=$(echo "$run" | jq -r '.status') echo "" echo "Run ($run_status): $run_url" # Fetch full run details to get head repository and branch info run_details=$(gh api -H "Accept: application/vnd.github+json" \ "repos/$REPO/actions/runs/$run_id" 2>/dev/null || true) if [ -z "$run_details" ]; then echo " ⚠️ Could not fetch run 
details, skipping" continue fi # Get head owner and branch (works for both fork and non-fork PRs) head_owner=$(echo "$run_details" | jq -r '.head_repository.owner.login // empty') head_branch=$(echo "$run_details" | jq -r '.head_branch // empty') if [ -z "$head_owner" ] || [ -z "$head_branch" ]; then echo " ⚠️ Missing head info, skipping" continue fi echo " Branch: ${head_owner}:${head_branch}" # Find PR by searching with head=owner:branch pr_number=$(gh api -H "Accept: application/vnd.github+json" \ "repos/$REPO/pulls?state=open&head=${head_owner}:${head_branch}" \ --jq '.[0].number // empty' 2>/dev/null || true) if [ -z "$pr_number" ]; then echo " ⚠️ No open PR found, skipping" continue fi pr_url="https://github.com/$REPO/pull/$pr_number" echo " PR: $pr_url" # Check for high priority label labels=$(gh pr view "$pr_number" --repo "$REPO" --json labels \ | jq -r '.labels[].name' 2>/dev/null || true) if echo "$labels" | grep -Fxq "bypass-maintenance"; then echo " 🛑 Skipping (bypass-maintenance label, never cancelled)" continue fi if echo "$labels" | grep -Fxq "high priority"; then if [ "$INCLUDE_HIGH_PRIORITY" != "true" ]; then echo " 🛑 Skipping (high priority label)" continue fi echo " ⚠️ High priority PR, but include_high_priority is enabled" fi echo " 🚫 Cancelling..." gh run cancel "$run_id" --repo "$REPO" || echo " ⚠️ Cancellation failed" done fi echo "" done echo "=========================================" echo "✅ Processing complete" echo "========================================="

View raw YAML

# Manually triggered maintenance workflow. For each listed workflow file it
# finds runs that are queued / waiting / in progress, prints the non-PR ones,
# and cancels the PR-triggered ones unless the PR carries a protected label
# (`bypass-maintenance` always; `high priority` unless explicitly included).
name: Cancel Unfinished PR Runs

on:
  workflow_dispatch:
    inputs:
      workflows:
        description: 'Space-separated list of workflow filenames to cancel'
        required: true
        type: string
        default: 'pr-test.yml'
      include_high_priority:
        description: 'Also cancel runs from high-priority PRs'
        required: false
        type: boolean
        default: false

permissions:
  actions: write   # Needed to cancel runs
  contents: read   # Needed to read repo info
  pull-requests: read  # needed for gh pr view (labels)

jobs:
  cancel-unfinished-pr-runs:
    runs-on: ubuntu-latest
    steps:
      - name: Install GitHub CLI
        run: sudo apt-get install -y gh jq

      - name: Cancel unfinished PR-associated runs (skip high-priority PRs)
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          WORKFLOWS: ${{ github.event.inputs.workflows || 'pr-test.yml' }}
          INCLUDE_HIGH_PRIORITY: ${{ github.event.inputs.include_high_priority || 'false' }}
        shell: bash
        run: |
          set -euo pipefail

          # Read the space-separated string from the input into a bash array
          read -r -a WORKFLOW_FILES <<< "${WORKFLOWS}"

          echo "Targeting ${#WORKFLOW_FILES[@]} workflow(s): ${WORKFLOWS}"
          echo ""

          for workflow_file in "${WORKFLOW_FILES[@]}"; do
            echo "========================================="
            echo "Workflow: $workflow_file"
            echo "========================================="

            # Get all unfinished runs (one compact JSON object per line)
            all_runs=$(gh run list \
              --repo "$REPO" \
              --workflow "$workflow_file" \
              --json databaseId,status,event,url,createdAt \
              --limit 1000 \
            | jq -c '.[] | select(.status=="queued" or .status=="waiting" or .status=="in_progress")')

            if [ -z "$all_runs" ]; then
              echo "✅ No unfinished runs found"
              echo ""
              continue
            fi

            # Count runs by event type
            total_runs=$(echo "$all_runs" | wc -l)
            pr_runs=$(echo "$all_runs" | jq -s '[.[] | select(.event=="pull_request")] | length')
            other_runs=$(echo "$all_runs" | jq -s '[.[] | select(.event!="pull_request")] | length')

            echo "📊 Summary: $total_runs unfinished runs ($pr_runs PR-related, $other_runs other)"
            echo ""

            # Process non-PR runs first (reported only, never cancelled)
            if [ "$other_runs" -gt 0 ]; then
              echo "--- Non-PR Runs ---"
              echo "$all_runs" | jq -c 'select(.event!="pull_request")' | while read -r run; do
                run_url=$(echo "$run" | jq -r '.url')
                run_event=$(echo "$run" | jq -r '.event')
                run_status=$(echo "$run" | jq -r '.status')
                echo "  • $run_event ($run_status): $run_url"
              done
              echo ""
            fi

            # Process PR runs
            if [ "$pr_runs" -gt 0 ]; then
              echo "--- PR Runs (checking for cancellation) ---"
              echo "$all_runs" | jq -c 'select(.event=="pull_request")' | while read -r run; do
                run_id=$(echo "$run" | jq -r '.databaseId')
                run_url=$(echo "$run" | jq -r '.url')
                run_status=$(echo "$run" | jq -r '.status')

                echo ""
                echo "Run ($run_status): $run_url"

                # Fetch full run details to get head repository and branch info
                run_details=$(gh api -H "Accept: application/vnd.github+json" \
                  "repos/$REPO/actions/runs/$run_id" 2>/dev/null || true)

                if [ -z "$run_details" ]; then
                  echo "  ⚠️  Could not fetch run details, skipping"
                  continue
                fi

                # Get head owner and branch (works for both fork and non-fork PRs)
                head_owner=$(echo "$run_details" | jq -r '.head_repository.owner.login // empty')
                head_branch=$(echo "$run_details" | jq -r '.head_branch // empty')

                if [ -z "$head_owner" ] || [ -z "$head_branch" ]; then
                  echo "  ⚠️  Missing head info, skipping"
                  continue
                fi

                echo "  Branch: ${head_owner}:${head_branch}"

                # Find PR by searching with head=owner:branch
                pr_number=$(gh api -H "Accept: application/vnd.github+json" \
                  "repos/$REPO/pulls?state=open&head=${head_owner}:${head_branch}" \
                  --jq '.[0].number // empty' 2>/dev/null || true)

                if [ -z "$pr_number" ]; then
                  echo "  ⚠️  No open PR found, skipping"
                  continue
                fi

                pr_url="https://github.com/$REPO/pull/$pr_number"
                echo "  PR: $pr_url"

                # Look up the PR's labels. If the lookup itself fails we cannot
                # rule out a protected label, so the run is skipped rather than
                # cancelled (an empty label list from a failed call must not be
                # mistaken for "no protected labels").
                if ! labels=$(gh pr view "$pr_number" --repo "$REPO" \
                  --json labels --jq '.labels[].name'); then
                  echo "  ⚠️  Could not fetch PR labels, skipping"
                  continue
                fi

                # Here-strings are used instead of `echo | grep -q`: under
                # `pipefail`, grep exiting early can make the pipeline report
                # failure even though the label matched.
                if grep -Fxq "bypass-maintenance" <<< "$labels"; then
                  echo "  🛑 Skipping (bypass-maintenance label, never cancelled)"
                  continue
                fi

                if grep -Fxq "high priority" <<< "$labels"; then
                  if [ "$INCLUDE_HIGH_PRIORITY" != "true" ]; then
                    echo "  🛑 Skipping (high priority label)"
                    continue
                  fi
                  echo "  ⚠️  High priority PR, but include_high_priority is enabled"
                fi

                echo "  🚫 Cancelling..."
                gh run cancel "$run_id" --repo "$REPO" || echo "  ⚠️  Cancellation failed"
              done
            fi

            echo ""
          done

          echo "========================================="
          echo "✅ Processing complete"
          echo "========================================="

ci-coverage-overview .github/workflows/ci-coverage-overview.yml

Triggers

schedule, pull_request, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest, ubuntu-latest, 1-gpu-h100, ubuntu-latest

Jobs

summary, by-folder, by-suite, unit-test-coverage, json-export

Commands

python scripts/ci/utils/ci_coverage_report.py --section summary
python scripts/ci/utils/ci_coverage_report.py --section by-folder
python scripts/ci/utils/ci_coverage_report.py --section by-suite
pip install -e "python/[test]"
pytest test/registered/unit/ \ --cov --cov-config=.coveragerc \ --cov-report=term-missing:skip-covered \ --continue-on-collection-errors \ -v | tee coverage_output.txt
echo "## Unit Test Code Coverage" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "**Commit:** \`${GITHUB_SHA::8}\` | **Branch:** \`${GITHUB_REF_NAME}\`" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY # Test result line (e.g., "== 42 passed, 1 failed in 23.5s ==") echo '```' >> $GITHUB_STEP_SUMMARY grep -E '^=+.*passed' coverage_output.txt >> $GITHUB_STEP_SUMMARY || true echo "" >> $GITHUB_STEP_SUMMARY # Coverage total grep -E '^TOTAL ' coverage_output.txt >> $GITHUB_STEP_SUMMARY || true echo '```' >> $GITHUB_STEP_SUMMARY # Partially covered core modules (1-49%) — most actionable for contributors # Only show modules with testable logic; skip configs, models, layers, etc. LOW_COV=$(awk '/^python\/.*%/ { for (i=1; i<=NF; i++) { if ($i ~ /^[0-9]+%$/) { pct = $i + 0 if (pct >= 1 && pct < 50) printf "%-80s %5s %s\n", $1, $(i-2), $i break } } }' coverage_output.txt \ | grep -E '/(mem_cache|managers|sampling|parser|observability|function_call|entrypoints|speculative|multimodal|utils)/' \ | head -40 || true) if [ -n "$LOW_COV" ]; then echo "" >> $GITHUB_STEP_SUMMARY echo "<details><summary>Core modules with coverage below 50% — good candidates for more unit tests</summary>" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY echo "$LOW_COV" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY echo "</details>" >> $GITHUB_STEP_SUMMARY fi
python scripts/ci/utils/ci_coverage_report.py --output-format json > ci_coverage.json

View raw YAML

name: CI Coverage Overview

on:
  schedule:
    - cron: '0 6 * * *'  # Daily at 6 AM UTC
  pull_request:
    paths:
      - '.github/workflows/ci-coverage-overview.yml'
      - 'scripts/ci/utils/ci_coverage_report.py'
      - 'test/registered/**'
  workflow_dispatch:
    inputs:
      output_format:
        description: 'Output format'
        required: false
        default: 'markdown'
        type: choice
        options:
          - markdown
          - json

jobs:
  summary:
    name: Summary
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Generate Summary Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --section summary

  by-folder:
    name: Tests by Folder
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Generate Tests by Folder Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --section by-folder

  by-suite:
    name: Tests by Suite
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Generate Tests by Suite Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --section by-suite

  # Runs the registered unit tests under coverage on a GPU runner and writes a
  # coverage digest to the job summary. Skipped for pull_request events.
  unit-test-coverage:
    name: Unit Test Code Coverage
    if: github.event_name != 'pull_request'
    runs-on: 1-gpu-h100
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        timeout-minutes: 10
        run: |
          pip install -e "python/[test]"

      - name: Run unit tests with coverage
        timeout-minutes: 10
        run: |
          # The output is piped through tee so the summary step can parse it.
          # Without pipefail the pipeline's status would be tee's (always 0)
          # and failing tests would be reported as a passing step.
          set -o pipefail
          pytest test/registered/unit/ \
            --cov --cov-config=.coveragerc \
            --cov-report=term-missing:skip-covered \
            --continue-on-collection-errors \
            -v | tee coverage_output.txt

      # Runs even when the tests fail so the summary is always populated.
      - name: Write coverage to summary
        if: always()
        run: |
          echo "## Unit Test Code Coverage" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Commit:** \`${GITHUB_SHA::8}\` | **Branch:** \`${GITHUB_REF_NAME}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY

          # Test result line (e.g., "== 42 passed, 1 failed in 23.5s ==")
          echo '```' >> $GITHUB_STEP_SUMMARY
          grep -E '^=+.*passed' coverage_output.txt >> $GITHUB_STEP_SUMMARY || true
          echo "" >> $GITHUB_STEP_SUMMARY
          # Coverage total
          grep -E '^TOTAL ' coverage_output.txt >> $GITHUB_STEP_SUMMARY || true
          echo '```' >> $GITHUB_STEP_SUMMARY

          # Partially covered core modules (1-49%) — most actionable for contributors
          # Only show modules with testable logic; skip configs, models, layers, etc.
          # The awk pass finds the first "NN%" field on each python/ line and
          # prints the path, the field two columns before it, and the percentage.
          LOW_COV=$(awk '/^python\/.*%/ {
            for (i=1; i<=NF; i++) {
              if ($i ~ /^[0-9]+%$/) {
                pct = $i + 0
                if (pct >= 1 && pct < 50) printf "%-80s %5s  %s\n", $1, $(i-2), $i
                break
              }
            }
          }' coverage_output.txt \
            | grep -E '/(mem_cache|managers|sampling|parser|observability|function_call|entrypoints|speculative|multimodal|utils)/' \
            | head -40 || true)
          if [ -n "$LOW_COV" ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "<details><summary>Core modules with coverage below 50% — good candidates for more unit tests</summary>" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
            echo "$LOW_COV" >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
            echo "</details>" >> $GITHUB_STEP_SUMMARY
          fi

  # Produces the CI coverage report as JSON and publishes it as an artifact.
  json-export:
    name: JSON Export
    # Only runs when the JSON output format was requested.
    if: inputs.output_format == 'json'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      # The report is written to stdout and redirected into the uploaded file.
      - name: Generate JSON Report
        run: python scripts/ci/utils/ci_coverage_report.py --output-format json > ci_coverage.json

      - name: Upload JSON artifact
        uses: actions/upload-artifact@v4
        with:
          path: ci_coverage.json
          name: ci-coverage-report

ci-failure-monitor perms .github/workflows/ci-failure-monitor.yml

Triggers

schedule, workflow_dispatch

Runs on

ubuntu-latest

Jobs

failure-analysis

Commands

python -m pip install --upgrade pip pip install requests slack_sdk
cd scripts/ci_monitor python ci_failures_analysis.py \ --token $GITHUB_TOKEN \ --limit 100 \ --output ci_failure_analysis_$(date +%Y%m%d_%H%M%S).json
cd scripts/ci_monitor LATEST_REPORT=$(ls -t ci_failure_analysis_*.json | head -1) if [ ! -f "$LATEST_REPORT" ]; then echo "No report found, so skipping Slack notification" exit 0 fi if [ -n "$SGLANG_DIFFUSION_SLACK_TOKEN" ]; then python3 post_ci_failures_to_slack.py --report-file "$LATEST_REPORT" else echo "SGLANG_DIFFUSION_SLACK_TOKEN not configured, skipping notification" fi

View raw YAML

# Periodic monitor that analyzes recent CI failures.
name: CI Failure Monitor

on:
  schedule:
    - cron: '0 */12 * * *' # Every 12 hours
  # Manual runs from the Actions tab are also allowed.
  workflow_dispatch:

# A newer run on the same ref cancels one that is still in progress.
concurrency:
  group: ci-failure-monitor-${{ github.ref }}
  cancel-in-progress: true

# The default token only gets read access to contents and Actions data.
permissions:
  contents: read
  actions: read

jobs:
  # Runs the failure-analysis script, uploads its JSON report as an artifact,
  # and posts the newest report to Slack when a Slack token is configured.
  failure-analysis:
    # NOTE(review): the workflow only triggers on schedule/workflow_dispatch, so
    # the `pull_request` clause never matches as written — confirm whether it is
    # still needed.
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.14'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests slack_sdk

      - name: Run Failure Analysis
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GH_PAT_FOR_RUNNER_ADMIN: ${{ secrets.GH_PAT_FOR_RUNNER_ADMIN }}
          # Environment values are strings; quote so the YAML type matches.
          PYTHONUNBUFFERED: "1"
          PYTHONIOENCODING: utf-8
        run: |
          cd scripts/ci_monitor
          # Quote expansions so the token and file name are always passed as
          # exactly one argument each.
          python ci_failures_analysis.py \
            --token "$GITHUB_TOKEN" \
            --limit 100 \
            --output "ci_failure_analysis_$(date +%Y%m%d_%H%M%S).json"

      - name: Upload Analysis Results
        uses: actions/upload-artifact@v4
        with:
          name: ci-failure-analysis-${{ github.run_number }}
          path: |
            scripts/ci_monitor/ci_failure_analysis_*.json
          retention-days: 7

      # Runs even if an earlier step failed; it exits cleanly when there is no
      # report or no Slack token.
      - name: Send Slack Notification
        if: always()
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        run: |
          cd scripts/ci_monitor
          # Newest report first; suppress the ls error when nothing matches so
          # the "no report" branch below is the only message printed.
          LATEST_REPORT=$(ls -t ci_failure_analysis_*.json 2>/dev/null | head -1)

          if [ ! -f "$LATEST_REPORT" ]; then
            echo "No report found, so skipping Slack notification"
            exit 0
          fi

          if [ -n "$SGLANG_DIFFUSION_SLACK_TOKEN" ]; then
            python3 post_ci_failures_to_slack.py --report-file "$LATEST_REPORT"
          else
            echo "SGLANG_DIFFUSION_SLACK_TOKEN not configured, skipping notification"
          fi

close-inactive-issues perms .github/workflows/close-inactive-issues.yml

Triggers: schedule, workflow_dispatch
Runs on: ubuntu-latest
Jobs: close-inactive-issues

View raw YAML

# Housekeeping workflow that closes open issues with no recent updates.
name: Close Inactive Issues

on:
  schedule:
    # Once a day at 00:00.
    - cron: '0 0 * * *'
  # Manual runs from the Actions tab are also allowed.
  workflow_dispatch:

# The job edits and comments on issues, so it needs write access to issues.
permissions:
  issues: write
  contents: read

jobs:
  # Walks open issues from least to most recently updated and closes those that
  # have not been updated for 60 days, adding an `inactive` label and a comment.
  close-inactive-issues:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    steps:
      - name: Check and close inactive issues
        uses: actions/github-script@v7
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            // Anything last updated before this instant counts as inactive.
            const sixtyDaysAgo = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000);

            const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/');
            console.log(`Owner: ${owner}, Repo: ${repo}`);

            // Fetch one page of open issues, oldest `updated_at` first.
            async function fetchIssues(page = 1) {
              console.log(`Fetching issues for ${owner}/${repo}, page ${page}`);
              return await github.rest.issues.listForRepo({
                owner,
                repo,
                state: 'open',
                sort: 'updated',
                direction: 'asc',
                per_page: 100,
                page: page
              });
            }

            // Page through the list until an active issue or an empty page is hit.
            // NOTE(review): closing an issue removes it from the `open` listing,
            // so later pages shift while we paginate and some inactive issues
            // may only be picked up by a subsequent scheduled run.
            async function processIssues() {
              console.log('Starting to process issues');
              console.log(`Repository: ${owner}/${repo}`);

              let page = 1;
              let hasMoreIssues = true;
              while (hasMoreIssues) {
                try {
                  const issues = await fetchIssues(page);
                  console.log(`Fetched ${issues.data.length} issues on page ${page}`);

                  if (issues.data.length === 0) {
                    hasMoreIssues = false;
                    break;
                  }

                  for (const issue of issues.data) {
                    // The issues endpoint also returns pull requests (marked by
                    // a `pull_request` key). This workflow targets issues only
                    // and requests no pull-request permission, so skip them.
                    if (issue.pull_request) {
                      console.log(`Skipping #${issue.number} as it is a pull request`);
                      continue;
                    }
                    // Skip if the issue has 'good first issue' label
                    if (issue.labels.some(label => label.name === 'good first issue')) {
                      console.log(`Skipping issue #${issue.number} as it's marked as 'good first issue'`);
                      continue;
                    }
                    if (new Date(issue.updated_at) < sixtyDaysAgo) {
                      try {
                        // Keep the existing labels and append `inactive`.
                        await github.rest.issues.update({
                          owner,
                          repo,
                          issue_number: issue.number,
                          state: 'closed',
                          labels: [...issue.labels.map(l => l.name), 'inactive']
                        });
                        await github.rest.issues.createComment({
                          owner,
                          repo,
                          issue_number: issue.number,
                          body: 'This issue has been automatically closed due to inactivity. Please feel free to reopen it if needed.'
                        });
                        console.log(`Closed issue #${issue.number} due to inactivity.`);
                      } catch (error) {
                        // Best effort: log and move on to the next issue.
                        console.error(`Failed to close issue #${issue.number}: ${error.message}`);
                      }
                    } else {
                      // Results are sorted by update time, so everything after
                      // the first active issue is active as well.
                      console.log(`Issue #${issue.number} is still active. Stopping processing.`);
                      hasMoreIssues = false;
                      break;
                    }
                  }
                  page += 1;
                } catch (error) {
                  console.error(`Error fetching issues on page ${page}: ${error.message}`);
                  hasMoreIssues = false;
                }
              }
              console.log('Finished processing issues');
            }

            await processIssues();

diffusion-ci-gt-gen matrix perms .github/workflows/diffusion-ci-gt-gen.yml

Triggers

workflow_dispatch

Runs on

1-gpu-h100, 2-gpu-h100, ubuntu-latest

Jobs

multimodal-diffusion-gen-1gpu, multimodal-diffusion-gen-2gpu, diffusion-ci-push

Matrix

part→ 0, 1

Commands

bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd python python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \ --suite 1-gpu \ --partition-id ${{ matrix.part }} \ --total-partitions 2 \ --out-dir ./diffusion-ci-outputs \ ${{ inputs.case_ids != '' && format('--case-ids {0}', inputs.case_ids) || '' }}
bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd python python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \ --suite 2-gpu \ --partition-id ${{ matrix.part }} \ --total-partitions 2 \ --out-dir ./diffusion-ci-outputs \ ${{ inputs.case_ids != '' && format('--case-ids {0}', inputs.case_ids) || '' }}
mkdir -p gt_images find combined \( -name "*.png" -o -name "*.jpg" -o -name "*.jpeg" -o -name "*.webp" \) -type f -exec cp -f {} gt_images/ \;
python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir gt_images

View raw YAML

# Manually triggered workflow that regenerates diffusion CI ground-truth outputs.
name: Diffusion CI Ground Truth Generation

on:
  workflow_dispatch:
    inputs:
      # Empty means "use the ref the workflow was dispatched on" (see the
      # `inputs.ref || github.ref` fallback in the generation jobs).
      ref:
        description: 'Git ref to checkout'
        required: false
        default: ''
        type: string
      # Empty means no --case-ids filter is passed to the generator.
      case_ids:
        description: 'Specific case IDs to run (space-separated, optional)'
        required: false
        default: ''
        type: string

# A newer run on the same ref cancels one that is still in progress.
concurrency:
  group: diffusion-ci-gt-gen-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write
  actions: read

jobs:
  # Generates the 1-GPU suite outputs in two partitions (matrix.part) and
  # uploads each partition as an artifact for the push job.
  multimodal-diffusion-gen-1gpu:
    if: github.repository == 'sgl-project/sglang'
    runs-on: 1-gpu-h100
    strategy:
      matrix:
        part: [0, 1]
    timeout-minutes: 150
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Generate outputs
        # The free-form input is handed over through the environment instead of
        # being expanded into the script text, so it can never be executed as
        # shell code.
        env:
          CASE_IDS: ${{ inputs.case_ids }}
        shell: bash
        run: |
          cd python
          # Turn the space-separated IDs into separate arguments; leave the
          # option out entirely when no IDs were given.
          extra_args=()
          if [ -n "$CASE_IDS" ]; then
            read -r -a case_ids <<< "$CASE_IDS"
            extra_args=(--case-ids "${case_ids[@]}")
          fi
          python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            --out-dir ./diffusion-ci-outputs \
            "${extra_args[@]}"

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-gen-1gpu-part${{ matrix.part }}
          path: python/diffusion-ci-outputs
          retention-days: 7

  # Generates the 2-GPU suite outputs in two partitions (matrix.part) and
  # uploads each partition as an artifact for the push job.
  multimodal-diffusion-gen-2gpu:
    if: github.repository == 'sgl-project/sglang'
    runs-on: 2-gpu-h100
    strategy:
      matrix:
        part: [0, 1]
    timeout-minutes: 150
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Generate outputs
        # The free-form input is handed over through the environment instead of
        # being expanded into the script text, so it can never be executed as
        # shell code.
        env:
          CASE_IDS: ${{ inputs.case_ids }}
        shell: bash
        run: |
          cd python
          # Turn the space-separated IDs into separate arguments; leave the
          # option out entirely when no IDs were given.
          extra_args=()
          if [ -n "$CASE_IDS" ]; then
            read -r -a case_ids <<< "$CASE_IDS"
            extra_args=(--case-ids "${case_ids[@]}")
          fi
          python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            --out-dir ./diffusion-ci-outputs \
            "${extra_args[@]}"

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-gen-2gpu-part${{ matrix.part }}
          path: python/diffusion-ci-outputs
          retention-days: 7

  # Gathers the images produced by both generation jobs and publishes them.
  diffusion-ci-push:
    if: github.repository == 'sgl-project/sglang'
    needs:
      - multimodal-diffusion-gen-1gpu
      - multimodal-diffusion-gen-2gpu
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Every diffusion-gen-* artifact is merged into one `combined` directory.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: combined
          pattern: diffusion-gen-*
          merge-multiple: true

      # Copies all image files into a flat gt_images/ directory.
      - name: Collect image files
        run: |
          mkdir -p gt_images
          find combined \( -name "*.png" -o -name "*.jpg" -o -name "*.jpeg" -o -name "*.webp" \) -type f -exec cp -f {} gt_images/ \;

      - name: Publish GT images to sglang-bot/sglang-ci-data
        run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir gt_images
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}

execute-notebook .github/workflows/execute-notebook.yml

Triggers

pull_request, workflow_dispatch

Runs on

1-gpu-h100, ubuntu-latest

Jobs

call-gate, run-all-notebooks, notebook-finish

Commands

bash scripts/ci/cuda/ci_install_dependency.sh pip install -r docs/requirements.txt apt-get update && apt-get install -y pandoc parallel retry ln -sf "$(which python3)" /usr/bin/python
python -m ipykernel install --user --name python3 --display-name "Python 3"
cd docs make clean make compile
results=(${{ join(needs.*.result, ' ') }}) for result in "${results[@]}"; do if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then echo "Job failed with result: $result" exit 1 fi done echo "All jobs completed successfully" exit 0

View raw YAML

name: Execute Notebooks

# Runs for pull requests against main that touch sglang sources or docs
# (Markdown files are excluded), and on manual dispatch.
on:
  pull_request:
    branches:
      - main
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    paths:
      - 'python/sglang/**'
      - 'docs/**'
      - '!python/sglang/**/*.md'
      - '!docs/**/*.md'
  workflow_dispatch:

# A newer run on the same ref cancels one that is still in progress.
concurrency:
  group: execute-notebook-${{ github.ref }}
  cancel-in-progress: true

env:
  SGLANG_IS_IN_CI: true

jobs:
  # Delegates the gate check to the reusable pr-gate workflow, passing all
  # secrets through.
  call-gate:
    # Align with PR Test: fail fast if PR doesn't have run-ci label.
    # This makes /tag-and-rerun-ci work by rerunning this failed workflow.
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit

  # Installs the docs toolchain on a GPU runner and executes the notebooks via
  # the docs Makefile.
  run-all-notebooks:
    needs: call-gate
    # Non-PR events run unconditionally; PRs require the gate to have passed.
    if: github.event_name != 'pull_request' || needs.call-gate.result == 'success'
    runs-on: 1-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh
          pip install -r docs/requirements.txt
          apt-get update && apt-get install -y pandoc parallel retry
          ln -sf "$(which python3)" /usr/bin/python

      - name: Setup Jupyter Kernel
        run: python -m ipykernel install --user --name python3 --display-name "Python 3"

      - name: Execute notebooks
        timeout-minutes: 40
        run: |
          cd docs
          make clean
          make compile


  # Aggregates the results of the jobs above into a single pass/fail status.
  notebook-finish:
    needs:
      - call-gate
      - run-all-notebooks
    # Always evaluated, except when the notebook job was skipped.
    if: always() && needs.run-all-notebooks.result != 'skipped'
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          # Any failed or cancelled dependency fails this job.
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            case "$result" in
              failure|cancelled)
                echo "Job failed with result: $result"
                exit 1
                ;;
            esac
          done
          echo "All jobs completed successfully"
          exit 0

labeler perms .github/workflows/labeler.yml

Triggers: pull_request_target
Runs on: ubuntu-latest
Jobs: label
Actions: actions/labeler

View raw YAML

name: Auto Label PRs

on:
  pull_request_target:
    types:
      - opened
      - synchronize
      - reopened

# Labels are written on the pull request; repository contents are only read.
permissions:
  contents: read
  pull-requests: write

jobs:
  label:
    runs-on: ubuntu-latest
    steps:
      # Label rules are read from .github/labeler.yml.
      - name: Auto-label by file changes
        uses: actions/labeler@v5
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          configuration-path: .github/labeler.yml
          sync-labels: false

lint .github/workflows/lint.yml

Triggers

push, pull_request

Runs on

ubuntu-latest

Jobs

lint

Actions

lycheeverse/lychee-action, DoozyX/clang-format-lint-action

Commands

python -m pip install pre-commit pre-commit install
SKIP=no-commit-to-branch pre-commit run --all-files --show-diff-on-failure

View raw YAML

# Runs pre-commit, offline link checks, and clang-format checks on pushes to
# main and on pull requests targeting main.
name: Lint

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Every step only reads the checkout, so keep the default token read-only.
permissions:
  contents: read

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install

      # The no-commit-to-branch hook is skipped for this run.
      - name: Run pre-commit checks
        run: SKIP=no-commit-to-branch pre-commit run --all-files --show-diff-on-failure

      - name: Run lychee docs checks (offline references)
        uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2
        with:
          args: --config .github/linters/lychee.toml README.md "docs/**/*.md" "docs/**/*.rst" "docs/**/*.ipynb"

      - name: Run sgl-kernel clang-format checks
        uses: DoozyX/clang-format-lint-action@v0.20
        with:
          source: sgl-kernel
          extensions: h,c,cpp,hpp,cu,cuh,cc
          clangFormatVersion: 20
          style: file

list-active-pr-runs perms .github/workflows/list-active-pr-runs.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

list-active-runs

Commands

sudo apt-get install -y gh jq
set -euo pipefail echo "=========================================" echo "🔍 Active Workflow Runs Report" echo "=========================================" echo "" # Get all workflows or specific ones read -r -a workflow_files <<< "${WORKFLOWS}" echo "📋 Checking specified workflows: ${WORKFLOWS}" echo "" # Create a temporary file to store PR data pr_data_file=$(mktemp) # Process each workflow for workflow_file in ${workflow_files[@]}; do echo "Scanning workflow: $workflow_file" # Get all active runs (queued, waiting, in_progress) active_runs=$(gh run list \ --repo "$REPO" \ --workflow "$workflow_file" \ --json databaseId,status,event,headBranch,createdAt,updatedAt,headSha,number,attempt \ --limit 500 \ | jq -c '.[] | select(.status=="queued" or .status=="waiting" or .status=="in_progress")') if [ -z "$active_runs" ]; then continue fi # Process each run echo "$active_runs" | while read -r run; do run_id=$(echo "$run" | jq -r '.databaseId') run_status=$(echo "$run" | jq -r '.status') run_event=$(echo "$run" | jq -r '.event') created_at=$(echo "$run" | jq -r '.createdAt') head_sha=$(echo "$run" | jq -r '.headSha') run_number=$(echo "$run" | jq -r '.number') run_attempt=$(echo "$run" | jq -r '.attempt // 1') # Get detailed run information including jobs run_details=$(gh api "repos/$REPO/actions/runs/$run_id" 2>/dev/null || true) if [ -z "$run_details" ]; then continue fi head_owner=$(echo "$run_details" | jq -r '.head_repository.owner.login // empty') head_branch=$(echo "$run_details" | jq -r '.head_branch // empty') if [ -z "$head_owner" ] || [ -z "$head_branch" ]; then continue fi # Find PR number (may be empty for non-PR runs) pr_number=$(gh api "repos/$REPO/pulls?state=open&head=${head_owner}:${head_branch}" \ --jq '.[0].number // empty' 2>/dev/null || true) if [ -z "$pr_number" ]; then pr_number="NO_PR" fi # Get jobs for this run (with pagination to avoid missing jobs) jobs=$(gh api "repos/$REPO/actions/runs/$run_id/jobs" --paginate --jq '.jobs[]' | jq -s '.') 
running_jobs=$(echo "$jobs" | jq '[.[] | select(.status=="in_progress")] | length') queued_jobs=$(echo "$jobs" | jq '[.[] | select(.status=="queued" or .status=="waiting")] | length') # Get runner info for running jobs runners=$(echo "$jobs" | jq -r '.[] | select(.status=="in_progress") | .runner_name // "N/A"' | paste -sd "," -) # Calculate queue time current_time=$(date -u +%s) created_time=$(date -u -d "$created_at" +%s 2>/dev/null || echo "$current_time") queue_time=$((current_time - created_time)) queue_minutes=$((queue_time / 60)) # Store data in temporary file (unified format with event and branch) echo "$pr_number|$workflow_file|$run_id|$run_status|$running_jobs|$queued_jobs|$runners|$queue_minutes|$created_at|$head_sha|$run_attempt|$run_event|$head_branch" >> "$pr_data_file" done done echo "" echo "=========================================" echo "📊 Active Runs Summary" echo "=========================================" echo "" if [ ! -s "$pr_data_file" ]; then echo "✅ No active runs found" rm -f "$pr_data_file" exit 0 fi # Get unique PR numbers (exclude NO_PR entries) pr_numbers=$(cut -d'|' -f1 < "$pr_data_file" | grep -v '^NO_PR$' | sort -u || true) # Separate high priority and normal PRs high_priority_prs=() normal_prs=() for pr_num in $pr_numbers; do labels=$(gh pr view "$pr_num" --repo "$REPO" --json labels \ | jq -r '.labels[].name' 2>/dev/null || true) if echo "$labels" | grep -Fxq "high priority"; then high_priority_prs+=($pr_num) else normal_prs+=($pr_num) fi done # Combine: high priority first, then normal sorted_pr_numbers=("${high_priority_prs[@]}" "${normal_prs[@]}") pr_count=0 total_running=0 total_queued=0 for pr_num in "${sorted_pr_numbers[@]}"; do pr_count=$((pr_count + 1)) # Get PR details pr_info=$(gh pr view "$pr_num" --repo "$REPO" --json title,author,labels,url 2>/dev/null || true) if [ -z "$pr_info" ]; then continue fi pr_title=$(echo "$pr_info" | jq -r '.title') pr_author=$(echo "$pr_info" | jq -r '.author.login') pr_url=$(echo 
"$pr_info" | jq -r '.url') pr_labels=$(echo "$pr_info" | jq -r '.labels[].name' | paste -sd ", " -) if [ -z "$pr_labels" ]; then pr_labels="(no labels)" fi # Add priority indicator priority_indicator="" if echo "$pr_labels" | grep -q "high priority"; then priority_indicator="🔴 [HIGH PRIORITY] " fi echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "🔗 ${priority_indicator}PR #$pr_num: $pr_title" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "👤 Author: $pr_author" echo "🏷️ Labels: $pr_labels" echo "🔗 URL: $pr_url" echo "" # Get all runs for this PR pr_runs=$(grep "^$pr_num|" "$pr_data_file") pr_running_total=0 pr_queued_total=0 echo "$pr_runs" | while read -r line; do workflow=$(echo "$line" | cut -d'|' -f2) run_id=$(echo "$line" | cut -d'|' -f3) status=$(echo "$line" | cut -d'|' -f4) running=$(echo "$line" | cut -d'|' -f5) queued=$(echo "$line" | cut -d'|' -f6) runners=$(echo "$line" | cut -d'|' -f7) queue_min=$(echo "$line" | cut -d'|' -f8) created=$(echo "$line" | cut -d'|' -f9) attempt=$(echo "$line" | cut -d'|' -f11) pr_running_total=$((pr_running_total + running)) pr_queued_total=$((pr_queued_total + queued)) run_url="https://github.com/$REPO/actions/runs/$run_id" # Calculate retry count for this specific run retry_count=$((attempt - 1)) # Show retry indicator retry_indicator="" if [ "$retry_count" -gt 0 ]; then retry_indicator=" 🔄 Retry #$retry_count" fi echo " 📦 Workflow: $workflow (Run #$run_id)$retry_indicator" echo " Status: $status" echo " 🟢 Running jobs: $running" echo " 🟡 Queued jobs: $queued" if [ "$running" -gt 0 ] && [ "$runners" != "" ]; then echo " 🖥️ Runners: $runners" fi if [ "$queue_min" -gt 0 ]; then echo " ⏱️ Queue time: ${queue_min} minutes" fi echo " 🔗 Run URL: $run_url" echo "" done # Summary for this PR pr_running_total=$(grep "^$pr_num|" "$pr_data_file" | cut -d'|' -f5 | awk '{sum+=$1} END {print sum+0}') pr_queued_total=$(grep "^$pr_num|" "$pr_data_file" | cut -d'|' -f6 | awk '{sum+=$1} END {print sum+0}') 
total_running=$((total_running + pr_running_total)) total_queued=$((total_queued + pr_queued_total)) echo " 📊 PR Total: $pr_running_total running, $pr_queued_total queued" echo "" done # --- Non-PR Runs Section --- non_pr_runs=$(grep '^NO_PR|' "$pr_data_file" 2>/dev/null || true) non_pr_running=0 non_pr_queued=0 if [ -n "$non_pr_runs" ]; then echo "=========================================" echo "📦 Non-PR Runs (manual / scheduled / other)" echo "=========================================" echo "" echo "$non_pr_runs" | while read -r line; do workflow=$(echo "$line" | cut -d'|' -f2) run_id=$(echo "$line" | cut -d'|' -f3) status=$(echo "$line" | cut -d'|' -f4) running=$(echo "$line" | cut -d'|' -f5) queued=$(echo "$line" | cut -d'|' -f6) runners=$(echo "$line" | cut -d'|' -f7) queue_min=$(echo "$line" | cut -d'|' -f8) created=$(echo "$line" | cut -d'|' -f9) attempt=$(echo "$line" | cut -d'|' -f11) event=$(echo "$line" | cut -d'|' -f12) branch=$(echo "$line" | cut -d'|' -f13) run_url="https://github.com/$REPO/actions/runs/$run_id" retry_count=$((attempt - 1)) retry_indicator="" if [ "$retry_count" -gt 0 ]; then retry_indicator=" 🔄 Retry #$retry_count" fi echo " 📦 Workflow: $workflow (Run #$run_id)$retry_indicator" echo " Event: $event" echo " Branch: $branch" echo " Status: $status" echo " 🟢 Running jobs: $running" echo " 🟡 Queued jobs: $queued" if [ "$running" -gt 0 ] && [ "$runners" != "" ]; then echo " 🖥️ Runners: $runners" fi if [ "$queue_min" -gt 0 ]; then echo " ⏱️ Queue time: ${queue_min} minutes" fi echo " 🔗 Run URL: $run_url" echo "" done non_pr_running=$(echo "$non_pr_runs" | cut -d'|' -f5 | awk '{sum+=$1} END {print sum+0}') non_pr_queued=$(echo "$non_pr_runs" | cut -d'|' -f6 | awk '{sum+=$1} END {print sum+0}') non_pr_count=$(echo "$non_pr_runs" | wc -l | tr -d ' ') total_running=$((total_running + non_pr_running)) total_queued=$((total_queued + non_pr_queued)) echo " 📊 Non-PR Total: $non_pr_running running, $non_pr_queued queued" echo "" fi # Overall 
summary echo "=========================================" echo "📈 Overall Summary" echo "=========================================" echo "Total PRs with active runs: $pr_count" echo "Total non-PR active runs: ${non_pr_count:-0}" echo "Total running jobs: $total_running" echo "Total queued jobs: $total_queued" echo "=========================================" # Cleanup rm -f "$pr_data_file"

View raw YAML

# Manually triggered report of workflow runs that are still queued, waiting, or
# in progress.
name: List Active Runs

on:
  workflow_dispatch:
    inputs:
      # Defaults to the PR test workflow when nothing is supplied.
      workflows:
        description: 'Space-separated list of workflow filenames to check'
        required: false
        type: string
        default: 'pr-test.yml'

# Read-only token scopes for this workflow.
permissions:
  actions: read
  contents: read
  pull-requests: read

jobs:
  list-active-runs:
    runs-on: ubuntu-latest
    steps:
      - name: Install GitHub CLI
        run: sudo apt-get install -y gh jq

      - name: List active runs grouped by PR
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          WORKFLOWS: ${{ github.event.inputs.workflows || 'pr-test.yml' }}
        shell: bash
        run: |
          set -euo pipefail

          echo "========================================="
          echo "🔍 Active Workflow Runs Report"
          echo "========================================="
          echo ""

          # Get all workflows or specific ones
          read -r -a workflow_files <<< "${WORKFLOWS}"
          echo "📋 Checking specified workflows: ${WORKFLOWS}"

          echo ""

          # Create a temporary file to store PR data
          pr_data_file=$(mktemp)

          # Process each workflow
          for workflow_file in ${workflow_files[@]}; do
            echo "Scanning workflow: $workflow_file"

            # Get all active runs (queued, waiting, in_progress)
            active_runs=$(gh run list \
              --repo "$REPO" \
              --workflow "$workflow_file" \
              --json databaseId,status,event,headBranch,createdAt,updatedAt,headSha,number,attempt \
              --limit 500 \
              | jq -c '.[] | select(.status=="queued" or .status=="waiting" or .status=="in_progress")')

            if [ -z "$active_runs" ]; then
              continue
            fi

            # Process each run
            echo "$active_runs" | while read -r run; do
              run_id=$(echo "$run" | jq -r '.databaseId')
              run_status=$(echo "$run" | jq -r '.status')
              run_event=$(echo "$run" | jq -r '.event')
              created_at=$(echo "$run" | jq -r '.createdAt')
              head_sha=$(echo "$run" | jq -r '.headSha')
              run_number=$(echo "$run" | jq -r '.number')
              run_attempt=$(echo "$run" | jq -r '.attempt // 1')

              # Get detailed run information including jobs
              run_details=$(gh api "repos/$REPO/actions/runs/$run_id" 2>/dev/null || true)

              if [ -z "$run_details" ]; then
                continue
              fi

              head_owner=$(echo "$run_details" | jq -r '.head_repository.owner.login // empty')
              head_branch=$(echo "$run_details" | jq -r '.head_branch // empty')

              if [ -z "$head_owner" ] || [ -z "$head_branch" ]; then
                continue
              fi

              # Find PR number (may be empty for non-PR runs)
              pr_number=$(gh api "repos/$REPO/pulls?state=open&head=${head_owner}:${head_branch}" \
                --jq '.[0].number // empty' 2>/dev/null || true)

              if [ -z "$pr_number" ]; then
                pr_number="NO_PR"
              fi

              # Get jobs for this run (with pagination to avoid missing jobs)
              jobs=$(gh api "repos/$REPO/actions/runs/$run_id/jobs" --paginate --jq '.jobs[]' | jq -s '.')

              running_jobs=$(echo "$jobs" | jq '[.[] | select(.status=="in_progress")] | length')
              queued_jobs=$(echo "$jobs" | jq '[.[] | select(.status=="queued" or .status=="waiting")] | length')

              # Get runner info for running jobs
              runners=$(echo "$jobs" | jq -r '.[] | select(.status=="in_progress") | .runner_name // "N/A"' | paste -sd "," -)

              # Calculate queue time
              current_time=$(date -u +%s)
              created_time=$(date -u -d "$created_at" +%s 2>/dev/null || echo "$current_time")
              queue_time=$((current_time - created_time))
              queue_minutes=$((queue_time / 60))

              # Store data in temporary file (unified format with event and branch)
              echo "$pr_number|$workflow_file|$run_id|$run_status|$running_jobs|$queued_jobs|$runners|$queue_minutes|$created_at|$head_sha|$run_attempt|$run_event|$head_branch" >> "$pr_data_file"
            done
          done

          echo ""
          echo "========================================="
          echo "📊 Active Runs Summary"
          echo "========================================="
          echo ""

          if [ ! -s "$pr_data_file" ]; then
            echo "✅ No active runs found"
            rm -f "$pr_data_file"
            exit 0
          fi

          # Get unique PR numbers (exclude NO_PR entries)
          pr_numbers=$(cut -d'|' -f1 < "$pr_data_file" | grep -v '^NO_PR$' | sort -u || true)

          # Separate high priority and normal PRs
          high_priority_prs=()
          normal_prs=()

          for pr_num in $pr_numbers; do
            labels=$(gh pr view "$pr_num" --repo "$REPO" --json labels \
              | jq -r '.labels[].name' 2>/dev/null || true)

            if echo "$labels" | grep -Fxq "high priority"; then
              high_priority_prs+=($pr_num)
            else
              normal_prs+=($pr_num)
            fi
          done

          # Combine: high priority first, then normal
          sorted_pr_numbers=("${high_priority_prs[@]}" "${normal_prs[@]}")

          pr_count=0
          total_running=0
          total_queued=0

          # Print one section per PR (high priority first) that lists each active
          # run, and add the PR's running/queued job counts to the overall totals.
          for pr_num in "${sorted_pr_numbers[@]}"; do
            pr_count=$((pr_count + 1))

            # Get PR details
            pr_info=$(gh pr view "$pr_num" --repo "$REPO" --json title,author,labels,url 2>/dev/null || true)

            # Skip PRs whose details could not be fetched
            if [ -z "$pr_info" ]; then
              continue
            fi

            pr_title=$(echo "$pr_info" | jq -r '.title')
            pr_author=$(echo "$pr_info" | jq -r '.author.login')
            pr_url=$(echo "$pr_info" | jq -r '.url')
            # One label per line; used for the exact-match priority check below
            pr_label_lines=$(echo "$pr_info" | jq -r '.labels[].name')
            # Join inside jq rather than with `paste -sd ", "`: paste cycles through
            # the delimiter characters, which prints "a,b c,d" instead of "a, b, c, d"
            pr_labels=$(echo "$pr_info" | jq -r '[.labels[].name] | join(", ")')

            if [ -z "$pr_labels" ]; then
              pr_labels="(no labels)"
            fi

            # Add priority indicator. Match the whole label (same rule as the
            # ordering step) so a label that merely contains the text does not count.
            priority_indicator=""
            if grep -Fxq "high priority" <<< "$pr_label_lines"; then
              priority_indicator="🔴 [HIGH PRIORITY] "
            fi

            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
            echo "🔗 ${priority_indicator}PR #$pr_num: $pr_title"
            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
            echo "👤 Author: $pr_author"
            echo "🏷️  Labels: $pr_labels"
            echo "🔗 URL: $pr_url"
            echo ""

            # Get all runs for this PR
            pr_runs=$(grep "^$pr_num|" "$pr_data_file")

            # This loop runs in a pipeline subshell, so it only prints; variables
            # set inside it are not visible afterwards. Totals are computed below.
            echo "$pr_runs" | while read -r line; do
              workflow=$(echo "$line" | cut -d'|' -f2)
              run_id=$(echo "$line" | cut -d'|' -f3)
              status=$(echo "$line" | cut -d'|' -f4)
              running=$(echo "$line" | cut -d'|' -f5)
              queued=$(echo "$line" | cut -d'|' -f6)
              runners=$(echo "$line" | cut -d'|' -f7)
              queue_min=$(echo "$line" | cut -d'|' -f8)
              attempt=$(echo "$line" | cut -d'|' -f11)

              run_url="https://github.com/$REPO/actions/runs/$run_id"

              # Calculate retry count for this specific run
              retry_count=$((attempt - 1))

              # Show retry indicator
              retry_indicator=""
              if [ "$retry_count" -gt 0 ]; then
                retry_indicator=" 🔄 Retry #$retry_count"
              fi

              echo "  📦 Workflow: $workflow (Run #$run_id)$retry_indicator"
              echo "     Status: $status"
              echo "     🟢 Running jobs: $running"
              echo "     🟡 Queued jobs: $queued"

              # Default empty fields to 0 so the numeric tests cannot error out
              if [ "${running:-0}" -gt 0 ] && [ -n "$runners" ]; then
                echo "     🖥️  Runners: $runners"
              fi

              if [ "${queue_min:-0}" -gt 0 ]; then
                echo "     ⏱️  Queue time: ${queue_min} minutes"
              fi

              echo "     🔗 Run URL: $run_url"
              echo ""
            done

            # Summary for this PR (fields 5 and 6 are the running / queued job counts)
            pr_running_total=$(echo "$pr_runs" | cut -d'|' -f5 | awk '{sum+=$1} END {print sum+0}')
            pr_queued_total=$(echo "$pr_runs" | cut -d'|' -f6 | awk '{sum+=$1} END {print sum+0}')

            total_running=$((total_running + pr_running_total))
            total_queued=$((total_queued + pr_queued_total))

            echo "  📊 PR Total: $pr_running_total running, $pr_queued_total queued"
            echo ""
          done

          # --- Non-PR Runs Section ---
          non_pr_runs=$(grep '^NO_PR|' "$pr_data_file" 2>/dev/null || true)
          non_pr_running=0
          non_pr_queued=0

          if [ -n "$non_pr_runs" ]; then
            echo "========================================="
            echo "📦 Non-PR Runs (manual / scheduled / other)"
            echo "========================================="
            echo ""

            echo "$non_pr_runs" | while read -r line; do
              workflow=$(echo "$line" | cut -d'|' -f2)
              run_id=$(echo "$line" | cut -d'|' -f3)
              status=$(echo "$line" | cut -d'|' -f4)
              running=$(echo "$line" | cut -d'|' -f5)
              queued=$(echo "$line" | cut -d'|' -f6)
              runners=$(echo "$line" | cut -d'|' -f7)
              queue_min=$(echo "$line" | cut -d'|' -f8)
              created=$(echo "$line" | cut -d'|' -f9)
              attempt=$(echo "$line" | cut -d'|' -f11)
              event=$(echo "$line" | cut -d'|' -f12)
              branch=$(echo "$line" | cut -d'|' -f13)

              run_url="https://github.com/$REPO/actions/runs/$run_id"

              retry_count=$((attempt - 1))
              retry_indicator=""
              if [ "$retry_count" -gt 0 ]; then
                retry_indicator=" 🔄 Retry #$retry_count"
              fi

              echo "  📦 Workflow: $workflow (Run #$run_id)$retry_indicator"
              echo "     Event: $event"
              echo "     Branch: $branch"
              echo "     Status: $status"
              echo "     🟢 Running jobs: $running"
              echo "     🟡 Queued jobs: $queued"

              if [ "$running" -gt 0 ] && [ "$runners" != "" ]; then
                echo "     🖥️  Runners: $runners"
              fi

              if [ "$queue_min" -gt 0 ]; then
                echo "     ⏱️  Queue time: ${queue_min} minutes"
              fi

              echo "     🔗 Run URL: $run_url"
              echo ""
            done

            non_pr_running=$(echo "$non_pr_runs" | cut -d'|' -f5 | awk '{sum+=$1} END {print sum+0}')
            non_pr_queued=$(echo "$non_pr_runs" | cut -d'|' -f6 | awk '{sum+=$1} END {print sum+0}')
            non_pr_count=$(echo "$non_pr_runs" | wc -l | tr -d ' ')

            total_running=$((total_running + non_pr_running))
            total_queued=$((total_queued + non_pr_queued))

            echo "  📊 Non-PR Total: $non_pr_running running, $non_pr_queued queued"
            echo ""
          fi

          # Overall summary
          echo "========================================="
          echo "📈 Overall Summary"
          echo "========================================="
          echo "Total PRs with active runs: $pr_count"
          echo "Total non-PR active runs: ${non_pr_count:-0}"
          echo "Total running jobs: $total_running"
          echo "Total queued jobs: $total_queued"
          echo "========================================="

          # Cleanup
          rm -f "$pr_data_file"

nightly-link-check .github/workflows/nightly-link-check.yml

Triggers: schedule, workflow_dispatch
Runs on: ubuntu-latest
Jobs: lychee-online
Actions: lycheeverse/lychee-action

View raw YAML

name: Nightly Link Check

# Runs the lychee link checker over the README and docs on a daily schedule,
# or on demand.
on:
  schedule:
    - cron: "0 2 * * *"
  workflow_dispatch:

# The job only checks out and reads the repository, so limit GITHUB_TOKEN to
# read-only contents access instead of inheriting the repository default.
permissions:
  contents: read

# A newer run on the same ref replaces one that is still in progress.
concurrency:
  group: nightly-link-check-${{ github.ref }}
  cancel-in-progress: true

jobs:
  lychee-online:
    # Skip on forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run lychee online link checks
        uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2
        with:
          # Fail the job when lychee reports broken links.
          fail: true
          # Folded scalar: the lines below are joined into one argument string.
          args: >-
            --config .github/linters/lychee-ci.toml
            README.md
            docs/**/*.md
            docs/**/*.rst
            docs/**/*.ipynb
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

nightly-release-gateway matrix .github/workflows/nightly-release-gateway.yml

Triggers

schedule, workflow_dispatch

Runs on

${{ matrix.os }}-latest, ubuntu-latest, ubuntu-latest

Jobs

build, build-sdist, upload

Matrix

exclude, exclude.os, exclude.target, include, include.interpreter, include.ls, include.manylinux, include.os, include.platform, include.python-architecture, include.target, manylinux, os, target→ 3.9 3.10 3.11 3.12 3.13, aarch64, auto, dir, linux, macos, musllinux_1_1, ubuntu, windows, x64, x86_64

Actions

PyO3/maturin-action, PyO3/maturin-action

Commands

mv sglang-repo/sgl-model-gateway/* . rm -rf sglang-repo ls -alt
# Get current version from pyproject.toml CURRENT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])" 2>/dev/null || python -c "import tomli; print(tomli.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])") # Create nightly version with date: e.g., 0.2.1.dev20250128 NIGHTLY_VERSION="${CURRENT_VERSION}.dev$(date +%Y%m%d)" echo "Nightly version: $NIGHTLY_VERSION" # Update pyproject.toml with nightly version (temporary, not committed) sed -i.bak "s/version = \"${CURRENT_VERSION}\"/version = \"${NIGHTLY_VERSION}\"/" bindings/python/pyproject.toml # Verify the change cat bindings/python/pyproject.toml | grep "^version"
pip install -U twine tomli
brew install protobuf
choco install protoc -y
${{ matrix.ls || 'ls -lh' }} bindings/python/dist/
twine check --strict bindings/python/dist/*
mv sglang-repo/sgl-model-gateway/* . rm -rf sglang-repo ls -alt

View raw YAML

# Nightly release workflow for SGLang Model Gateway

name: Nightly Release SGLang Model Gateway to PyPI

on:
  schedule:
    # Run at 2 AM UTC every day
    - cron: '0 2 * * *'
  workflow_dispatch:  # Allow manual trigger

jobs:
  build:
    name: build on ${{ matrix.platform || matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }})
    runs-on: ${{ matrix.os }}-latest
    # This job only checks out the repository and uploads workflow artifacts,
    # so restrict GITHUB_TOKEN to read-only contents access.
    permissions:
      contents: read
    strategy:
      # Let the remaining platform builds finish even if one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu, macos, windows]
        target: [x86_64, aarch64]
        manylinux: [auto]
        # Extra per-combination properties, plus the additional musllinux builds.
        include:
          - os: ubuntu
            platform: linux
          - os: windows
            ls: dir
            target: x86_64
            python-architecture: x64
            interpreter: 3.9 3.10 3.11 3.12 3.13
          - os: macos
            target: aarch64
            interpreter: 3.9 3.10 3.11 3.12 3.13
          - os: ubuntu
            platform: linux
            target: aarch64
          # musllinux
          - os: ubuntu
            platform: linux
            target: x86_64
            manylinux: musllinux_1_1
          - os: ubuntu
            platform: linux
            target: aarch64
            manylinux: musllinux_1_1
        exclude:
          - os: windows
            target: aarch64

    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # Build from the gateway sub-project only: hoist its contents to the
      # workspace root and drop the rest of the checkout.
      - name: Move sgl-model-gateway folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo
          ls -alt
        shell: bash

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          architecture: ${{ matrix.python-architecture || 'x64' }}

      - name: Modify version for nightly release
        run: |
          # Get current version from pyproject.toml
          CURRENT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])" 2>/dev/null || python -c "import tomli; print(tomli.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])")
          # Create nightly version with date: e.g., 0.2.1.dev20250128
          NIGHTLY_VERSION="${CURRENT_VERSION}.dev$(date +%Y%m%d)"
          echo "Nightly version: $NIGHTLY_VERSION"

          # Update pyproject.toml with nightly version (temporary, not committed)
          sed -i.bak "s/version = \"${CURRENT_VERSION}\"/version = \"${NIGHTLY_VERSION}\"/" bindings/python/pyproject.toml

          # Verify the change
          cat bindings/python/pyproject.toml | grep "^version"
        shell: bash

      - name: Install twine and tomli
        run: pip install -U twine tomli

      - name: Install protoc (macOS)
        if: matrix.os == 'macos'
        run: brew install protobuf

      - name: Install protoc (Windows)
        if: matrix.os == 'windows'
        run: choco install protoc -y

      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          working-directory: bindings/python
          target: ${{ matrix.target }}
          manylinux: ${{ matrix.manylinux || 'auto' }}
          args: --release --out dist --features vendored-openssl --interpreter ${{ matrix.interpreter || '3.9 3.10 3.11 3.12 3.13 3.14' }}
          rust-toolchain: stable
          docker-options: -e CI -e CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc -e CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++
          before-script-linux: |
            # Install build dependencies (perl/make for vendored OpenSSL, protoc for gRPC)
            if command -v yum &> /dev/null; then
              yum update -y && yum install -y wget unzip gcc gcc-c++ perl-core make
              # Install cross-compilation toolchain for aarch64 if needed
              if [ "${{ matrix.target }}" = "aarch64" ]; then
                yum install -y gcc-aarch64-linux-gnu gcc-c++-aarch64-linux-gnu || true
              fi
            elif command -v apt-get &> /dev/null; then
              apt-get update && apt-get install -y wget unzip gcc g++ perl make
              # Install cross-compilation toolchain for aarch64 if needed
              if [ "${{ matrix.target }}" = "aarch64" ]; then
                apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu || true
              fi
            fi
            # NOTE(review): the protoc archive is downloaded without a checksum
            # check; consider verifying its SHA-256 before unpacking.
            (cd /tmp && \
             wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip && \
             unzip protoc-32.0-linux-x86_64.zip -d /usr/local && \
             rm protoc-32.0-linux-x86_64.zip)
            protoc --version

      # matrix.ls swaps in `dir` for the Windows entry; everything else uses `ls -lh`.
      - name: List built packages
        run: ${{ matrix.ls || 'ls -lh' }} bindings/python/dist/

      - name: Check packages
        run: twine check --strict bindings/python/dist/*

      # One artifact per os/target/manylinux combination.
      - uses: actions/upload-artifact@v4
        with:
          name: packages-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux || 'auto' }}
          path: bindings/python/dist/

  build-sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    # This job only checks out the repository and uploads a workflow artifact,
    # so restrict GITHUB_TOKEN to read-only contents access.
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # Build from the gateway sub-project only: hoist its contents to the
      # workspace root and drop the rest of the checkout.
      - name: Move sgl-model-gateway folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo
          ls -alt

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Modify version for nightly release
        run: |
          # Get current version from pyproject.toml
          CURRENT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])" 2>/dev/null || python -c "import tomli; print(tomli.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])")
          # Create nightly version with date: e.g., 0.2.1.dev20250128
          NIGHTLY_VERSION="${CURRENT_VERSION}.dev$(date +%Y%m%d)"
          echo "Nightly version: $NIGHTLY_VERSION"

          # Update pyproject.toml with nightly version (temporary, not committed)
          sed -i "s/version = \"${CURRENT_VERSION}\"/version = \"${NIGHTLY_VERSION}\"/" bindings/python/pyproject.toml

          # Verify the change
          cat bindings/python/pyproject.toml | grep "^version"

      - name: Build SDist
        uses: PyO3/maturin-action@v1
        with:
          working-directory: bindings/python
          command: sdist
          args: --out dist
          rust-toolchain: stable

      # Only the source tarball is published from this job.
      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: bindings/python/dist/*.tar.gz

  upload:
    name: Upload to TestPyPI
    if: github.repository == 'sgl-project/sglang'  # Ensure this job only runs for the sgl-project/sglang repository
    needs: [build, build-sdist]
    runs-on: ubuntu-latest
    # Publishing authenticates with the TestPyPI token below, not GITHUB_TOKEN,
    # so the workflow token is limited to read-only contents access.
    permissions:
      contents: read
    steps:
      # Gather the artifacts of this run into one flat dist/ directory.
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      - name: Upload to TestPyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN_ROUTER }}
        run: |
          pip install twine
          # --skip-existing lets a same-day re-run (or a retry after a partial
          # upload) succeed instead of failing on files that are already published.
          twine upload --repository testpypi --skip-existing dist/* --verbose

nightly-test-amd .github/workflows/nightly-test-amd.yml

Triggers

schedule, push, workflow_dispatch, workflow_call

Runs on

linux-mi325-1gpu-sglang, linux-mi325-2gpu-sglang, linux-mi325-2gpu-sglang, linux-mi325-2gpu-sglang, linux-mi325-2gpu-sglang, linux-mi325-4gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-8gpu-sglang, linux-mi325-1gpu-sglang, linux-mi35x-gpu-1, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, linux-mi35x-gpu-8, ubuntu-latest

Jobs

nightly-test-1-gpu-unit, nightly-accuracy-2-gpu, nightly-accuracy-2-gpu-vlm, nightly-perf-2-gpu-text, nightly-perf-2-gpu-vlm, nightly-4-gpu, nightly-accuracy-8-gpu, nightly-8-gpu-grok1-int4, nightly-8-gpu-grok2, nightly-8-gpu-deepseek-v31, nightly-8-gpu-deepseek-v32, nightly-8-gpu-deepseek-v32-mtp, nightly-8-gpu-deepseek-v3-kv-fp8, nightly-8-gpu-kimi-k25, nightly-8-gpu-qwen3-235b, nightly-8-gpu-qwen35, nightly-8-gpu-glm5, nightly-8-gpu-minimax-m25, nightly-1-gpu-zimage-turbo, nightly-test-1-gpu-mi35x, nightly-accuracy-8-gpu-mi35x, nightly-8-gpu-mi35x-grok1-int4, nightly-8-gpu-mi35x-grok2, nightly-8-gpu-mi35x-deepseek-r1-mxfp4, nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8, nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion, nightly-accuracy-8-gpu-mi35x-deepseek-v32, nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp, nightly-perf-8-gpu-mi35x-deepseek-v32-basic, nightly-8-gpu-mi35x-kimi-k25, nightly-8-gpu-mi35x-qwen3-235b-mxfp4, nightly-8-gpu-mi35x-qwen35, nightly-8-gpu-mi35x-glm5, nightly-8-gpu-mi35x-minimax-m25, nightly-perf-8-gpu-mi35x-deepseek-v32-mtp, check-all-jobs

Commands

touch github_summary.md bash scripts/ci/amd/amd_ci_start_container.sh
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true exit ${TEST_EXIT_CODE:-0}
touch github_summary.md bash scripts/ci/amd/amd_ci_start_container.sh
bash scripts/ci/amd/amd_ci_install_dependency.sh
> github_summary.md # Clear summary file bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true exit ${TEST_EXIT_CODE:-0}
touch github_summary.md bash scripts/ci/amd/amd_ci_start_container.sh
bash scripts/ci/amd/amd_ci_install_dependency.sh

View raw YAML

name: Nightly Test (AMD)

on:
  schedule:
    - cron: '30 17 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:
    inputs:
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        required: false
        type: string
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        required: false
        type: boolean
        default: true
      job_select:
        description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
        required: false
        type: choice
        default: 'all'
        options:
          - 'all'
          - nightly-test-1-gpu-unit
          - nightly-accuracy-2-gpu
          - nightly-accuracy-2-gpu-vlm
          - nightly-perf-2-gpu-text
          - nightly-perf-2-gpu-vlm
          - nightly-4-gpu
          - nightly-accuracy-8-gpu
          - nightly-8-gpu-grok1-int4
          - nightly-8-gpu-grok2
          - nightly-8-gpu-deepseek-v31
          - nightly-8-gpu-deepseek-v32
          - nightly-8-gpu-deepseek-v32-mtp
          - nightly-8-gpu-deepseek-v3-kv-fp8
          - nightly-8-gpu-kimi-k25
          - nightly-8-gpu-qwen3-235b
          - nightly-8-gpu-qwen35
          - nightly-8-gpu-glm5
          - nightly-8-gpu-minimax-m25
          - nightly-1-gpu-zimage-turbo
          - nightly-test-1-gpu-mi35x
          - nightly-accuracy-8-gpu-mi35x
          - nightly-8-gpu-mi35x-grok1-int4
          - nightly-8-gpu-mi35x-grok2
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
          - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
          - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
          - nightly-8-gpu-mi35x-kimi-k25
          - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
          - nightly-8-gpu-mi35x-qwen35
          - nightly-8-gpu-mi35x-glm5
          - nightly-8-gpu-mi35x-minimax-m25
      job_filter:
        description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
        required: false
        type: string
        default: ''
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        required: false
        type: boolean
        default: true

env:
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}

concurrency:
  # When called via workflow_call with ref set, use a unique group per caller run to avoid
  # collisions with direct schedule/push triggers. We use inputs.ref (not github.event_name)
  # to detect this, because github.event_name inherits from the caller in workflow_call.
  group: nightly-test-amd-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}
  # Cancel a superseded run only when no ref input was passed.
  # NOTE(review): given the comment above, github.event_name is never 'workflow_call'
  # here, so the second clause looks always-true; a caller that passes no ref would
  # then share the group (and cancellation) with direct triggers. Confirm intent.
  cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}

jobs:
  # ============================================== MI30x Unit Tests ==============================================
  # 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x only)
  nightly-test-1-gpu-unit:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit,'))
    runs-on: linux-mi325-1gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the nightly-amd-1-gpu suite through amd_ci_exec.sh with
      # GITHUB_STEP_SUMMARY pointed at github_summary.md. The suite's exit code is
      # saved so the summary is still appended to the job summary before the step
      # exits with that code.
      - name: Nightly Unit Test (1-GPU)
        timeout-minutes: 90
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x Accuracy Tests ==============================================
  # 2-GPU Accuracy Tests - GSM8K eval (MI30x only)
  nightly-accuracy-2-gpu:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the nightly-amd suite; the exit code is saved so the summary file is
      # still appended to the job summary before the step exits with that code.
      # NOTE(review): unlike the other test steps in this workflow, this step sets
      # no timeout-minutes, so only the job-level default applies. Confirm intent.
      - name: Nightly Test (2-GPU)
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # 2-GPU VLM Accuracy Tests - Vision-Language Models MMMU evaluation
  nightly-accuracy-2-gpu-vlm:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the nightly-amd-accuracy-2-gpu-vlm suite; the exit code is saved so the
      # summary file is still appended to the job summary before the step exits
      # with that code.
      - name: Nightly Accuracy Test (2-GPU VLM MMMU)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # 2-GPU Text Models Performance Tests
  nightly-perf-2-gpu-text:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the nightly-amd-perf-text-2-gpu suite with SGLANG_USE_AITER=1 passed to
      # amd_ci_exec.sh; the exit code is saved so the summary file is still appended
      # to the job summary before the step exits with that code.
      - name: Performance Test (2-GPU Text Models)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # 2-GPU VLM Performance Tests
  nightly-perf-2-gpu-vlm:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the nightly-amd-perf-vlm-2-gpu suite with SGLANG_USE_AITER=1 passed to
      # amd_ci_exec.sh; the exit code is saved so the summary file is still appended
      # to the job summary before the step exits with that code.
      - name: Performance Test (2-GPU VLM Models)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x 4-GPU Tests ==============================================
  # 4-GPU Nightly Tests - Dumper/Comparator E2E, VLM Encoder DP
  nightly-4-gpu:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu,'))
    runs-on: linux-mi325-4gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the nightly-amd-4-gpu suite; the exit code is saved so the summary file
      # is still appended to the job summary before the step exits with that code.
      # NOTE(review): --continue-on-error is hard-coded here, whereas the other jobs
      # in this workflow derive it from inputs.continue_on_error. Confirm intent.
      - name: Nightly Test (4-GPU)
        timeout-minutes: 120
        run: |
          > github_summary.md
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (accuracy only)
  nightly-accuracy-8-gpu:
    # Gate: upstream repository (or a pull_request event), and either no job
    # filter is set, the filter is 'all', or the comma-separated filter names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      # Check out the ref passed by a caller, or the triggering ref otherwise.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Create the summary file in the workspace, then run the container start script.
      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Each test step below truncates github_summary.md first, as the other jobs
      # in this workflow do. Both steps publish the whole file, so without the
      # truncation the second step would publish the first step's summary again.
      - name: Accuracy Test (8-GPU GPT-OSS)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publish the summary even when the suite failed, then report its status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      - name: Accuracy Test (8-GPU Grok1-FP8)
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file (drops the previous step's content)
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publish the summary even when the suite failed, then report its status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x Combined Accuracy + Performance Tests ==============================================
  # 8-GPU Grok1-INT4: accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-grok1-int4:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU Grok1-INT4)
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test (8-GPU Grok1-INT4)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU Grok2: accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-grok2:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU Grok2)
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test (8-GPU Grok2)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU DeepSeek-V3.1: accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-deepseek-v31:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU DeepSeek-V3.1)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test (8-GPU DeepSeek-V3.1)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        # NOTE(review): the 300-minute step limit equals the 18000 s per-file
        # timeout passed below - confirm that is intended.
        timeout-minutes: 300
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_ROCM700A=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU DeepSeek-V3.2 (basic): accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-deepseek-v32:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU DeepSeek-V3.2 Basic)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test (8-GPU DeepSeek-V3.2 Basic)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 150
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU DeepSeek-V3.2 MTP: accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-deepseek-v32-mtp:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU DeepSeek-V3.2 MTP)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test (8-GPU DeepSeek-V3.2 MTP)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 180
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU DeepSeek-V3 KV FP8 (Basic + MTP with --kv-cache-dtype fp8_e4m3), run as a single suite.
  nightly-8-gpu-deepseek-v3-kv-fp8:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: DeepSeek-V3 KV FP8 Test (8-GPU Basic + MTP)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU Kimi-K2.5: accuracy suite only.
  nightly-8-gpu-kimi-k25:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU Kimi-K2.5)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  nightly-8-gpu-qwen3-235b:
    # 8-GPU Qwen3 235B: one suite covering accuracy and performance.
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test + Performance Test (8-GPU Qwen3)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU Qwen 3.5 (Accuracy)
  nightly-8-gpu-qwen35:
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter (inputs.job_filter or inputs.job_select) is unset, equals 'all',
    # or contains this job's name as a comma-delimited entry.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # Extra packages installed in the container for this job only.
          bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

      - name: Accuracy Test (8-GPU Qwen 3.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          # Forward inputs.continue_on_error to run_suite.py, matching the
          # sibling run_suite.py jobs in this workflow (previously this job ignored it).
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publish the summary even if the suite failed, then report the suite's exit code.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  nightly-8-gpu-glm5:
    # 8-GPU GLM-5 NSA: accuracy suite only.
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # Install transformers from a pinned upstream commit inside the container.
          bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

      - name: Accuracy Test (8-GPU GLM-5 NSA)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # 8-GPU MiniMax-M2.5: accuracy suite only.
  nightly-8-gpu-minimax-m25:
    runs-on: linux-mi325-8gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Accuracy Test (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # ============================================== MI30x Diffusion Tests ==============================================
  # 1-GPU Z-Image-Turbo (Diffusion T2I): pytest run plus upload of the generated images.
  nightly-1-gpu-zimage-turbo:
    runs-on: linux-mi325-1gpu-sglang
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Z-Image-Turbo Diffusion Test (1-GPU)
        timeout-minutes: 45
        run: |
          pytest_status=0
          # When inputs.continue_on_error is set, the expression at the end of the
          # pytest line expands to a shell "|| true", so the pytest result is ignored.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
            pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || pytest_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the pytest status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${pytest_status}"

      - name: Upload generated images
        # Runs regardless of the test outcome; an empty artifact directory is not an error.
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: zimage-turbo-outputs
          path: diffusion-artifacts/
          retention-days: 30
          if-no-files-found: ignore

  # ============================================== MI35x Tests ==============================================
  # MI35x 1-GPU tests - platform-agnostic tests that may work on CDNA4 (gfx950).
  nightly-test-1-gpu-mi35x:
    runs-on: linux-mi35x-gpu-1
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container does not ship.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Nightly Test MI35x (1-GPU)
        timeout-minutes: 90
        run: |
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # MI35x 8-GPU GPT-OSS: accuracy suite only.
  nightly-accuracy-8-gpu-mi35x:
    runs-on: linux-mi35x-gpu-8
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container does not ship.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU GPT-OSS)
        timeout-minutes: 180
        run: |
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # MI35x 8-GPU Grok1-INT4: accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-mi35x-grok1-int4:
    runs-on: linux-mi35x-gpu-8
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container does not ship.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU Grok1-INT4)
        timeout-minutes: 90
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test MI35x (8-GPU Grok1-INT4)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # MI35x 8-GPU Grok2: accuracy suite, then a non-blocking performance suite.
  nightly-8-gpu-mi35x-grok2:
    runs-on: linux-mi35x-gpu-8
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container does not ship.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU Grok2)
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test MI35x (8-GPU Grok2)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 60
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

  # MI35x 8-GPU DeepSeek-R1-MXFP4: accuracy suite, then a non-blocking performance script.
  nightly-8-gpu-mi35x-deepseek-r1-mxfp4:
    runs-on: linux-mi35x-gpu-8
    # Enabled in the upstream repo or on pull_request events, when the job
    # filter is unset, equals 'all', or lists this job as a comma-delimited entry.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
      && (!(inputs.job_filter || inputs.job_select)
      || (inputs.job_filter || inputs.job_select) == 'all'
      || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container does not ship.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4)
        timeout-minutes: 180
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          suite_status=0
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || suite_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the suite's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${suite_status}"

      - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4)
        # A perf failure is reported on the step but does not fail the job.
        continue-on-error: true
        timeout-minutes: 300
        run: |
          # Truncate the summary file so this step publishes only its own output.
          : > github_summary.md
          perf_status=0
          # The perf test file is invoked directly rather than through run_suite.py.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || perf_status=$?
          # Copy the summary file into the job summary (best effort), then exit with the script's status.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit "${perf_status}"

  # MI35x 8-GPU DeepSeek-R1-MXFP4 with KV FP8: accuracy suite, then a best-effort perf benchmark.
  nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
        timeout-minutes: 180
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

      - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
        timeout-minutes: 300
        # A perf failure is tolerated: it does not fail the job when accuracy passed.
        continue-on-error: true
        run: |
          # Drop the accuracy summary so only perf output is published by this step.
          > github_summary.md
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-R1-MXFP4 with AllReduce fusion: accuracy suite, then a best-effort perf benchmark.
  nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
        timeout-minutes: 180
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

      - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
        timeout-minutes: 300
        # A perf failure is tolerated: it does not fail the job when accuracy passed.
        continue-on-error: true
        run: |
          # Drop the accuracy summary so only perf output is published by this step.
          > github_summary.md
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2: accuracy suite.
  nightly-accuracy-8-gpu-mi35x-deepseek-v32:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2)
        timeout-minutes: 120
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 (TP+MTP): accuracy suite.
  nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2 TP+MTP)
        timeout-minutes: 120
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 (basic configuration): performance suite.
  nightly-perf-8-gpu-mi35x-deepseek-v32-basic:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 Basic)
        timeout-minutes: 150
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Kimi-K2.5: accuracy suite.
  nightly-8-gpu-mi35x-kimi-k25:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU Kimi-K2.5)
        timeout-minutes: 180
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Qwen3-235B-MXFP4: accuracy and performance in a single suite run.
  nightly-8-gpu-mi35x-qwen3-235b-mxfp4:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test + Performance Test MI35x (8-GPU Qwen3-235B-MXFP4)
        timeout-minutes: 120
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Qwen 3.5: accuracy suite.
  nightly-8-gpu-mi35x-qwen35:
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
          # Extra packages installed only for this job
          # (presumably used by the Qwen 3.5 accuracy evaluation; confirm).
          bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

      - name: Accuracy Test MI35x (8-GPU Qwen 3.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          # Forward the continue_on_error input like every other suite invocation
          # in this workflow; previously this job silently ignored it.
          # A failing exit code is captured instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU GLM-5 (NSA): accuracy suite.
  nightly-8-gpu-mi35x-glm5:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
          # transformers is installed from a pinned upstream commit
          # (presumably required for GLM-5 support; confirm before bumping).
          bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

      - name: Accuracy Test MI35x (8-GPU GLM-5 NSA)
        timeout-minutes: 180
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU MiniMax-M2.5: accuracy suite.
  nightly-8-gpu-mi35x-minimax-m25:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Accuracy Test MI35x (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # This suite is additionally given SGLANG_USE_AITER=1.
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 (MTP): performance suite.
  nightly-perf-8-gpu-mi35x-deepseek-v32-mtp:
    runs-on: linux-mi35x-gpu-8
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # run_suite.py needs tabulate, which the MI35x container lacks.
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 MTP)
        timeout-minutes: 180
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # Aggregation gate: fails when any job listed under `needs` failed or was cancelled.
  check-all-jobs:
    runs-on: ubuntu-latest
    # always() lets this job run even when jobs it depends on did not succeed.
    if: >-
      always() &&
      (github.repository == 'sgl-project/sglang' ||
      github.event_name == 'pull_request' ||
      github.event_name == 'workflow_dispatch')
    needs:
      # --- MI30x: unit tests ---
      - nightly-test-1-gpu-unit
      # --- MI30x: accuracy tests ---
      - nightly-accuracy-2-gpu
      - nightly-accuracy-2-gpu-vlm
      # --- MI30x: 4-GPU tests ---
      - nightly-4-gpu
      - nightly-accuracy-8-gpu
      # --- MI30x: performance tests are left out (perf failures don't block CI) ---
      # - nightly-perf-2-gpu-text
      # - nightly-perf-2-gpu-vlm
      # --- MI30x: combined accuracy + performance tests ---
      - nightly-8-gpu-grok1-int4
      - nightly-8-gpu-grok2
      - nightly-8-gpu-deepseek-v31
      - nightly-8-gpu-deepseek-v32
      - nightly-8-gpu-deepseek-v32-mtp
      - nightly-8-gpu-deepseek-v3-kv-fp8
      - nightly-8-gpu-kimi-k25
      - nightly-8-gpu-qwen3-235b
      - nightly-8-gpu-qwen35
      - nightly-8-gpu-glm5
      - nightly-8-gpu-minimax-m25
      # --- MI30x: diffusion tests ---
      - nightly-1-gpu-zimage-turbo
      # --- MI35x jobs ---
      - nightly-test-1-gpu-mi35x
      - nightly-accuracy-8-gpu-mi35x
      - nightly-8-gpu-mi35x-grok1-int4
      - nightly-8-gpu-mi35x-grok2
      - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
      - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
      - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
      - nightly-accuracy-8-gpu-mi35x-deepseek-v32
      - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
      - nightly-8-gpu-mi35x-kimi-k25
      - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
      - nightly-8-gpu-mi35x-qwen35
      - nightly-8-gpu-mi35x-glm5
      - nightly-8-gpu-mi35x-minimax-m25
      # --- MI35x: perf jobs are left out (perf failures don't block CI) ---
      # - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
      # - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
    steps:
      - name: Check if any job failed
        env:
          # Each expression renders as the string "true" or "false".
          ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
          ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
        run: |
          # A failure in any required job fails the gate.
          if [[ "$ANY_FAILED" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          # A cancellation is treated the same way as a failure.
          if [[ "$ANY_CANCELLED" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"

nightly-test-amd-rocm720 .github/workflows/nightly-test-amd-rocm720.yml

Triggers

schedule, push, workflow_dispatch, workflow_call

Runs on

Jobs

nightly-test-1-gpu-unit-rocm720, nightly-accuracy-2-gpu-rocm720, nightly-accuracy-2-gpu-vlm-rocm720, nightly-perf-2-gpu-text-rocm720, nightly-perf-2-gpu-vlm-rocm720, nightly-4-gpu-rocm720, nightly-accuracy-8-gpu-rocm720, nightly-8-gpu-grok1-int4-rocm720, nightly-8-gpu-grok2-rocm720, nightly-8-gpu-deepseek-v31-rocm720, nightly-8-gpu-deepseek-v32-rocm720, nightly-8-gpu-deepseek-v32-mtp-rocm720, nightly-8-gpu-deepseek-v3-kv-fp8-rocm720, nightly-8-gpu-kimi-k25-rocm720, nightly-8-gpu-qwen3-235b-rocm720, nightly-8-gpu-qwen35-rocm720, nightly-8-gpu-glm5-rocm720, nightly-8-gpu-minimax-m25-rocm720, nightly-1-gpu-zimage-turbo-rocm720, nightly-test-1-gpu-mi35x-rocm720, nightly-accuracy-8-gpu-mi35x-rocm720, nightly-8-gpu-mi35x-grok1-int4-rocm720, nightly-8-gpu-mi35x-grok2-rocm720, nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720, nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720, nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720, nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720, nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720, nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720, nightly-8-gpu-mi35x-kimi-k25-rocm720, nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720, nightly-8-gpu-mi35x-qwen35-rocm720, nightly-8-gpu-mi35x-glm5-rocm720, nightly-8-gpu-mi35x-glm47-fp8-rocm720, nightly-8-gpu-mi35x-minimax-m25-rocm720, nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720, check-all-jobs

Commands

touch github_summary.md bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
bash scripts/ci/amd/amd_ci_install_dependency.sh
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true exit ${TEST_EXIT_CODE:-0}
touch github_summary.md bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
bash scripts/ci/amd/amd_ci_install_dependency.sh
> github_summary.md # Clear summary file bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$? echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true exit ${TEST_EXIT_CODE:-0}
touch github_summary.md bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
bash scripts/ci/amd/amd_ci_install_dependency.sh

View raw YAML

name: Nightly Test (AMD ROCm 7.2)

on:
  # Scheduled daily run (GitHub evaluates cron expressions in UTC).
  schedule:
    - cron: '30 17 * * *'
  # Pushes to main that touch the version file.
  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'
  # Manual runs: pick one job from the dropdown or type a comma-separated list.
  workflow_dispatch:
    inputs:
      aiter_ref:
        type: string
        required: false
        default: ''
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
      continue_on_error:
        type: boolean
        required: false
        default: true
        description: 'Continue on error (do not fail the workflow on test failures)'
      job_select:
        type: choice
        required: false
        default: 'all'
        description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
        options:
          - all
          - nightly-test-1-gpu-unit-rocm720
          - nightly-accuracy-2-gpu-rocm720
          - nightly-accuracy-2-gpu-vlm-rocm720
          - nightly-perf-2-gpu-text-rocm720
          - nightly-perf-2-gpu-vlm-rocm720
          - nightly-4-gpu-rocm720
          - nightly-accuracy-8-gpu-rocm720
          - nightly-8-gpu-grok1-int4-rocm720
          - nightly-8-gpu-grok2-rocm720
          - nightly-8-gpu-deepseek-v31-rocm720
          - nightly-8-gpu-deepseek-v32-rocm720
          - nightly-8-gpu-deepseek-v32-mtp-rocm720
          - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
          - nightly-8-gpu-kimi-k25-rocm720
          - nightly-8-gpu-qwen3-235b-rocm720
          - nightly-8-gpu-qwen35-rocm720
          - nightly-8-gpu-glm5-rocm720
          - nightly-8-gpu-minimax-m25-rocm720
          - nightly-1-gpu-zimage-turbo-rocm720
          - nightly-test-1-gpu-mi35x-rocm720
          - nightly-accuracy-8-gpu-mi35x-rocm720
          - nightly-8-gpu-mi35x-grok1-int4-rocm720
          - nightly-8-gpu-mi35x-grok2-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
          - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
          - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
          - nightly-8-gpu-mi35x-kimi-k25-rocm720
          - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
          - nightly-8-gpu-mi35x-qwen35-rocm720
          - nightly-8-gpu-mi35x-glm5-rocm720
          - nightly-8-gpu-mi35x-glm47-fp8-rocm720
          - nightly-8-gpu-mi35x-minimax-m25-rocm720
      job_filter:
        type: string
        required: false
        default: ''
        description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
  # Reusable-workflow entry point for other workflows.
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ''
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
      aiter_ref:
        type: string
        required: false
        default: ''
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
      job_filter:
        type: string
        required: false
        default: 'all'
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
      continue_on_error:
        type: boolean
        required: false
        default: true
        description: 'Continue on error (do not fail the workflow on test failures)'

env:
  # Exposes the aiter_ref input to every job as an environment variable.
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}

concurrency:
  # A workflow_call invocation that sets `ref` gets its own group per caller run,
  # so it never collides with direct schedule/push runs. The check is on
  # inputs.ref rather than github.event_name because, under workflow_call,
  # github.event_name is inherited from the caller.
  group: nightly-test-amd-rocm720-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}
  cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}

jobs:
  # ============================================== MI30x ROCm 7.2 Unit Tests ==============================================
  # MI30x ROCm 7.2, 1 GPU: unit tests (LoRA, debug utils, scheduler, etc.)
  nightly-test-1-gpu-unit-rocm720:
    runs-on: linux-mi325-1gpu-sglang
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit-rocm720,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Nightly Unit Test ROCm 7.2 (1-GPU)
        timeout-minutes: 90
        run: |
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x ROCm 7.2 Accuracy Tests ==============================================
  # MI30x ROCm 7.2, 2 GPUs: accuracy tests (GSM8K eval)
  nightly-accuracy-2-gpu-rocm720:
    runs-on: linux-mi325-2gpu-sglang
    # Gate: upstream repo or a pull_request event, and the job filter/select
    # input is empty, 'all', or names this job.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-rocm720,'))
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # An explicit ref input takes precedence over the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          # Create the summary file before the container is started.
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # NOTE(review): unlike the sibling test steps, this one sets no
      # timeout-minutes; confirm whether that is intentional.
      - name: Nightly Test ROCm 7.2 (2-GPU)
        run: |
          # Start from an empty summary file.
          > github_summary.md
          # Capture a failing exit code instead of aborting, so the summary is still published.
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          # Publishing the summary is best-effort.
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          # Report the suite's exit code (0 when it succeeded).
          exit ${TEST_EXIT_CODE:-0}

  # Vision-language model accuracy (MMMU evaluation) on 2 GPUs (ROCm 7.2)
  nightly-accuracy-2-gpu-vlm-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm-rocm720,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Summary output is collected in github_summary.md, appended to the job summary, and the
      # suite's exit code is propagated as the step result.
      - name: Nightly Accuracy Test ROCm 7.2 (2-GPU VLM MMMU)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # Text-model performance suite on 2 GPUs (ROCm 7.2)
  nightly-perf-2-gpu-text-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text-rocm720,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the suite with SGLANG_USE_AITER=1; summary output goes through github_summary.md and the
      # suite's exit code becomes the step result.
      - name: Performance Test ROCm 7.2 (2-GPU Text Models)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # Vision-language model performance suite on 2 GPUs (ROCm 7.2)
  nightly-perf-2-gpu-vlm-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm-rocm720,'))
    runs-on: linux-mi325-2gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs the suite with SGLANG_USE_AITER=1; summary output goes through github_summary.md and the
      # suite's exit code becomes the step result.
      - name: Performance Test ROCm 7.2 (2-GPU VLM Models)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x ROCm 7.2 4-GPU Tests ==============================================
  # 4-GPU nightly suite: Dumper/Comparator E2E and VLM Encoder DP (ROCm 7.2)
  nightly-4-gpu-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu-rocm720,'))
    runs-on: linux-mi325-4gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # NOTE(review): --continue-on-error is always passed here, regardless of inputs.continue_on_error -
      # confirm this is deliberate.
      - name: Nightly Test ROCm 7.2 (4-GPU)
        timeout-minutes: 120
        run: |
          > github_summary.md
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly \
            --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (ROCm 7.2)
  # Two suites run back to back in the same workspace. Each test step truncates github_summary.md
  # before running, so anything left in the file by the previous step is not appended to the
  # job summary a second time.
  nightly-accuracy-8-gpu-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # GITHUB_STEP_SUMMARY is redirected to github_summary.md for the test command; the file is then
      # appended to the runner's job summary and the captured exit code is returned.
      - name: Accuracy Test ROCm 7.2 (8-GPU GPT-OSS)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Same pattern as the GPT-OSS step, with RCCL_MSCCL_ENABLE=0. The file is cleared first so the
      # GPT-OSS step's content is not published again.
      - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-FP8)
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x ROCm 7.2 Combined Accuracy + Performance Tests ==============================================
  # Grok1-INT4 on 8 GPUs: accuracy suite followed by performance suite (ROCm 7.2)
  nightly-8-gpu-grok1-int4-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Accuracy suite with RCCL_MSCCL_ENABLE=0; a non-zero exit code fails the step.
      - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-INT4)
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Performance suite; continue-on-error keeps a failure here from failing the job.
      - name: Performance Test ROCm 7.2 (8-GPU Grok1-INT4)
        continue-on-error: true
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # Grok2 on 8 GPUs: accuracy suite followed by performance suite (ROCm 7.2)
  nightly-8-gpu-grok2-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Accuracy suite with RCCL_MSCCL_ENABLE=0; a non-zero exit code fails the step.
      - name: Accuracy Test ROCm 7.2 (8-GPU Grok2)
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Performance suite; continue-on-error keeps a failure here from failing the job.
      - name: Performance Test ROCm 7.2 (8-GPU Grok2)
        continue-on-error: true
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # DeepSeek-V3.1 on 8 GPUs: accuracy suite followed by performance suite (ROCm 7.2)
  nightly-8-gpu-deepseek-v31-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Accuracy suite with SGLANG_USE_AITER=1; a non-zero exit code fails the step.
      - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.1)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Performance suite; continue-on-error keeps a failure here from failing the job.
      # NOTE(review): this step sets SGLANG_USE_ROCM700A=1 inside a ROCm 7.2 job, while the accuracy
      # step above sets SGLANG_USE_AITER=1 - confirm the flag is intended for rocm720.
      - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.1)
        continue-on-error: true
        timeout-minutes: 300
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_ROCM700A=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # DeepSeek-V3.2 (basic) on 8 GPUs: accuracy suite followed by performance suite (ROCm 7.2)
  nightly-8-gpu-deepseek-v32-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Accuracy suite; a non-zero exit code fails the step.
      - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Performance suite; continue-on-error keeps a failure here from failing the job.
      - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
        continue-on-error: true
        timeout-minutes: 150
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # DeepSeek-V3.2 MTP on 8 GPUs: MTP accuracy suite followed by MTP performance suite (ROCm 7.2)
  nightly-8-gpu-deepseek-v32-mtp-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Accuracy suite; a non-zero exit code fails the step.
      - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Performance suite; continue-on-error keeps a failure here from failing the job.
      - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
        continue-on-error: true
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # DeepSeek-V3 with FP8 KV cache on 8 GPUs: Basic + MTP with --kv-cache-dtype fp8_e4m3 (ROCm 7.2)
  nightly-8-gpu-deepseek-v3-kv-fp8-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Summary output is collected in github_summary.md, appended to the job summary, and the
      # suite's exit code is propagated as the step result.
      - name: DeepSeek-V3 KV FP8 Test ROCm 7.2 (8-GPU Basic + MTP)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # Kimi-K2.5 accuracy suite on 8 GPUs (ROCm 7.2)
  nightly-8-gpu-kimi-k25-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Summary output is collected in github_summary.md, appended to the job summary, and the
      # suite's exit code is propagated as the step result.
      - name: Accuracy Test ROCm 7.2 (8-GPU Kimi-K2.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # Qwen3-235B on 8 GPUs: accuracy and performance in one suite (ROCm 7.2)
  nightly-8-gpu-qwen3-235b-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Summary output is collected in github_summary.md, appended to the job summary, and the
      # suite's exit code is propagated as the step result.
      - name: Accuracy Test + Performance Test ROCm 7.2 (8-GPU Qwen3)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # Qwen 3.5 accuracy suite on 8 GPUs (ROCm 7.2)
  nightly-8-gpu-qwen35-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      # Dependency install is invoked with --skip-aiter-build, then mistral-common and lm-eval[api]
      # are pip-installed through the exec wrapper.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
          bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

      # NOTE(review): --continue-on-error is always passed here, regardless of inputs.continue_on_error -
      # confirm this is deliberate.
      - name: Accuracy Test ROCm 7.2 (8-GPU Qwen 3.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly \
            --timeout-per-file 3600 --continue-on-error || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # GLM-5 accuracy suite on 8 GPUs (ROCm 7.2)
  nightly-8-gpu-glm5-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      # After the regular dependency install, transformers is installed from a pinned upstream commit.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

      # Summary output is collected in github_summary.md, appended to the job summary, and the
      # suite's exit code is propagated as the step result.
      - name: Accuracy Test ROCm 7.2 (8-GPU GLM-5 NSA)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MiniMax-M2.5 accuracy suite on 8 GPUs (ROCm 7.2)
  nightly-8-gpu-minimax-m25-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

      # Runs the suite with SGLANG_USE_AITER=1; summary output goes through github_summary.md and the
      # suite's exit code becomes the step result.
      - name: Accuracy Test ROCm 7.2 (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x ROCm 7.2 Diffusion Tests ==============================================
  # Z-Image-Turbo diffusion (T2I) test on 1 GPU (ROCm 7.2)
  nightly-1-gpu-zimage-turbo-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo-rocm720,'))
    runs-on: linux-mi325-1gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Runs pytest directly (not run_suite.py). When inputs.continue_on_error is set, '|| true' is
      # spliced into the command line so a test failure does not produce a non-zero exit code.
      - name: Z-Image-Turbo Diffusion Test ROCm 7.2 (1-GPU)
        timeout-minutes: 45
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
            pytest test/registered/amd/test_zimage_turbo.py -v -s \
            ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Uploads diffusion-artifacts/ even when the test step failed; a missing directory is not an error.
      - name: Upload generated images
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: zimage-turbo-outputs-rocm720
          path: diffusion-artifacts/
          retention-days: 30
          if-no-files-found: ignore

  # ============================================== MI35x ROCm 7.2 Tests ==============================================
  # MI35x 1-GPU nightly suite (ROCm 7.2)
  nightly-test-1-gpu-mi35x-rocm720:
    # Gate: upstream repo or a pull_request event, and either no job filter, filter == 'all',
    # or this job's id is listed in the comma-separated filter.
    if: >-
      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      (!(inputs.job_filter || inputs.job_select) ||
      (inputs.job_filter || inputs.job_select) == 'all' ||
      contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x-rocm720,'))
    runs-on: linux-mi35x-gpu-1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      # Creates the summary file, then runs the container start script for rocm720.
      - name: Setup docker (ROCm 7.2)
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Summary output is collected in github_summary.md, appended to the job summary, and the
      # suite's exit code is propagated as the step result.
      - name: Nightly Test MI35x ROCm 7.2 (1-GPU)
        timeout-minutes: 90
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 \
            ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Accuracy Tests - GPT-OSS (ROCm 7.2)
  nightly-accuracy-8-gpu-mi35x-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # A failing suite is recorded in TEST_EXIT_CODE rather than ending the script,
      # so github_summary.md is still appended to the step summary (best effort)
      # before the step exits with the suite's status (0 when it passed).
      # Per-file limit is 7200 s; the whole step is capped at 180 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GPT-OSS)
        timeout-minutes: 180
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Grok1-INT4 (Accuracy + Performance) ROCm 7.2
  nightly-8-gpu-mi35x-grok1-int4-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite with RCCL_MSCCL_ENABLE=0
      # passed to amd_ci_exec.sh, and records a failure in TEST_EXIT_CODE so the
      # summary is still appended before the step exits with the suite's status.
      # NOTE(review): the 60-minute step timeout equals the 3600 s per-file limit,
      # so the step timeout would fire first for a single hung file - confirm intent.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Same pattern for the performance suite. continue-on-error means a failure
      # here does not fail the job; with no `if:` the step is skipped when the
      # accuracy step above fails.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
        timeout-minutes: 60
        continue-on-error: true
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Grok2 (Accuracy + Performance) ROCm 7.2
  nightly-8-gpu-mi35x-grok2-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite with RCCL_MSCCL_ENABLE=0
      # passed to amd_ci_exec.sh, and records a failure in TEST_EXIT_CODE so the
      # summary is still appended before the step exits with the suite's status.
      # NOTE(review): the 60-minute step timeout equals the 3600 s per-file limit,
      # so the step timeout would fire first for a single hung file - confirm intent.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok2)
        timeout-minutes: 60
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Same pattern for the performance suite. continue-on-error means a failure
      # here does not fail the job; with no `if:` the step is skipped when the
      # accuracy step above fails.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok2)
        timeout-minutes: 60
        continue-on-error: true
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e RCCL_MSCCL_ENABLE=0 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-R1-MXFP4 (Accuracy + Performance) ROCm 7.2
  nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Runs the perf test file directly with python3 instead of run_suite.py, so
      # no per-file timeout or --continue-on-error flag applies; only the
      # 300-minute step timeout bounds it. continue-on-error means a failure here
      # does not fail the job; with no `if:` the step is skipped when the accuracy
      # step above fails.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4)
        timeout-minutes: 300
        continue-on-error: true
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-R1-MXFP4 KV FP8 (Accuracy + Performance) ROCm 7.2
  nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Runs the perf test file directly with python3 instead of run_suite.py, so
      # no per-file timeout or --continue-on-error flag applies; only the
      # 300-minute step timeout bounds it. continue-on-error means a failure here
      # does not fail the job; with no `if:` the step is skipped when the accuracy
      # step above fails.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8)
        timeout-minutes: 300
        continue-on-error: true
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion (Accuracy + Performance) ROCm 7.2
  nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      # Runs the perf test file directly with python3 instead of run_suite.py, so
      # no per-file timeout or --continue-on-error flag applies; only the
      # 300-minute step timeout bounds it. continue-on-error means a failure here
      # does not fail the job; with no `if:` the step is skipped when the accuracy
      # step above fails.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
        timeout-minutes: 300
        continue-on-error: true
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 Accuracy Test (ROCm 7.2)
  nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      # Per-file limit is 3600 s; the whole step is capped at 120 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 TP+MTP Accuracy Test (ROCm 7.2)
  nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      # Per-file limit is 3600 s; the whole step is capped at 120 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 TP+MTP)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 Performance Test (Basic) ROCm 7.2
  nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the performance suite, and records a
      # failure in TEST_EXIT_CODE so the summary is still appended to the step
      # summary before the step exits with the suite's status. This perf step has
      # no continue-on-error, so a failure fails the job.
      # Per-file limit is 5400 s; the whole step is capped at 150 minutes.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
        timeout-minutes: 150
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Kimi-K2.5 (Accuracy) ROCm 7.2
  nightly-8-gpu-mi35x-kimi-k25-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      # Per-file limit is 7200 s; the whole step is capped at 180 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Kimi-K2.5)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Qwen3-235B-MXFP4 (Accuracy + Performance) ROCm 7.2
  nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # One step runs a single suite that, per the step name, covers both accuracy
      # and performance. A failure is recorded in TEST_EXIT_CODE so the summary is
      # still appended to the step summary before the step exits with that status.
      # Per-file limit is 3600 s; the whole step is capped at 120 minutes.
      - name: Accuracy Test + Performance Test MI35x ROCm 7.2 (8-GPU Qwen3-235B-MXFP4)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU Qwen 3.5 (Accuracy) ROCm 7.2
  nightly-8-gpu-mi35x-qwen35-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Passes --skip-aiter-build in addition to --skip-test-time-deps, then
      # installs tabulate, mistral-common and lm-eval[api] through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
          bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      # NOTE(review): --continue-on-error is hard-coded here, so the
      # inputs.continue_on_error toggle used by the sibling jobs has no effect on
      # this job - confirm whether that is intentional.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Qwen 3.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 --continue-on-error || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU GLM-5 NSA (Accuracy) ROCm 7.2
  nightly-8-gpu-mi35x-glm5-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Besides tabulate, installs transformers from the huggingface/transformers
      # git repository pinned to revision 96f807a33b75.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
          bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      # Per-file limit is 7200 s; the whole step is capped at 180 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-5 NSA)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU GLM-4.7-FP8 (Accuracy) ROCm 7.2
  nightly-8-gpu-mi35x-glm47-fp8-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm47-fp8-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Unlike most 8-GPU MI35x jobs, the install script is called without
      # --skip-test-time-deps; tabulate is then added through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite, and records a failure
      # in TEST_EXIT_CODE so the summary is still appended to the step summary
      # before the step exits with the suite's status (0 when it passed).
      # Per-file limit is 3600 s; the whole step is capped at 120 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-4.7-FP8)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm47-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2
  nightly-8-gpu-mi35x-minimax-m25-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the accuracy suite with SGLANG_USE_AITER=1
      # passed to amd_ci_exec.sh, and records a failure in TEST_EXIT_CODE so the
      # summary is still appended before the step exits with the suite's status.
      # Per-file limit is 3600 s; the whole step is capped at 120 minutes.
      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP) ROCm 7.2
  nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
    # Runs only in sgl-project/sglang or on pull_request events, and only when the
    # job_filter/job_select input is empty, equals 'all', or names this job in its
    # comma-separated list (the value is wrapped in commas for an exact-name match).
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
    runs-on: linux-mi35x-gpu-8
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Use the ref input when it is set; otherwise fall back to the triggering ref.
          ref: ${{ inputs.ref || github.ref }}

      # Creates an empty github_summary.md on the runner, then runs
      # amd_ci_start_container.sh for the rocm720 variant.
      - name: Setup docker (ROCm 7.2)
        run: |
          touch github_summary.md
          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      # Installs with --skip-test-time-deps, then adds tabulate through amd_ci_exec.sh.
      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
          # Install tabulate for run_suite.py (missing in MI35x container)
          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

      # Truncates github_summary.md, runs the performance suite, and records a
      # failure in TEST_EXIT_CODE so the summary is still appended to the step
      # summary before the step exits with the suite's status. This perf step has
      # no continue-on-error, so a failure fails the job.
      # Per-file limit is 7200 s; the whole step is capped at 180 minutes.
      - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
        timeout-minutes: 180
        run: |
          > github_summary.md  # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  check-all-jobs:
    # Final gate for the ROCm 7.2 nightly jobs. always() lets it run even when
    # upstream jobs failed, were cancelled, or were skipped by their filters.
    if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
    runs-on: ubuntu-latest
    needs:
      # --- MI30x ROCm 7.2: unit tests ---
      - nightly-test-1-gpu-unit-rocm720
      # --- MI30x ROCm 7.2: 2-GPU accuracy ---
      - nightly-accuracy-2-gpu-rocm720
      - nightly-accuracy-2-gpu-vlm-rocm720
      # --- MI30x ROCm 7.2: 2-GPU performance ---
      - nightly-perf-2-gpu-text-rocm720
      - nightly-perf-2-gpu-vlm-rocm720
      # --- MI30x ROCm 7.2: 4-GPU tests and 8-GPU accuracy ---
      - nightly-4-gpu-rocm720
      - nightly-accuracy-8-gpu-rocm720
      # --- MI30x ROCm 7.2: combined accuracy + performance ---
      - nightly-8-gpu-grok1-int4-rocm720
      - nightly-8-gpu-grok2-rocm720
      - nightly-8-gpu-deepseek-v31-rocm720
      - nightly-8-gpu-deepseek-v32-rocm720
      - nightly-8-gpu-deepseek-v32-mtp-rocm720
      - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
      - nightly-8-gpu-kimi-k25-rocm720
      - nightly-8-gpu-qwen3-235b-rocm720
      - nightly-8-gpu-qwen35-rocm720
      - nightly-8-gpu-glm5-rocm720
      - nightly-8-gpu-minimax-m25-rocm720
      # --- MI30x ROCm 7.2: diffusion ---
      - nightly-1-gpu-zimage-turbo-rocm720
      # --- MI35x ROCm 7.2 ---
      - nightly-test-1-gpu-mi35x-rocm720
      - nightly-accuracy-8-gpu-mi35x-rocm720
      - nightly-8-gpu-mi35x-grok1-int4-rocm720
      - nightly-8-gpu-mi35x-grok2-rocm720
      - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
      - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
      - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
      - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
      - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
      - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
      - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
      - nightly-8-gpu-mi35x-kimi-k25-rocm720
      - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
      - nightly-8-gpu-mi35x-qwen35-rocm720
      - nightly-8-gpu-mi35x-glm5-rocm720
      - nightly-8-gpu-mi35x-glm47-fp8-rocm720
      - nightly-8-gpu-mi35x-minimax-m25-rocm720
    steps:
      # The two expressions render as the strings 'true' or 'false'; they are
      # handed to the script as environment variables. Skipped jobs count as
      # neither failed nor cancelled, so they do not fail this gate.
      - name: Check if any job failed
        env:
          ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
          ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
        run: |
          if [[ "$ANY_FAILED" == "true" ]]; then
            echo "One or more ROCm 7.2 nightly test jobs failed"
            exit 1
          elif [[ "$ANY_CANCELLED" == "true" ]]; then
            echo "One or more ROCm 7.2 nightly test jobs were cancelled"
            exit 1
          fi
          echo "All ROCm 7.2 nightly test jobs passed"

nightly-test-intel .github/workflows/nightly-test-intel.yml

Triggers

schedule, push, workflow_dispatch, workflow_call

Runs on

ubuntu-latest

Jobs

placeholder

Commands

echo "Intel nightly tests will be added here"

View raw YAML

# Nightly workflow for Intel hardware. Triggers: daily schedule, pushes to main that
# touch the version file, manual dispatch, and calls from other workflows.
name: Nightly Test (Intel)

on:
  # Daily at 00:00 (GitHub Actions evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'
  # Only pushes to main that modify python/sglang/version.py.
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:
  # Reusable-workflow entry point; `ref` is optional and defaults to the empty string.
  workflow_call:
    inputs:
      ref:
        description: "Branch, tag or SHA to checkout"
        required: false
        type: string
        default: ""

# Runs are grouped per ref: the `ref` input when non-empty, otherwise github.ref.
# NOTE(review): inside a called workflow the github context is taken from the caller,
# so github.event_name may never equal 'workflow_call' - confirm this expression can
# actually evaluate to false.
concurrency:
  group: nightly-test-intel-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
  # Placeholder for Intel GPU tests
  # Add Intel-specific nightly test workflows here when available

  placeholder:
    # Stand-in job until Intel suites exist; it only prints a message.
    runs-on: ubuntu-latest
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Placeholder
        run: |
          echo "Intel nightly tests will be added here"

nightly-test-npu matrix .github/workflows/nightly-test-npu.yml

Triggers

schedule, pull_request, workflow_dispatch, workflow_call

Runs on

linux-aarch64-a3-2, linux-aarch64-a3-2, linux-aarch64-a3-4, linux-aarch64-a3-8, linux-aarch64-a3-16, ubuntu-latest

Jobs

nightly-1-npu-a3, nightly-2-npu-a3, nightly-4-npu-a3, nightly-8-npu-a3, nightly-16-npu-a3, check-all-jobs

Matrix

part→ 0, 1

Commands

# speed up by using infra cache services CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list pip config set global.index-url http://${CACHING_URL}/pypi/simple pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" bash scripts/ci/npu/npu_ci_install_dependency.sh a3 # copy required file from our daily cache cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp # copy download through proxy curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
bash scripts/ci/npu/npu_log_print.sh
pip install sglang_router hf download lmms-lab/MMMU --repo-type dataset pip install sentence_transformers torchaudio==2.8.0 pip install protobuf==6.31.1 zss pre-commit wandb>=0.16.0 tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 sacrebleu>=1.5.0 pytablewriter black==24.1.0 isort==5.13.2 peft>=0.2.0 accelerate>=0.29.1 pip install jsonlines httpx==0.25.0 evaluate>=0.4.0 datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git cd ./lmms-eval nohup pip install . > lmmslog.txt 2>&1 & sleep 120 export PYTHONPATH=$PYTHONPATH:$(pwd) cd ../ cd test python3 run_suite.py --hw npu --suite nightly-1-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# speed up by using infra cache services CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list pip config set global.index-url http://${CACHING_URL}/pypi/simple pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" bash scripts/ci/npu/npu_ci_install_dependency.sh a3 # copy required file from our daily cache cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp # copy download through proxy curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
bash scripts/ci/npu/npu_log_print.sh
pip install sglang_router hf download lmms-lab/MMMU --repo-type dataset pip install sentence_transformers torchaudio==2.8.0 pip install protobuf==6.31.1 zss pre-commit wandb>=0.16.0 tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 sacrebleu>=1.5.0 pytablewriter black==24.1.0 isort==5.13.2 peft>=0.2.0 accelerate>=0.29.1 pip install jsonlines httpx==0.25.0 evaluate>=0.4.0 datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git cd ./lmms-eval nohup pip install . > lmmslog.txt 2>&1 & sleep 120 export PYTHONPATH=$PYTHONPATH:$(pwd) cd ../ cd test python3 run_suite.py --hw npu --suite nightly-2-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# speed up by using infra cache services CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple" pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn" bash scripts/ci/npu/npu_ci_install_dependency.sh a3 # copy required file from our daily cache cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp # copy download through proxy curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
bash scripts/ci/npu/npu_log_print.sh

View raw YAML

# Nightly workflow for the NPU A3 runners. Triggers: daily schedule, pull requests
# against main that edit this workflow file, manual dispatch, and workflow_call.
name: Nightly Test (NPU)

on:
  schedule:
    - cron: '0 17 * * *'  # Execute at 1:00 a.m. Beijing Time every day
  # Self-test: only PRs targeting main that change this workflow file trigger a run.
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/nightly-test-npu.yml"
  workflow_dispatch:
  # Reusable-workflow entry point.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      # NOTE(review): no job in this workflow references inputs.job_filter, so the
      # value currently has no effect - confirm whether filtering should be wired up.
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'

# Runs are grouped per ref: the `ref` input when non-empty, otherwise github.ref.
# NOTE(review): inside a called workflow the github context is taken from the caller,
# so github.event_name may never equal 'workflow_call' - confirm this expression can
# actually evaluate to false.
concurrency:
  group: nightly-test-npu-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
  nightly-1-npu-a3:
    # 1-NPU nightly suite, split into two partitions via matrix.part (0 and 1).
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download the grade-school-math test set
          # NOTE(review): fetched directly here, while the 8- and 16-NPU jobs go through
          # gh-proxy.test.osinfra.cn - confirm which URL is intended.
          curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Requirement specifiers containing '>' must be quoted: unquoted, bash parses
          # '>=X' as a redirection of stdout to a file named '=X', so the lower bound
          # is dropped and pip's output is diverted.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # lmms-eval is installed in the background with a fixed 120 s head start; the
          # checkout itself is also added to PYTHONPATH below.
          # NOTE(review): the background install is never awaited - confirm 120 s suffices.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH="${PYTHONPATH}:$(pwd)"
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-1-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  nightly-2-npu-a3:
    # 2-NPU nightly suite; single partition (matrix.part is only 0).
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download the grade-school-math test set
          # NOTE(review): fetched directly here, while the 8- and 16-NPU jobs go through
          # gh-proxy.test.osinfra.cn - confirm which URL is intended.
          curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Requirement specifiers containing '>' must be quoted: unquoted, bash parses
          # '>=X' as a redirection of stdout to a file named '=X', so the lower bound
          # is dropped and pip's output is diverted.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # lmms-eval is installed in the background with a fixed 120 s head start; the
          # checkout itself is also added to PYTHONPATH below.
          # NOTE(review): the background install is never awaited - confirm 120 s suffices.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH="${PYTHONPATH}:$(pwd)"
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-2-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-4-npu-a3:
    # 4-NPU nightly suite; single partition (matrix.part is only 0).
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-4
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          # NOTE(review): unlike the other NPU jobs in this workflow, this one does not
          # set global.index-url to the cache service - confirm the omission is intended.
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download the grade-school-math test set
          # NOTE(review): fetched directly here, while the 8- and 16-NPU jobs go through
          # gh-proxy.test.osinfra.cn - confirm which URL is intended.
          curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Requirement specifiers containing '>' must be quoted: unquoted, bash parses
          # '>=X' as a redirection of stdout to a file named '=X', so the lower bound
          # is dropped and pip's output is diverted.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # lmms-eval is installed in the background with a fixed 120 s head start; the
          # checkout itself is also added to PYTHONPATH below.
          # NOTE(review): the background install is never awaited - confirm 120 s suffices.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH="${PYTHONPATH}:$(pwd)"
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-4-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-8-npu-a3:
    # 8-NPU nightly suite; single partition (matrix.part is only 0).
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-8
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download the grade-school-math test set through the gh-proxy mirror
          curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Requirement specifiers containing '>' must be quoted: unquoted, bash parses
          # '>=X' as a redirection of stdout to a file named '=X', so the lower bound
          # is dropped and pip's output is diverted.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # lmms-eval is installed in the background with a fixed 120 s head start; the
          # checkout itself is also added to PYTHONPATH below.
          # NOTE(review): the background install is never awaited - confirm 120 s suffices.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH="${PYTHONPATH}:$(pwd)"
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-8-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-16-npu-a3:
    # 16-NPU nightly suite, split into two partitions via matrix.part (0 and 1).
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-16
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download the grade-school-math test set through the gh-proxy mirror
          curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          pip install sglang_router
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0
          # Requirement specifiers containing '>' must be quoted: unquoted, bash parses
          # '>=X' as a redirection of stdout to a file named '=X', so the lower bound
          # is dropped and pip's output is diverted.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # lmms-eval is installed in the background with a fixed 120 s head start; the
          # checkout itself is also added to PYTHONPATH below.
          # NOTE(review): the background install is never awaited - confirm 120 s suffices.
          nohup pip install . > lmmslog.txt 2>&1 &
          sleep 120
          export PYTHONPATH="${PYTHONPATH}:$(pwd)"
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-16-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  check-all-jobs:
    # Aggregation gate for the NPU suites: runs regardless of upstream results
    # (always()) and fails when any needed job reports 'failure' or 'cancelled'.
    if: github.repository == 'sgl-project/sglang' && always()
    runs-on: ubuntu-latest
    container:
      image: docker.m.daocloud.io/ubuntu:22.04
    needs:
      - nightly-1-npu-a3
      - nightly-2-npu-a3
      - nightly-4-npu-a3
      - nightly-8-npu-a3
      - nightly-16-npu-a3
    steps:
      - name: Check if any job failed
        run: |
          # Both expressions are rendered to the literal text "true"/"false" before the shell runs.
          any_failed="${{ contains(needs.*.result, 'failure') }}"
          any_cancelled="${{ contains(needs.*.result, 'cancelled') }}"
          if [[ "$any_failed" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          elif [[ "$any_cancelled" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"

nightly-test-nvidia matrix .github/workflows/nightly-test-nvidia.yml

Triggers

schedule, workflow_dispatch, workflow_call

Runs on

1-gpu-h100, 1-gpu-h100, 8-gpu-h200, 4-gpu-h100, 8-gpu-h200, 8-gpu-h20, 8-gpu-b200, 2-gpu-h100, 2-gpu-h100, 2-gpu-h100, 2-gpu-h100, 1-gpu-h100, 2-gpu-h100, 4-gpu-b200, 8-gpu-b200, 4-gpu-h100, ubuntu-latest, ubuntu-latest

Jobs

nightly-test-general-1-gpu-h100, nightly-test-kernel-1-gpu-h100, nightly-test-kernel-8-gpu-h200, nightly-test-general-4-gpu-h100, nightly-test-general-8-gpu-h200, nightly-test-general-8-gpu-h20, nightly-test-general-8-gpu-b200, nightly-test-text-accuracy-2-gpu-h100, nightly-test-text-perf-2-gpu-h100, nightly-test-vlm-accuracy-2-gpu-h100, nightly-test-vlm-perf-2-gpu-h100, nightly-test-multimodal-server-1-gpu, nightly-test-multimodal-server-2-gpu, nightly-test-perf-4-gpu-b200, nightly-test-specialized-8-gpu-b200, nightly-test-diffusion-comparison, consolidate-metrics, check-all-jobs

Matrix

part, partition→ 0, 1, 2, 3

Commands

bash scripts/ci/cuda/ci_install_dependency.sh
cd test python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error
bash scripts/ci/cuda/ci_install_dependency.sh
cd test python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error
bash scripts/ci/cuda/ci_install_dependency.sh
cd test python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error
bash scripts/ci/cuda/ci_install_dependency.sh
cd test python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

View raw YAML

# Nightly workflow for the Nvidia GPU runners. Triggers: daily schedule, manual
# dispatch (with a job selector), and workflow_call (with ref and job selector).
name: Nightly Test (Nvidia)

on:
  # Daily at 00:00 (GitHub Actions evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:
    inputs:
      # Jobs compare this value against their own id in their `if:` conditions.
      # NOTE(review): verify every option below matches a job's `if:` filter - e.g.
      # confirm a job keyed on 'nightly-test-perf-8-gpu-b200' exists.
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: 'all'
        options:
          - 'all'
          - 'nightly-test-general-1-gpu-h100'
          - 'nightly-test-general-4-gpu-h100'
          - 'nightly-test-general-8-gpu-h200'
          - 'nightly-test-general-8-gpu-h20'
          - 'nightly-test-general-8-gpu-b200'
          - 'nightly-test-text-accuracy-2-gpu-h100'
          - 'nightly-test-text-perf-2-gpu-h100'
          - 'nightly-test-vlm-accuracy-2-gpu-h100'
          - 'nightly-test-vlm-perf-2-gpu-h100'
          - 'nightly-test-multimodal-server-1-gpu'
          - 'nightly-test-multimodal-server-2-gpu'
          - 'nightly-test-perf-4-gpu-b200'
          - 'nightly-test-perf-8-gpu-b200'
          - 'nightly-test-kernel-1-gpu-h100'
          - 'nightly-test-diffusion-comparison'
          - 'nightly-test-kernel-8-gpu-h200'
  # Reusable-workflow entry point; here job_filter is a free-form string, not a choice.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: string
        default: 'all'

# Runs are grouped per ref: the `ref` input when non-empty, otherwise github.ref.
# NOTE(review): inside a called workflow the github context is taken from the caller,
# so github.event_name may never equal 'workflow_call' - confirm this expression can
# actually evaluate to false.
concurrency:
  group: nightly-test-nvidia-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

# Workflow-level environment, visible to every job and step below.
env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  HF_HUB_DOWNLOAD_TIMEOUT: 300
  HF_HUB_ETAG_TIMEOUT: 300

jobs:
  # General nightly suite on a single H100
  nightly-test-general-1-gpu-h100:
    runs-on: 1-gpu-h100
    # Runs in the upstream repo when the filter is empty, 'all', or this job's id.
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-1-gpu-h100')
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        working-directory: test
        run: python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

      # Runs even when an earlier step failed.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # Full JIT kernel unit tests on a single H100 (SGLANG_JIT_KERNEL_RUN_FULL_TESTS enabled)
  nightly-test-kernel-1-gpu-h100:
    runs-on: 1-gpu-h100
    timeout-minutes: 240
    # Runs in the upstream repo when the filter is empty, 'all', or this job's id.
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-1-gpu-h100')
    env:
      # Enables the full jit_kernel test grids (see sglang.jit_kernel.utils.should_run_full_tests)
      SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
      # Kept in line with the pr-test-jit-kernel workflow so JIT warmup behaves the same
      SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
      # 'true' only when the triggering ref is refs/heads/main, otherwise 'false'
      SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run jit kernel nightly suite
        timeout-minutes: 60
        working-directory: test
        run: python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error

      # Runs even when an earlier step failed.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # Full JIT kernel tests on the 8x H200 runner (multi-GPU suite)
  nightly-test-kernel-8-gpu-h200:
    runs-on: 8-gpu-h200
    timeout-minutes: 240
    # Runs in the upstream repo when the filter is empty, 'all', or this job's id.
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-8-gpu-h200')
    env:
      SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
      SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
      # 'true' only when the triggering ref is refs/heads/main, otherwise 'false'
      SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run multi-GPU jit kernel nightly suite
        timeout-minutes: 90
        working-directory: test
        run: python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error

      # Runs even when an earlier step failed.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # General nightly suite on the 4x H100 runner
  nightly-test-general-4-gpu-h100:
    runs-on: 4-gpu-h100
    # Runs in the upstream repo when the filter is empty, 'all', or this job's id.
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-4-gpu-h100')
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        working-directory: test
        run: python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

      # Runs even when an earlier step failed.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # General tests on the 8x H200 runner, sharded over four matrix partitions
  nightly-test-general-8-gpu-h200:
    runs-on: 8-gpu-h200
    # Runs in the upstream repo when the filter is empty, 'all', or this job's id.
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-general-8-gpu-h200')
    strategy:
      fail-fast: false
      matrix:
        partition: [0, 1, 2, 3]
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      # Partitioned suite: each matrix entry runs shard matrix.partition of 4.
      # NOTE(review): `if: always()` means this step also runs when an earlier step
      # (maintenance check, install) failed - confirm that is intended.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        working-directory: test
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "8-gpu-h200"
          IS_H200: "1"
        run: |
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

      # Best-effort (continue-on-error): publishes every test/performance_profiles_*
      # directory that exists, then deletes the *.json.gz files under them.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          trace_flags=""
          for profile_dir in test/performance_profiles_*/; do
            # An unmatched glob stays literal, so check the path really is a directory.
            if [ -d "$profile_dir" ]; then
              trace_flags="$trace_flags --traces-dir $profile_dir"
            fi
          done
          if [ -z "$trace_flags" ]; then
            echo "No trace directories found, skipping publish"
          else
            # Intentionally unquoted: the accumulated flags must be word-split.
            python3 scripts/ci/utils/publish_traces.py $trace_flags
            find test/performance_profiles_*/ -name '*.json.gz' -delete
          fi

      # NOTE(review): this command carries no partition flags, so every matrix entry
      # issues the same nightly-8-gpu-h200 invocation, and without always() the step
      # is skipped once an earlier step has failed - confirm both are intended.
      - name: Run test
        timeout-minutes: 30
        working-directory: test
        env:
          GPU_CONFIG: "8-gpu-h200"
        run: python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

      - name: Collect performance metrics
        if: always()
        run: >-
          python3 scripts/ci/utils/save_metrics.py
          --gpu-config 8-gpu-h200
          --partition ${{ matrix.partition }}
          --run-id ${{ github.run_id }}
          --output test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json
          --search-dir test/performance_profiles_8_gpu
          --search-dir test

      # One artifact per partition; a missing metrics file is ignored.
      - name: Upload partition metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metrics-8gpu-h200-partition-${{ matrix.partition }}
          path: test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json
          if-no-files-found: ignore
          retention-days: 5

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}

  # Nightly general tests on the 8x H20 runner (single job, no matrix).
  nightly-test-general-8-gpu-h20:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-h20')
    runs-on: 8-gpu-h20
    env:
      # Comma-separated device list exported to every step of this job.
      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          GPU_CONFIG: "8-gpu-h20"
        run: |
          cd test
          python3 run_suite.py \
            --hw cuda \
            --suite nightly-8-gpu-h20 \
            --nightly \
            --continue-on-error

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # Nightly general tests on the 8x B200 runner, one job per matrix partition.
  nightly-test-general-8-gpu-b200:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-general-8-gpu-b200')
    runs-on: 8-gpu-b200
    strategy:
      fail-fast: false
      matrix:
        partition:
          - 0
          - 1
          - 2
          - 3
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      # Shared 8-GPU suite; the matrix value selects which of the 4 partitions runs here.
      - name: Run common 8-GPU model tests
        if: always()
        timeout-minutes: 300
        env:
          GPU_CONFIG: "8-gpu-b200"
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
        run: |
          cd test
          python3 run_suite.py \
            --hw cuda \
            --suite nightly-8-gpu-common \
            --nightly \
            --timeout-per-file=12000 \
            --continue-on-error \
            --auto-partition-id=${{ matrix.partition }} \
            --auto-partition-size=4

      # Always attempted; a failure in this step does not fail the job.
      - name: Publish traces to storage repo
        if: always()
        continue-on-error: true
        env:
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        run: |
          # Collect one --traces-dir flag per profile directory that exists.
          trace_flags=""
          for profile_dir in test/performance_profiles_*/; do
            if [ -d "$profile_dir" ]; then
              trace_flags="$trace_flags --traces-dir $profile_dir"
            fi
          done
          if [ -z "$trace_flags" ]; then
            echo "No trace directories found, skipping publish"
          else
            python3 scripts/ci/utils/publish_traces.py $trace_flags
            find test/performance_profiles_*/ -name '*.json.gz' -delete
          fi

      - name: Collect performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/save_metrics.py \
            --gpu-config 8-gpu-b200 --partition ${{ matrix.partition }} \
            --run-id ${{ github.run_id }} \
            --output test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json \
            --search-dir test/performance_profiles_8_gpu --search-dir test

      - name: Upload partition metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: metrics-8gpu-b200-partition-${{ matrix.partition }}
          path: test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json
          if-no-files-found: ignore
          retention-days: 5

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}

  # Nightly accuracy evaluation for text models on the 2x H100 runner.
  nightly-test-text-accuracy-2-gpu-h100:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run eval test for text models
        timeout-minutes: 120
        run: |
          cd test
          python3 run_suite.py \
            --hw cuda \
            --suite nightly-eval-text-2-gpu \
            --nightly \
            --continue-on-error \
            --timeout-per-file 4500

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # Text model performance tests
  # Runs the nightly text perf suite on the 2x H100 runner and then pushes the
  # collected profiler traces to the CI data repository.
  nightly-test-text-perf-2-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-text-perf-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run performance test for text models
        timeout-minutes: 180
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd test
          # Start from an empty trace directory so only this run's profiles are published.
          rm -rf performance_profiles_text_models/
          python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error --timeout-per-file 3600

      # Runs even when the perf step above failed, so traces written before the
      # failure are still uploaded (the 8-GPU nightly jobs publish the same way).
      # The directory check keeps the step quiet when nothing was produced.
      - name: Publish traces to storage repo
        if: always()
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          if [ -d test/performance_profiles_text_models ]; then
            python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_text_models
          else
            echo "No trace directory found, skipping publish"
          fi

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Nightly accuracy evaluation for VLM models on the 2x H100 runner.
  nightly-test-vlm-accuracy-2-gpu-h100:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run eval test for VLM models (fixed MMMU-100)
        timeout-minutes: 240
        run: |
          cd test
          python3 run_suite.py \
            --hw cuda \
            --suite nightly-eval-vlm-2-gpu \
            --nightly \
            --continue-on-error \
            --timeout-per-file 9000

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # VLM performance tests
  # Runs the nightly VLM perf suite on the 2x H100 runner and then pushes the
  # collected profiler traces to the CI data repository.
  nightly-test-vlm-perf-2-gpu-h100:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-h100')
    runs-on: 2-gpu-h100
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run perf test for VLM models (MMMU)
        timeout-minutes: 240
        env:
          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
          GPU_CONFIG: "2-gpu-h100"
        run: |
          cd test
          # Start from an empty trace directory so only this run's profiles are published.
          rm -rf performance_profiles_vlms/
          python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 3600

      # Runs even when the perf step above failed, so traces written before the
      # failure are still uploaded (the 8-GPU nightly jobs publish the same way).
      # The directory check keeps the step quiet when nothing was produced.
      - name: Publish traces to storage repo
        if: always()
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
        run: |
          if [ -d test/performance_profiles_vlms ]; then
            python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_vlms
          else
            echo "No trace directory found, skipping publish"
          fi

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Diffusion server tests on a single H100, split into two matrix parts.
  nightly-test-multimodal-server-1-gpu:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
    runs-on: 1-gpu-h100
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        part:
          - 0
          - 1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion
          pip install slack_sdk

      # The matrix value picks which of the two suite partitions this job runs.
      - name: Run diffusion server tests
        timeout-minutes: 60
        env:
          GITHUB_RUN_ID: ${{ github.run_id }}
          GPU_CONFIG: "1-gpu-h100"
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 1-gpu --partition-id ${{ matrix.part }} --total-partitions 2

      - name: Collect diffusion performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
            --gpu-config 1-gpu-h100 --run-id ${{ github.run_id }} \
            --output python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json \
            --results-json python/diffusion-results.json

      - name: Upload diffusion metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-metrics-1gpu-partition-${{ matrix.part }}
          path: python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json
          if-no-files-found: ignore
          retention-days: 90

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}

  # Diffusion server tests on the 2x H100 runner, split into two matrix parts.
  nightly-test-multimodal-server-2-gpu:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
    runs-on: 2-gpu-h100
    strategy:
      fail-fast: false
      max-parallel: 5
      matrix:
        part:
          - 0
          - 1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion
          pip install slack_sdk

      # The matrix value picks which of the two suite partitions this job runs.
      - name: Run diffusion server tests
        timeout-minutes: 60
        env:
          GITHUB_RUN_ID: ${{ github.run_id }}
          GPU_CONFIG: "2-gpu-h100"
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 2-gpu --partition-id ${{ matrix.part }} --total-partitions 2

      - name: Collect diffusion performance metrics
        if: always()
        run: |
          python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
            --gpu-config 2-gpu-h100 --run-id ${{ github.run_id }} \
            --output python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json \
            --results-json python/diffusion-results.json

      - name: Upload diffusion metrics
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-metrics-2gpu-partition-${{ matrix.part }}
          path: python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json
          if-no-files-found: ignore
          retention-days: 90

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}

  # Nightly B200 performance suite on the 4-GPU runner.
  nightly-test-perf-4-gpu-b200:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
    runs-on: 4-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 300
        run: |
          cd test
          python3 run_suite.py \
            --hw cuda \
            --suite nightly-4-gpu-b200 \
            --nightly \
            --continue-on-error \
            --timeout-per-file 12000

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # Specialized B200 tests - 8 GPU, for specific backends and configs
  # The job_filter check accepts this job's own id as well as the older
  # 'nightly-test-perf-8-gpu-b200' value, so existing callers that still pass
  # the old name keep working while filtering by the real job name also works.
  nightly-test-specialized-8-gpu-b200:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-specialized-8-gpu-b200' ||
      inputs.job_filter == 'nightly-test-perf-8-gpu-b200')
    runs-on: 8-gpu-b200
    env:
      RUNNER_LABELS: 8-gpu-b200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 120
        env:
          GPU_CONFIG: "8-gpu-b200"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

      # Uploaded regardless of the test outcome.
      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  # Cross-framework diffusion comparison on the 4x H100 runner; the dashboard,
  # publish and upload steps run regardless of the comparison outcome.
  nightly-test-diffusion-comparison:
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'nightly-test-diffusion-comparison')
    runs-on: 4-gpu-h100
    timeout-minutes: 240
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run cross-framework comparison
        timeout-minutes: 210
        env:
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_SHA: ${{ github.sha }}
          PYTHONUNBUFFERED: "1"
        run: |
          python3 -u scripts/ci/utils/diffusion/run_comparison.py --output comparison-results.json

      - name: Generate dashboard
        if: always()
        env:
          GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        run: |
          python3 scripts/ci/utils/diffusion/generate_diffusion_dashboard.py \
            --results comparison-results.json --output dashboard.md \
            --charts-dir comparison-charts \
            --fetch-history --step-summary

      - name: Publish to sglang-ci-data
        if: always()
        env:
          GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        run: |
          python3 scripts/ci/utils/diffusion/publish_comparison_results.py \
            --results comparison-results.json --dashboard dashboard.md \
            --charts-dir comparison-charts

      - name: Upload comparison artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-comparison-${{ github.run_id }}
          if-no-files-found: ignore
          retention-days: 90
          path: |
            comparison-results.json
            dashboard.md
            comparison-charts/
            comparison-logs/

      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  # Consolidate performance metrics from all jobs
  # Runs after the metric-producing jobs finish (whatever their result),
  # downloads every "*metrics-*" artifact into metrics/, merges them with
  # merge_metrics.py and uploads the merged file as a single artifact.
  consolidate-metrics:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-b200
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Download all partition metrics
        uses: actions/download-artifact@v4
        with:
          pattern: "*metrics-*"
          path: metrics/
          merge-multiple: true

      - name: List downloaded metrics
        run: |
          echo "Downloaded metrics files:"
          find metrics/ -name "*.json" -type f 2>/dev/null || echo "No metrics files found"

      - name: Merge metrics
        env:
          # A branch name is free-form text, so it is handed to the shell as an
          # environment variable and quoted rather than expanded into the
          # command line, where it could be split or interpreted.
          BRANCH_NAME: ${{ github.ref_name }}
        run: |
          python3 scripts/ci/utils/merge_metrics.py \
            --input-dir metrics/ \
            --output consolidated-metrics-${{ github.run_id }}.json \
            --run-id ${{ github.run_id }} \
            --commit-sha ${{ github.sha }} \
            --branch "$BRANCH_NAME"

      - name: Upload consolidated metrics
        uses: actions/upload-artifact@v4
        with:
          name: consolidated-metrics-${{ github.run_id }}
          path: consolidated-metrics-${{ github.run_id }}.json
          retention-days: 90
          if-no-files-found: warn

  # Gate job: fails when any job listed under `needs` failed or was cancelled.
  check-all-jobs:
    if: github.repository == 'sgl-project/sglang' && always()
    runs-on: ubuntu-latest
    needs:
      - nightly-test-general-1-gpu-h100
      - nightly-test-general-4-gpu-h100
      - nightly-test-general-8-gpu-h200
      - nightly-test-general-8-gpu-h20
      - nightly-test-general-8-gpu-b200
      - nightly-test-text-accuracy-2-gpu-h100
      - nightly-test-text-perf-2-gpu-h100
      - nightly-test-vlm-accuracy-2-gpu-h100
      - nightly-test-vlm-perf-2-gpu-h100
      - nightly-test-multimodal-server-1-gpu
      - nightly-test-multimodal-server-2-gpu
      - nightly-test-perf-4-gpu-b200
      - nightly-test-specialized-8-gpu-b200
      - nightly-test-diffusion-comparison
      - consolidate-metrics
    steps:
      - name: Check if any job failed
        env:
          # Each expression renders as the string "true" or "false".
          ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
          ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
        run: |
          if [[ "$ANY_FAILED" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "$ANY_CANCELLED" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"

open-pr-copy-from-oss perms .github/workflows/open-pr-copy-from-oss.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

copy

Commands

bash scripts/code_sync/install_github_cli.sh
python3 scripts/code_sync/copy_from_oss.py

View raw YAML

name: Open A PR to Copy Code From OSS

# Manual trigger only; the cron schedule below is commented out.
on:
  workflow_dispatch:
  # schedule:
  #   - cron: '0 10 * * *'

permissions:
  contents: write

jobs:
  copy:
    runs-on: ubuntu-latest
    steps:
      # Always operates on the main branch, regardless of the dispatch ref.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: main

      - name: Install GitHub CLI (if not present)
        run: bash scripts/code_sync/install_github_cli.sh

      - name: Copy from OSS code
        run: python3 scripts/code_sync/copy_from_oss.py
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_OPEN_PR_TO_PRIVATE }}

open-pr-copy-to-oss perms .github/workflows/open-pr-copy-to-oss.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

copy

Commands

bash scripts/code_sync/install_github_cli.sh
python3 scripts/code_sync/copy_to_oss.py --commit ${{ github.event.inputs.commit_sha }}

View raw YAML

# Manually dispatched workflow that runs scripts/code_sync/copy_to_oss.py for
# one commit (the input defaults to LAST).
name: Open A PR to Copy Diff To OSS

on:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'The commit SHA to copy. Defaults to LAST to copy the latest commit.'
        required: false
        default: 'LAST'

permissions:
  contents: write

jobs:
  copy:
    runs-on: ubuntu-latest
    steps:
      # Full history is fetched (fetch-depth: 0).
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install GitHub CLI (if not present)
        run: |
          bash scripts/code_sync/install_github_cli.sh

      - name: Copy to OSS code
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_OPEN_PR_TO_OSS }}
          # The dispatch input is user-supplied text: pass it through the
          # environment so it is never spliced into the script as shell code.
          COMMIT_SHA: ${{ github.event.inputs.commit_sha }}
        run: |
          # Fall back to the documented default if the input was cleared.
          python3 scripts/code_sync/copy_to_oss.py --commit "${COMMIT_SHA:-LAST}"

patch-docker-dev .github/workflows/patch-docker-dev.yml

Triggers

workflow_dispatch

Runs on

x64-docker-build-node

Jobs

patch

Actions

docker/login-action

Commands

IMAGE="lmsysorg/sglang:${{ inputs.image_tag }}" docker pull "${IMAGE}" if BASE_SHA=$(docker run --rm "${IMAGE}" git -C /sgl-workspace/sglang rev-parse HEAD 2>/dev/null); then echo "Image built from commit: ${BASE_SHA}" else BASE_SHA="" echo "::warning::Image has no .git directory — cannot extract base commit" fi echo "BASE_SHA=${BASE_SHA}" >> "$GITHUB_ENV"
git config --global --add safe.directory "$GITHUB_WORKSPACE" git fetch origin main mkdir -p /tmp/patch-ctx if [ -n "${{ inputs.pr_numbers }}" ]; then IFS=',' read -ra PRS <<< "${{ inputs.pr_numbers }}" for pr in "${PRS[@]}"; do pr=$(echo "${pr}" | xargs) echo "Fetching PR #${pr}" git fetch origin "pull/${pr}/head:pr-${pr}" MERGE_BASE=$(git merge-base origin/main "pr-${pr}") echo " PR #${pr}: merge-base=${MERGE_BASE}" git diff "${MERGE_BASE}..pr-${pr}" > "/tmp/patch-ctx/${pr}.patch" echo " PR #${pr}: $(wc -l < /tmp/patch-ctx/${pr}.patch) lines" done elif [ -n "${BASE_SHA}" ]; then echo "Generating diff: image ${BASE_SHA} → latest main" git fetch origin "${BASE_SHA}" git diff "${BASE_SHA}..origin/main" > /tmp/patch-ctx/main.patch echo " main: $(wc -l < /tmp/patch-ctx/main.patch) lines" else echo "::error::No PR numbers specified and image has no .git — cannot generate diff against main" exit 1 fi TOTAL=$(cat /tmp/patch-ctx/*.patch | wc -l) if [ "${TOTAL}" -eq 0 ]; then echo "::warning::All patches are empty — image is already up to date" echo "SKIP_BUILD=true" >> "$GITHUB_ENV" fi
IMAGE="lmsysorg/sglang:${{ inputs.image_tag }}" cat <<'DOCKERFILE' > /tmp/patch-ctx/Dockerfile ARG BASE_IMAGE FROM ${BASE_IMAGE} COPY *.patch /tmp/patches/ RUN cd /sgl-workspace/sglang \ && for p in /tmp/patches/*.patch; do \ if [ ! -s "${p}" ]; then \ echo "Skipping ${p} (empty)"; \ else \ echo "Applying ${p}..." \ && patch -p1 --fuzz=2 --no-backup-if-mismatch -f < "${p}" \ || { echo "ERROR: Failed to apply ${p}"; exit 1; }; \ fi; \ done \ && rm -rf /tmp/patches DOCKERFILE docker build \ --no-cache \ --build-arg BASE_IMAGE="${IMAGE}" \ -t "${IMAGE}" \ /tmp/patch-ctx/
IMAGE="lmsysorg/sglang:${{ inputs.image_tag }}" docker push "${IMAGE}" echo "### Patched \`${IMAGE}\`" >> "$GITHUB_STEP_SUMMARY" echo "- **Base commit:** \`${BASE_SHA:-unknown (no .git)}\`" >> "$GITHUB_STEP_SUMMARY" echo "- **Source:** ${{ inputs.pr_numbers && format('PRs: {0}', inputs.pr_numbers) || 'latest main' }}" >> "$GITHUB_STEP_SUMMARY"

View raw YAML

# Rebuilds lmsysorg/sglang:<image_tag> in place with source patches applied on
# top of the existing image: either the diffs of the listed PRs or, when no PRs
# are given, the diff between the commit found inside the image and origin/main.
name: Patch Docker Image

on:
  workflow_dispatch:
    inputs:
      pr_numbers:
        description: "Comma-separated PR numbers to apply (e.g. 18962,19010)"
        required: false
        default: ""
      image_tag:
        description: "Base image tag to patch (e.g. dev-x86, dev-x86-cu13)"
        required: true

concurrency:
  group: patch-docker-${{ inputs.image_tag }}
  cancel-in-progress: true

jobs:
  patch:
    if: github.repository == 'sgl-project/sglang'
    runs-on: x64-docker-build-node
    env:
      # Dispatch inputs reach the scripts only through the environment, so
      # their contents are treated as data and never parsed as shell code.
      IMAGE: lmsysorg/sglang:${{ inputs.image_tag }}
      PR_NUMBERS: ${{ inputs.pr_numbers }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Pull base image and extract commit
        run: |
          docker pull "${IMAGE}"
          # BASE_SHA is left empty when the image has no usable git checkout.
          if BASE_SHA=$(docker run --rm "${IMAGE}" git -C /sgl-workspace/sglang rev-parse HEAD 2>/dev/null); then
            echo "Image built from commit: ${BASE_SHA}"
          else
            BASE_SHA=""
            echo "::warning::Image has no .git directory — cannot extract base commit"
          fi
          echo "BASE_SHA=${BASE_SHA}" >> "$GITHUB_ENV"

      - name: Generate patches
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          git fetch origin main
          # Start from an empty build context: every *.patch in this directory
          # is copied into the image, so files left behind by an earlier run on
          # the same machine must not survive.
          rm -rf /tmp/patch-ctx
          mkdir -p /tmp/patch-ctx

          if [ -n "${PR_NUMBERS}" ]; then
            IFS=',' read -ra PRS <<< "${PR_NUMBERS}"
            for pr in "${PRS[@]}"; do
              # xargs trims the whitespace around each entry.
              pr=$(echo "${pr}" | xargs)
              # Tolerate stray commas such as "123,,456" or a trailing comma.
              if [ -z "${pr}" ]; then
                continue
              fi
              # The value ends up in a refspec and a file name; digits only.
              if ! [[ "${pr}" =~ ^[0-9]+$ ]]; then
                echo "::error::Invalid PR number: '${pr}'"
                exit 1
              fi
              echo "Fetching PR #${pr}"
              git fetch origin "pull/${pr}/head:pr-${pr}"
              MERGE_BASE=$(git merge-base origin/main "pr-${pr}")
              echo "  PR #${pr}: merge-base=${MERGE_BASE}"
              git diff "${MERGE_BASE}..pr-${pr}" > "/tmp/patch-ctx/${pr}.patch"
              echo "  PR #${pr}: $(wc -l < "/tmp/patch-ctx/${pr}.patch") lines"
            done
          elif [ -n "${BASE_SHA}" ]; then
            echo "Generating diff: image ${BASE_SHA} → latest main"
            git fetch origin "${BASE_SHA}"
            git diff "${BASE_SHA}..origin/main" > /tmp/patch-ctx/main.patch
            echo "  main: $(wc -l < /tmp/patch-ctx/main.patch) lines"
          else
            echo "::error::No PR numbers specified and image has no .git — cannot generate diff against main"
            exit 1
          fi

          # When every generated patch is empty the build and push are skipped.
          TOTAL=$(cat /tmp/patch-ctx/*.patch | wc -l)
          if [ "${TOTAL}" -eq 0 ]; then
            echo "::warning::All patches are empty — image is already up to date"
            echo "SKIP_BUILD=true" >> "$GITHUB_ENV"
          fi

      - name: Build patched image
        if: env.SKIP_BUILD != 'true'
        run: |
          # The heredoc delimiter is quoted, so the Dockerfile text below is
          # written verbatim with no shell expansion.
          cat <<'DOCKERFILE' > /tmp/patch-ctx/Dockerfile
          ARG BASE_IMAGE
          FROM ${BASE_IMAGE}
          COPY *.patch /tmp/patches/
          RUN cd /sgl-workspace/sglang \
              && for p in /tmp/patches/*.patch; do \
                   if [ ! -s "${p}" ]; then \
                     echo "Skipping ${p} (empty)"; \
                   else \
                     echo "Applying ${p}..." \
                     && patch -p1 --fuzz=2 --no-backup-if-mismatch -f < "${p}" \
                     || { echo "ERROR: Failed to apply ${p}"; exit 1; }; \
                   fi; \
                 done \
              && rm -rf /tmp/patches
          DOCKERFILE

          # The patched image is tagged with the same name as its base image.
          docker build \
            --no-cache \
            --build-arg BASE_IMAGE="${IMAGE}" \
            -t "${IMAGE}" \
            /tmp/patch-ctx/

      - name: Push patched image
        if: env.SKIP_BUILD != 'true'
        env:
          # Rendered by the expression engine, then used as plain data below.
          PATCH_SOURCE: ${{ inputs.pr_numbers && format('PRs: {0}', inputs.pr_numbers) || 'latest main' }}
        run: |
          docker push "${IMAGE}"

          echo "### Patched \`${IMAGE}\`" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Base commit:** \`${BASE_SHA:-unknown (no .git)}\`" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Source:** ${PATCH_SOURCE}" >> "$GITHUB_STEP_SUMMARY"

pr-benchmark-rust matrix perms .github/workflows/pr-benchmark-rust.yml

Triggers

push, pull_request, workflow_dispatch

Runs on

ubuntu-latest, ${{ matrix.runner }}, ubuntu-latest

Jobs

benchmark-compile-check, benchmark, benchmark-summary

Matrix

include, include.artifact_name, include.artifact_path, include.bench_args, include.bench_name, include.name, include.runner, include.sccache_version→ , Manual Policy, Request Processing, benchmark_summary --exact, criterion/benchmark_summary/, criterion/manual_policy*/, manual-policy-results, manual_policy_benchmark, request-processing-results, request_processing, ubuntu-latest, v0.12.0

Actions

mozilla-actions/sccache-action, Swatinem/rust-cache, mozilla-actions/sccache-action, Swatinem/rust-cache

Commands

bash scripts/ci/cuda/ci_install_gateway_dependencies.sh
source "$HOME/.cargo/env" cd sgl-model-gateway/ cargo check --benches
sccache --show-stats
bash scripts/ci/cuda/ci_install_gateway_dependencies.sh
source "$HOME/.cargo/env" cd sgl-model-gateway/ if command -v sccache &> /dev/null; then echo "Testing sccache availability..." export RUSTC_WRAPPER=sccache export SCCACHE_GHA_ENABLED="true" if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then echo "sccache is working, using it for compilation" else echo "sccache failed to start, falling back to regular cargo" unset RUSTC_WRAPPER unset SCCACHE_GHA_ENABLED fi else echo "sccache not available, using regular cargo" fi cargo bench --bench ${{ matrix.bench_name }} -- ${{ matrix.bench_args }} 2>&1 | tee benchmark_output.txt
sccache --show-stats
generate_section() { local title="$1" dir_name="$2" lines="${3:-100}" local dir="benchmark-results/${dir_name}-${{ github.sha }}" echo "### $title" >> summary.md if [ -d "$dir" ]; then echo "✅ **Completed**" >> summary.md if [ -f "$dir/benchmark_output.txt" ]; then echo -e "\n<details>\n<summary>View Results</summary>\n\n\`\`\`" >> summary.md tail -"$lines" "$dir/benchmark_output.txt" >> summary.md echo -e "\`\`\`\n</details>" >> summary.md fi else echo "❌ Failed or skipped" >> summary.md fi echo "" >> summary.md } echo "## 🚀 Benchmark Results Summary" > summary.md echo "" >> summary.md generate_section "Request Processing" "request-processing-results" 60 generate_section "Manual Policy (Sticky Sessions)" "manual-policy-results" 100 echo -e "---\n_Generated at $(date -u '+%Y-%m-%d %H:%M:%S UTC')_" >> summary.md cat summary.md cat summary.md >> $GITHUB_STEP_SUMMARY

View raw YAML

name: PR Benchmark (SMG Components)

# Runs for pushes and pull requests against main that touch
# sgl-model-gateway/, and on manual dispatch.
on:
  push:
    branches:
      - main
    paths:
      - 'sgl-model-gateway/**'
  pull_request:
    branches:
      - main
    paths:
      - 'sgl-model-gateway/**'
  workflow_dispatch:

# One active run per ref; a newer run cancels the one in progress.
concurrency:
  group: pr-benchmark-rust-${{ github.ref }}
  cancel-in-progress: true

# Applied to every job in this workflow.
env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"

permissions:
  contents: read
  pull-requests: write
  issues: write

jobs:
  benchmark-compile-check:
    # Type-checks every bench target with `cargo check --benches`; nothing is executed.
    name: Benchmark Compilation Check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          disable_annotations: true
          version: 'v0.12.0'

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: 'rust-cache'
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Check benchmarks compile
        working-directory: sgl-model-gateway
        run: |
          . "$HOME/.cargo/env"
          cargo check --benches

      # Reported even when the check fails, to show cache hit/miss numbers.
      - name: Show sccache stats
        if: always()
        run: sccache --show-stats

  benchmark:
    # Runs one `cargo bench` target per matrix entry and uploads its output.
    name: Benchmark - ${{ matrix.name }}
    # Only in sgl-project/sglang. Always runs for push and workflow_dispatch;
    # pull requests must carry both the 'router-benchmark' and 'run-ci' labels.
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       (contains(github.event.pull_request.labels.*.name, 'router-benchmark') &&
        contains(github.event.pull_request.labels.*.name, 'run-ci')))
    strategy:
      # Let the other benchmark finish even if one of them fails.
      fail-fast: false
      matrix:
        # Per entry:
        #   bench_name    - target passed to `cargo bench --bench`
        #   bench_args    - arguments forwarded to the bench binary after `--`
        #   artifact_name - prefix of the uploaded artifact (suffixed with the commit SHA)
        #   artifact_path - result directory below sgl-model-gateway/target/
        include:
          - name: Request Processing
            bench_name: request_processing
            bench_args: "benchmark_summary --exact"
            runner: ubuntu-latest
            sccache_version: "v0.12.0"
            artifact_name: request-processing-results
            artifact_path: criterion/benchmark_summary/
          - name: Manual Policy
            bench_name: manual_policy_benchmark
            bench_args: ""
            runner: ubuntu-latest
            sccache_version: "v0.12.0"
            artifact_name: manual-policy-results
            artifact_path: criterion/manual_policy*/
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: ${{ matrix.sccache_version }}
          disable_annotations: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: "rust-cache"
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Run benchmark
        timeout-minutes: 30
        run: |
          # A `run` step without an explicit `shell` is executed as `bash -e`,
          # which does not enable pipefail. Without this, a failing
          # `cargo bench` would be hidden by the exit status of `tee` below
          # and the step would be reported as successful.
          set -o pipefail
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          # Use sccache only if its server actually starts; otherwise drop the
          # wrapper so cargo falls back to invoking rustc directly.
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          # Keep a copy of the console output; it is uploaded below.
          cargo bench --bench ${{ matrix.bench_name }} -- ${{ matrix.bench_args }} 2>&1 | tee benchmark_output.txt

      # Uploaded even on failure so the captured output can be inspected.
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.artifact_name }}-${{ github.sha }}
          path: |
            sgl-model-gateway/target/${{ matrix.artifact_path }}
            sgl-model-gateway/benchmark_output.txt
          retention-days: 30

      - name: Show sccache stats
        if: always()
        run: sccache --show-stats

  benchmark-summary:
    # Collects the per-benchmark artifacts into a single markdown report.
    name: Benchmark Summary
    needs: [benchmark]
    # always(): still report when a benchmark job failed or was skipped.
    if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    runs-on: ubuntu-latest
    steps:
      # Each matching artifact is placed in benchmark-results/<artifact name>/.
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          path: benchmark-results
          pattern: '*-results-${{ github.sha }}'

      - name: Generate summary
        run: |
          # Print one markdown section for a benchmark artifact on stdout.
          #   $1 section heading
          #   $2 artifact name prefix (the commit SHA suffix is appended here)
          #   $3 number of trailing output lines to include (default 100)
          generate_section() {
            local heading="$1" artifact="$2" max_lines="${3:-100}"
            local result_dir="benchmark-results/${artifact}-${{ github.sha }}"
            local log_file="$result_dir/benchmark_output.txt"

            echo "### $heading"
            if [ ! -d "$result_dir" ]; then
              echo "❌ Failed or skipped"
            else
              echo "✅ **Completed**"
              if [ -f "$log_file" ]; then
                echo -e "\n<details>\n<summary>View Results</summary>\n\n\`\`\`"
                tail -n "$max_lines" "$log_file"
                echo -e "\`\`\`\n</details>"
              fi
            fi
            echo ""
          }

          # Build the whole report in one redirected group.
          {
            echo "## 🚀 Benchmark Results Summary"
            echo ""
            generate_section "Request Processing" "request-processing-results" 60
            generate_section "Manual Policy (Sticky Sessions)" "manual-policy-results" 100
            echo -e "---\n_Generated at $(date -u '+%Y-%m-%d %H:%M:%S UTC')_"
          } > summary.md

          # Show the report in the job log and append it to the run summary.
          tee -a "$GITHUB_STEP_SUMMARY" < summary.md

      - name: Upload summary
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-summary-${{ github.sha }}
          path: summary.md
          retention-days: 30

pr-gate .github/workflows/pr-gate.yml

Triggers

workflow_call

Runs on

ubuntu-latest

Jobs

pr-gate

Commands

echo "===== PR Info =====" echo "PR Event: ${{ github.event_name }}" echo "PR Labels: ${{ steps.pr.outputs.labels }}" echo "PR Draft: ${{ steps.pr.outputs.draft }}" echo "PR User: ${{ steps.pr.outputs.user }}" echo "Require run-ci: ${{ inputs.require-run-ci }}" echo "Cool down minutes: ${{ inputs.cool-down-minutes }}" echo "==================="
echo "PR is draft. Blocking CI." exit 1
labels='${{ steps.pr.outputs.labels }}' if [[ "${{ contains(fromJson(steps.pr.outputs.labels), 'run-ci') }}" == "false" ]]; then echo "Missing required label 'run-ci'. See https://docs.sglang.io/developer_guide/contribution_guide.html#how-to-trigger-ci-tests for more details." exit 1 fi

View raw YAML

# Reusable workflow (workflow_call only); callers tune the gate through these inputs.
on:
  workflow_call:
    inputs:
      require-run-ci:
        type: boolean
        default: true
        description: 'Whether the PR must have the run-ci label'
      cool-down-minutes:
        type: number
        default: 120
        description: 'Cooldown period in minutes for low-permission users; 0 disables rate limiting'

jobs:
  pr-gate:
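    # Every step below is conditioned on github.event_name == 'pull_request',
    # so the gate is a no-op for other events:
    # 1. commits on main: no gating needed
    # 2. workflow_dispatch: can only be triggered by users with write access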
    runs-on: ubuntu-latest
    steps:
      # Reads the PR through the REST API and exposes three step outputs:
      #   labels - JSON array of label names
      #   draft  - the PR's draft flag
      #   user   - login of the PR author
      - name: Fetch latest PR info
        id: pr
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const { owner, repo } = context.repo;
            const { data: pullRequest } = await github.rest.pulls.get({
              owner,
              repo,
              pull_number: context.issue.number,
            });
            const labelNames = pullRequest.labels.map((label) => label.name);
            core.setOutput("labels", JSON.stringify(labelNames));
            core.setOutput("draft", pullRequest.draft);
            core.setOutput("user", pullRequest.user.login);

      # Prints the gate's inputs and the fetched PR state for debugging.
      # Values are handed to the script through environment variables rather
      # than being interpolated into the script text. The labels output is JSON
      # containing double quotes (which the shell would otherwise strip), and
      # label names are free-form text that must never be parsed as shell code.
      - name: Log PR info
        if: github.event_name == 'pull_request'
        env:
          PR_EVENT: ${{ github.event_name }}
          PR_LABELS: ${{ steps.pr.outputs.labels }}
          PR_DRAFT: ${{ steps.pr.outputs.draft }}
          PR_USER: ${{ steps.pr.outputs.user }}
          REQUIRE_RUN_CI: ${{ inputs.require-run-ci }}
          COOL_DOWN_MINUTES: ${{ inputs.cool-down-minutes }}
        run: |
          echo "===== PR Info ====="
          echo "PR Event: ${PR_EVENT}"
          echo "PR Labels: ${PR_LABELS}"
          echo "PR Draft: ${PR_DRAFT}"
          echo "PR User: ${PR_USER}"
          echo "Require run-ci: ${REQUIRE_RUN_CI}"
          echo "Cool down minutes: ${COOL_DOWN_MINUTES}"
          echo "==================="

      # Fails the gate when the fetched PR state says the PR is a draft.
      - name: Block draft PR
        if: ${{ github.event_name == 'pull_request' && fromJson(steps.pr.outputs.draft) }}
        run: |
          echo "PR is draft. Blocking CI."
          exit 1

      # Fails the gate when the caller requires the 'run-ci' label and the PR
      # does not have it. The label check is evaluated by the expression engine
      # and only its true/false result reaches the shell, so label text is
      # never interpolated into the script.
      - name: Require run-ci label (optional)
        if: github.event_name == 'pull_request' && inputs.require-run-ci == true
        env:
          HAS_RUN_CI: ${{ contains(fromJson(steps.pr.outputs.labels), 'run-ci') }}
        run: |
          if [[ "$HAS_RUN_CI" == "false" ]]; then
            echo "Missing required label 'run-ci'. See https://docs.sglang.io/developer_guide/contribution_guide.html#how-to-trigger-ci-tests for more details."
            exit 1
          fi

      # Rate-limits pull_request runs for actors without write/maintain/admin
      # permission on the repository. Outline of the script:
      #   1. Resolve the triggering actor (the PR author is substituted when the
      #      actor is github-actions[bot]).
      #   2. Return early if the actor has high permission.
      #   3. Cooldown = min(cool-down-minutes input, the actor's optional
      #      cooldown_interval_minutes in .github/CI_PERMISSIONS.json on main).
      #   4. Fail the step if the actor has another pr-test.yml run inside the
      #      cooldown window that got past the gate jobs.
      - name: Enforce rate limit for low-permission actors (optional)
        if: github.event_name == 'pull_request' && inputs.cool-down-minutes > 0
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const DEFAULT_MINUTES = Number("${{ inputs.cool-down-minutes }}");
            const owner = context.repo.owner;
            const repo = context.repo.repo;
            const eventName = context.eventName;
            // Prefer the run's triggering_actor; fall back to context.actor.
            const curRun = await github.rest.actions.getWorkflowRun({
              owner, repo, run_id: context.runId
            });
            let triggeringActor = curRun.data.triggering_actor?.login || context.actor;
            if (triggeringActor === "github-actions[bot]") {
              triggeringActor = `${{ steps.pr.outputs.user }}`;
              core.info(
                `triggering_actor is github-actions[bot]; substituting PR author '${triggeringActor}'.`
              );
            }

            // True when the user's collaborator permission is write, maintain or admin.
            // A 404/403 from the API is treated as "no high permission"; other errors propagate.
            async function hasHighPermission(username) {
              try {
                const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username });
                const perm = data.permission || 'none';
                return perm === 'write' || perm === 'maintain' || perm === 'admin';
              } catch (e) {
                if (e.status === 404 || e.status === 403) return false;
                throw e;
              }
            }

            if (await hasHighPermission(triggeringActor)) {
              core.info(`Triggering user '${triggeringActor}' has high permission. No rate limit applied.`);
              return;
            }

            let effectiveCooldownMinutes = DEFAULT_MINUTES;
            let perUserCooldownMinutes = null;

            // Optional per-user override, always read from the main branch.
            // Any failure here is logged and the default cooldown is kept.
            try {
              const contentResp = await github.rest.repos.getContent({
                owner,
                repo,
                path: ".github/CI_PERMISSIONS.json",
                ref: "main",
              });

              if (!Array.isArray(contentResp.data) && contentResp.data && "content" in contentResp.data) {
                const raw = Buffer.from(
                  contentResp.data.content,
                  contentResp.data.encoding || "base64"
                ).toString();
                const ciPermissions = JSON.parse(raw);

                const userPerm = ciPermissions[triggeringActor];
                if (userPerm && typeof userPerm.cooldown_interval_minutes === "number") {
                  perUserCooldownMinutes = userPerm.cooldown_interval_minutes;
                  core.info(
                    `Per-user cooldown for '${triggeringActor}' from CI_PERMISSIONS.json: ${perUserCooldownMinutes} minutes.`
                  );
                } else {
                  core.info(`No per-user cooldown found for '${triggeringActor}' in CI_PERMISSIONS.json.`);
                }
              } else {
                core.info("CI_PERMISSIONS.json content response is not a file; skipping per-user cooldown.");
              }
            } catch (e) {
              core.info(`CI_PERMISSIONS.json not found or unreadable: ${e.message}. Using default rate limit only.`);
            }

            // Math.min: a per-user value can shorten the cooldown but never extend it
            // beyond the workflow input.
            if (perUserCooldownMinutes !== null) {
              effectiveCooldownMinutes = Math.min(effectiveCooldownMinutes, perUserCooldownMinutes);
            }

            if (effectiveCooldownMinutes <= 0) {
              core.info(
                `Effective cooldown for '${triggeringActor}' is 0 minutes; no rate limit enforced for this user.`
              );
              return;
            }

            const cutoff = new Date(Date.now() - effectiveCooldownMinutes * 60 * 1000);
            core.info(
              `Checking for workflow runs since ${cutoff.toISOString()} (last ${effectiveCooldownMinutes} minutes) for event '${eventName}'.`
            );

            // NOTE(review): only one page (up to 100 runs) is requested, with no actor or
            // date filter; older runs inside the cooldown window are not seen if more than
            // 100 matching runs exist - confirm this is acceptable.
            // NOTE(review): the workflow file is hard-coded to 'pr-test.yml' while the
            // failure message below reports context.workflow - confirm they always agree.
            const { data } = await github.rest.actions.listWorkflowRuns({
              owner,
              repo,
              workflow_id: 'pr-test.yml',
              event: eventName,
              per_page: 100,
            });

            const runs = data.workflow_runs || [];

            // Rate Limiting Logic:
            // We only count workflow runs that actually consumed CI resources (i.e., passed the gate).
            // A run "passes the gate" if any jobs beyond the gate jobs (check-changes, pr-gate, call-gate)
            // actually executed (not skipped/cancelled). This prevents scenarios where:
            // - User has PR A with missing 'run-ci' label (fails at gate)
            // - User opens PR B with 'run-ci' label
            // - PR B should be able to run even though PR A triggered a run recently

            // Helper function to check if a run passed the gate (i.e., actually consumed CI resources)
            async function didRunPassGate(run) {
              try {
                // Note: Fetching up to 100 jobs (API maximum). If a workflow has >100 jobs,
                // we may miss some, but this is unlikely in practice.
                const { data: jobsData } = await github.rest.actions.listJobsForWorkflowRun({
                  owner, repo, run_id: run.id, per_page: 100
                });
                const jobs = jobsData.jobs || [];

                // If no jobs exist yet, the run hasn't started consuming resources
                if (jobs.length === 0) {
                  core.info(`Run ${run.id} has no jobs yet; not counting against rate limit.`);
                  return false;
                }

                // Gate jobs that don't consume significant CI resources
                // (matched by exact name, or by the name followed by a space)
                const gateJobs = ['check-changes', 'pr-gate', 'call-gate', 'pr-test-finish'];
                const jobsBeyondGate = jobs.filter(j => !gateJobs.some(g => j.name === g || j.name.startsWith(g + ' ')));

                // A job "ran" if it reached a terminal conclusion state that indicates actual execution
                // (jobs without a conclusion yet are therefore not counted)
                const ranStates = ['success', 'failure', 'timed_out', 'action_required'];
                const hasJobsThatRan = jobsBeyondGate.some(j => j.conclusion && ranStates.includes(j.conclusion));
                return hasJobsThatRan;
              } catch (e) {
                core.warning(`Could not check jobs for run ${run.id}: ${e.message}`);

                // If it's a rate limit error, count it conservatively to prevent abuse
                if (e.status === 429) {
                  core.warning(`Hit rate limit checking run ${run.id}; counting it to be safe.`);
                  return true;
                }

                // For cancelled/skipped runs, they likely didn't consume resources
                if (run.conclusion === 'cancelled' || run.conclusion === 'skipped') {
                  return false;
                }

                // Default to counting it to prevent abuse
                return true;
              }
            }

            // Limit the number of runs we'll check in detail to avoid API rate limits
            const MAX_RUNS_TO_CHECK = 5;
            let runsChecked = 0;
            let runsSkippedAtGate = 0;
            let recentFound = null;

            // Consider only other runs inside the cooldown window that belong to this actor
            // (either as actor or as triggering_actor).
            for (const run of runs) {
              if (String(run.id) === String(context.runId)) continue;
              if (new Date(run.created_at) < cutoff) continue;
              const isUserRun = (run.actor?.login === triggeringActor) || (run.triggering_actor?.login === triggeringActor);
              if (!isUserRun) continue;

              runsChecked++;
              core.info(`Checking run ${run.id} (created: ${run.created_at}, conclusion: ${run.conclusion})`);

              // Safety limit: if we've checked too many runs, assume the next one passed to be conservative
              if (runsChecked > MAX_RUNS_TO_CHECK) {
                core.warning(`Checked ${MAX_RUNS_TO_CHECK} runs; assuming this one passed gate to avoid API limits.`);
                recentFound = run;
                break;
              }

              // Only count runs that actually passed the gate and consumed CI resources
              if (await didRunPassGate(run)) {
                recentFound = run;
                core.info(`Found recent run ${run.id} that passed gate.`);
                break;
              } else {
                runsSkippedAtGate++;
                core.info(`Run ${run.id} failed at gate; not counting against rate limit.`);
              }
            }

            core.info(`Rate limit check summary: checked ${runsChecked} runs, ${runsSkippedAtGate} failed at gate.`);

            // A counted run inside the window fails this step; otherwise the gate passes.
            if (recentFound) {
              core.setFailed(
                `User '${triggeringActor}' already triggered '${context.workflow}' via '${eventName}' at ${recentFound.created_at}. ` +
                `Please wait ${effectiveCooldownMinutes} minutes before triggering again.`
              );
            } else {
              core.info(
                `No recent runs detected for '${triggeringActor}' within the last ${effectiveCooldownMinutes} minutes; proceeding.`
              );
            }

pr-test matrix perms .github/workflows/pr-test.yml

Triggers

schedule, pull_request, workflow_dispatch, workflow_call

Runs on

ubuntu-latest, ubuntu-latest, ubuntu-latest, x64-kernel-build-node, arm-kernel-build-node, 1-gpu-5090, ubuntu-latest, 1-gpu-5090, 1-gpu-h100, 2-gpu-h100, ${{ needs.check-changes.outputs.b200_runner }}, 4-gpu-h100, 8-gpu-h200, 8-gpu-h20, 4-gpu-h100, 8-gpu-h200, ${{ needs.check-changes.outputs.b200_runner }}, ubuntu-latest

Jobs

check-changes, wait-for-stage-a, wait-for-stage-b, call-gate, sgl-kernel-build-wheels, sgl-kernel-build-wheels-arm, call-sgl-kernel-tests, call-jit-kernel-tests, stage-a-test-1-gpu-small, stage-a-test-cpu, stage-b-test-1-gpu-small, stage-b-test-1-gpu-large, stage-b-test-2-gpu-large, stage-b-test-4-gpu-b200, call-multimodal-gen-tests, stage-c-test-4-gpu-h100, stage-c-test-8-gpu-h200, stage-c-test-8-gpu-h20, stage-c-test-deepep-4-gpu-h100, stage-c-test-deepep-8-gpu-h200, stage-c-test-4-gpu-b200, pr-test-finish

Matrix

include, include.cuda-version, include.python-version, part, partition→ 0, 1, 10, 11, 12, 12.9, 13, 2, 3, 3.10, 4, 5, 6, 7, 8, 9

Actions

dorny/paths-filter, astral-sh/setup-uv

Commands

# Run all tests for scheduled runs and workflow_call (when ref input is provided) # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.git_ref if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then echo "run_all_tests=true" >> $GITHUB_OUTPUT echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})" else echo "run_all_tests=false" >> $GITHUB_OUTPUT echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" fi
echo "Detecting file changes via GitHub API for target_stage mode..." echo "PR head SHA: ${{ inputs.pr_head_sha }}" # Get the list of changed files by comparing PR commit against main # This correctly handles merge commits by looking at the actual PR diff CHANGED_FILES=$(gh api "repos/${{ github.repository }}/compare/main...${{ inputs.pr_head_sha }}" \ --jq '[.files[].filename] | .[]' 2>/dev/null || echo "") if [ -z "$CHANGED_FILES" ]; then echo "Warning: Could not fetch changed files from API, assuming no changes" echo "sgl_kernel=false" >> $GITHUB_OUTPUT echo "main_package=false" >> $GITHUB_OUTPUT echo "jit_kernel=false" >> $GITHUB_OUTPUT echo "multimodal_gen=false" >> $GITHUB_OUTPUT exit 0 fi echo "Changed files:" echo "$CHANGED_FILES" | head -20 echo "..." # Check for sgl-kernel changes if echo "$CHANGED_FILES" | grep -qE "^(sgl-kernel/|\.github/workflows/pr-test-sgl-kernel\.yml)"; then echo "sgl_kernel=true" >> $GITHUB_OUTPUT echo "Detected sgl-kernel changes" else echo "sgl_kernel=false" >> $GITHUB_OUTPUT fi # Check for main_package changes (excluding multimodal_gen) # Note: Need to filter out multimodal_gen before checking, not pipe grep -q output MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml|\.github/workflows/pr-gate\.yml|\.github/actions/)" | grep -v "^python/sglang/multimodal_gen/" || true) if [ -n "$MAIN_PKG_FILES" ]; then echo "main_package=true" >> $GITHUB_OUTPUT echo "Detected main_package changes" else echo "main_package=false" >> $GITHUB_OUTPUT fi # Check for jit_kernel changes if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-jit-kernel\.yml)"; then echo "jit_kernel=true" >> $GITHUB_OUTPUT echo "Detected jit_kernel changes" else echo "jit_kernel=false" >> $GITHUB_OUTPUT fi # Check for multimodal_gen changes, including diffusion-specific 
jit_kernel coverage if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-multimodal-gen\.yml)"; then echo "multimodal_gen=true" >> $GITHUB_OUTPUT echo "Detected multimodal_gen changes" else echo "multimodal_gen=false" >> $GITHUB_OUTPUT fi
# Scheduled runs and high-priority PRs get full parallelism if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "max_parallel=14" >> $GITHUB_OUTPUT echo "Scheduled run detected, setting max_parallel to 14" elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then echo "max_parallel=14" >> $GITHUB_OUTPUT echo "High priority PR detected, setting max_parallel to 14" elif [[ -n "${{ inputs.target_stage }}" ]]; then # /rerun-stage (workflow_dispatch): query PR labels via GitHub API # Try SHA lookup first (fork PRs), fallback to branch name (non-fork PRs) LABELS="" PR_HEAD_SHA="${{ inputs.pr_head_sha }}" if [[ -n "$PR_HEAD_SHA" ]]; then LABELS=$(gh api "repos/${{ github.repository }}/commits/${PR_HEAD_SHA}/pulls" \ --jq '.[0].labels[].name' 2>/dev/null || true) fi if [[ -z "$LABELS" ]]; then LABELS=$(gh pr list --head "${{ github.ref_name }}" --repo "${{ github.repository }}" \ --json labels --jq '.[0].labels[].name' 2>/dev/null || true) fi echo "PR labels: ${LABELS:-"(none)"}" if echo "$LABELS" | grep -Fxq "high priority"; then echo "max_parallel=14" >> $GITHUB_OUTPUT echo "High priority PR detected via API (/rerun-stage), setting max_parallel to 14" else echo "max_parallel=3" >> $GITHUB_OUTPUT echo "Using default max_parallel of 3 (/rerun-stage, no high priority label)" fi else echo "max_parallel=3" >> $GITHUB_OUTPUT echo "Using default max_parallel of 3" fi
# Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode # (target_stage skips wheel builds, so we can't use custom kernels) # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter) sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}" target_stage="${{ inputs.target_stage }}" if [[ "$sgl_kernel" == "true" && -z "$target_stage" ]]; then echo "b200_runner=4-gpu-b200-kernel" >> $GITHUB_OUTPUT else echo "b200_runner=4-gpu-b200" >> $GITHUB_OUTPUT fi
echo "enable_retry=true" >> $GITHUB_OUTPUT echo "Retry logic enabled for CI"
if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then echo "continue_on_error=true" >> $GITHUB_OUTPUT echo "Full test run or force flag detected, enabling continue-on-error to run all tests" else echo "continue_on_error=false" >> $GITHUB_OUTPUT echo "Filtered run, continue-on-error disabled" fi
echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes." echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files." echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds." echo "" echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes." echo "" echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels." echo "The /rerun-stage command skips the wheel build job, so the test would run against" echo "the wrong (PyPI) version of sgl-kernel instead of your changes." echo "" echo "To properly test your kernel changes, use one of these commands instead:" echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds" echo " /rerun-ci - Re-run the full workflow" echo "" exit 1
{ echo "## Change Detection" echo "" echo "| Component | Changed |" echo "|-------------------|---------|" echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |" echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |" echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |" echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |" echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |" echo "| target_stage | ${{ inputs.target_stage || '(none)' }} |" echo "| detection_method | ${{ inputs.target_stage && 'GitHub API' || 'dorny/paths-filter' }} |" echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |" echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |" echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |" echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |" } >> $GITHUB_STEP_SUMMARY

View raw YAML

name: PR Test
# run-name is customized only for /rerun-stage dispatches, to enable URL lookup:
#   "[stage-name] sha" - fork PRs (pr_head_sha is set)
#   "[stage-name]"     - non-fork PRs
#   ''                 - all other runs (default run name)
run-name: ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage)) || '' }}

on:
  schedule:
    # Every 6 hours (UTC)
    - cron: '0 */6 * * *'
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      target_stage:
        type: string
        required: false
        default: ""
        description: "Specific stage to run (optional, for quick testing)"
      force_continue_on_error:
        type: boolean
        required: false
        default: false
        description: "Force continue-on-error (test scheduled CI behavior)"
      pr_head_sha:
        type: string
        required: false
        default: ""
        description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"
      test_parallel_dispatch:
        type: boolean
        required: false
        default: false
        description: "Test parallel dispatch behavior (simulates scheduled run)"
  workflow_call:
    inputs:
      git_ref:
        type: string
        required: false
        default: ''
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
      run_all_tests:
        type: boolean
        required: false
        default: false
        description: "Run all tests (for releasing or testing purpose)"
      skip_stage_health_check:
        type: boolean
        required: false
        default: false
        description: "Skip stage health check fast-fail (e.g. for release branch cuts)"

concurrency:
  # Group layout: pr-test-{event}-{branch}-{pr_sha}-{stage}
  #   event  - keeps scheduled runs apart from fork PRs whose branch is named 'main'
  #            (otherwise both would resolve the branch segment to 'main' and block each other)
  #   branch - github.head_ref (pull_request) or github.ref_name (workflow_dispatch)
  #   pr_sha - isolates /rerun-stage from main branch runs
  #   stage  - lets parallel stage dispatches run independently
  group: pr-test-${{ github.event_name }}-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.git_ref || 'all' }}
  # NOTE(review): in a called workflow github.event_name is inherited from the
  # caller, so this comparison may never see 'workflow_call' - confirm that
  # called runs are really exempt from cancellation.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
  # refs/heads/main covers schedule, main-branch dispatch and workflow_call from main;
  # PR events use refs/pull/*/merge instead.
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}

permissions:
  actions: write
  contents: read
  issues: read
  pull-requests: read

jobs:
  # =============================================== check changes ====================================================
  check-changes:
    # Classifies which components the triggering change touches and derives the run-wide
    # settings (max parallelism, B200 runner label, retry, continue-on-error) that the
    # downstream jobs read through `needs.check-changes.outputs.*`.
    #
    # Security note: free-form inputs (pr_head_sha, target_stage) and the ref name are handed to
    # shell steps through `env:` / default GITHUB_* variables instead of being interpolated into
    # the script text, so their contents can never be parsed as shell syntax.
    runs-on: ubuntu-latest
    outputs:
      # Use API-based detection for target_stage mode (filter-api), otherwise use dorny/paths-filter (filter)
      main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
      # sgl_kernel is forced to false when target_stage is set, since sgl-kernel-build-wheels won't run
      # This prevents CUSTOM_BUILD_SGL_KERNEL=true when the wheel artifacts aren't available
      # Note: If PR has kernel changes AND target_stage is set, the validate-target-stage step will fail
      sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }}
      # Raw sgl_kernel value before target_stage override (used for validation)
      sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
      jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
      multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
      max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
      b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
      enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
      continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-maintenance

      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests for scheduled runs, or when the caller/dispatcher sets the run_all_tests input.
          # Every other trigger falls through to the change filters below.
          if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi

      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Only use paths-filter for pull_request events (where it works correctly)
        # For workflow_dispatch with target_stage, we use GitHub API in the next step
        if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage
        with:
          filters: |
            main_package:
              - ".github/workflows/pr-test.yml"
              - ".github/workflows/pr-gate.yml"
              - ".github/actions/**"
              - "python/pyproject.toml"
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "scripts/ci/cuda/*"
              - "scripts/ci/utils/*"
              - "test/**/!(*.md)"
            multimodal_gen:
              - ".github/workflows/pr-test.yml"
              - ".github/workflows/pr-test-multimodal-gen.yml"
              - "python/pyproject.toml"
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/jit_kernel/diffusion/**"
              - "python/sglang/jit_kernel/tests/diffusion/**"
              - "python/sglang/jit_kernel/benchmark/diffusion/**"
              - "python/sglang/cli/**"
            jit_kernel:
              - ".github/workflows/pr-test.yml"
              - ".github/workflows/pr-test-jit-kernel.yml"
              - "python/pyproject.toml"
              - "python/sglang/jit_kernel/**"
            sgl_kernel:
              - ".github/workflows/pr-test-sgl-kernel.yml"
              - "sgl-kernel/**/*.!(md|txt)"

      # For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work
      # correctly because it falls back to "last commit" detection which breaks for merge commits.
      # Instead, we use the GitHub API to compare the PR commit against main.
      - name: Detect file changes via API (for target_stage)
        id: filter-api
        if: inputs.target_stage && inputs.pr_head_sha
        env:
          GH_TOKEN: ${{ github.token }}
          # Supplied via env (not inline ${{ }}) so the value is data, never shell code.
          PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
        run: |
          echo "Detecting file changes via GitHub API for target_stage mode..."
          echo "PR head SHA: ${PR_HEAD_SHA}"

          # Get the list of changed files by comparing PR commit against main
          # This correctly handles merge commits by looking at the actual PR diff
          CHANGED_FILES=$(gh api "repos/${GITHUB_REPOSITORY}/compare/main...${PR_HEAD_SHA}" \
            --jq '[.files[].filename] | .[]' 2>/dev/null || echo "")

          # Best-effort: an API failure is reported as "no changes" rather than failing the job.
          if [ -z "$CHANGED_FILES" ]; then
            echo "Warning: Could not fetch changed files from API, assuming no changes"
            echo "sgl_kernel=false" >> "$GITHUB_OUTPUT"
            echo "main_package=false" >> "$GITHUB_OUTPUT"
            echo "jit_kernel=false" >> "$GITHUB_OUTPUT"
            echo "multimodal_gen=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "Changed files:"
          echo "$CHANGED_FILES" | head -20
          echo "..."

          # Check for sgl-kernel changes
          if echo "$CHANGED_FILES" | grep -qE "^(sgl-kernel/|\.github/workflows/pr-test-sgl-kernel\.yml)"; then
            echo "sgl_kernel=true" >> "$GITHUB_OUTPUT"
            echo "Detected sgl-kernel changes"
          else
            echo "sgl_kernel=false" >> "$GITHUB_OUTPUT"
          fi

          # Check for main_package changes (excluding multimodal_gen)
          # Note: Need to filter out multimodal_gen before checking, not pipe grep -q output
          MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml|\.github/workflows/pr-gate\.yml|\.github/actions/)" | grep -v "^python/sglang/multimodal_gen/" || true)
          if [ -n "$MAIN_PKG_FILES" ]; then
            echo "main_package=true" >> "$GITHUB_OUTPUT"
            echo "Detected main_package changes"
          else
            echo "main_package=false" >> "$GITHUB_OUTPUT"
          fi

          # Check for jit_kernel changes
          if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-jit-kernel\.yml)"; then
            echo "jit_kernel=true" >> "$GITHUB_OUTPUT"
            echo "Detected jit_kernel changes"
          else
            echo "jit_kernel=false" >> "$GITHUB_OUTPUT"
          fi

          # Check for multimodal_gen changes, including diffusion-specific jit_kernel coverage
          if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-multimodal-gen\.yml)"; then
            echo "multimodal_gen=true" >> "$GITHUB_OUTPUT"
            echo "Detected multimodal_gen changes"
          else
            echo "multimodal_gen=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Set max-parallel based on run type
        id: set-parallel
        env:
          GH_TOKEN: ${{ github.token }}
          # Supplied via env (not inline ${{ }}) so the values are data, never shell code.
          TARGET_STAGE: ${{ inputs.target_stage }}
          PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
        run: |
          # Scheduled runs and high-priority PRs get full parallelism
          if [[ "${{ github.event_name }}" == "schedule" ]]; then
            echo "max_parallel=14" >> "$GITHUB_OUTPUT"
            echo "Scheduled run detected, setting max_parallel to 14"
          elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
            echo "max_parallel=14" >> "$GITHUB_OUTPUT"
            echo "High priority PR detected, setting max_parallel to 14"
          elif [[ -n "$TARGET_STAGE" ]]; then
            # /rerun-stage (workflow_dispatch): query PR labels via GitHub API
            # Try SHA lookup first (fork PRs), fallback to branch name (non-fork PRs)
            LABELS=""
            if [[ -n "$PR_HEAD_SHA" ]]; then
              LABELS=$(gh api "repos/${GITHUB_REPOSITORY}/commits/${PR_HEAD_SHA}/pulls" \
                --jq '.[0].labels[].name' 2>/dev/null || true)
            fi
            if [[ -z "$LABELS" ]]; then
              # GITHUB_REF_NAME / GITHUB_REPOSITORY are the runner-provided equivalents of
              # github.ref_name / github.repository.
              LABELS=$(gh pr list --head "$GITHUB_REF_NAME" --repo "$GITHUB_REPOSITORY" \
                --json labels --jq '.[0].labels[].name' 2>/dev/null || true)
            fi
            echo "PR labels: ${LABELS:-"(none)"}"
            if echo "$LABELS" | grep -Fxq "high priority"; then
              echo "max_parallel=14" >> "$GITHUB_OUTPUT"
              echo "High priority PR detected via API (/rerun-stage), setting max_parallel to 14"
            else
              echo "max_parallel=3" >> "$GITHUB_OUTPUT"
              echo "Using default max_parallel of 3 (/rerun-stage, no high priority label)"
            fi
          else
            echo "max_parallel=3" >> "$GITHUB_OUTPUT"
            echo "Using default max_parallel of 3"
          fi

      - name: Set B200 runner tag
        id: set-runner
        env:
          # Supplied via env (not inline ${{ }}) so the value is data, never shell code.
          TARGET_STAGE: ${{ inputs.target_stage }}
        run: |
          # Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode
          # (target_stage skips wheel builds, so we can't use custom kernels)
          # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
          sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}"
          if [[ "$sgl_kernel" == "true" && -z "$TARGET_STAGE" ]]; then
            echo "b200_runner=4-gpu-b200-kernel" >> "$GITHUB_OUTPUT"
          else
            echo "b200_runner=4-gpu-b200" >> "$GITHUB_OUTPUT"
          fi

      - name: Enable retry for CI
        id: set-retry
        run: |
          echo "enable_retry=true" >> "$GITHUB_OUTPUT"
          echo "Retry logic enabled for CI"

      - name: Set continue-on-error for full test runs
        id: set-continue-on-error
        run: |
          if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then
            echo "continue_on_error=true" >> "$GITHUB_OUTPUT"
            echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
          else
            echo "continue_on_error=false" >> "$GITHUB_OUTPUT"
            echo "Filtered run, continue-on-error disabled"
          fi

      - name: Validate target_stage with kernel changes
        # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
        if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true')
        run: |
          echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes."
          echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files."
          echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds."
          echo ""
          echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes."
          echo ""
          echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels."
          echo "The /rerun-stage command skips the wheel build job, so the test would run against"
          echo "the wrong (PyPI) version of sgl-kernel instead of your changes."
          echo ""
          echo "To properly test your kernel changes, use one of these commands instead:"
          echo "  /tag-and-rerun-ci           - Re-run the full workflow including kernel builds"
          echo "  /rerun-ci                   - Re-run the full workflow"
          echo ""
          exit 1

      - name: Show filter results in summary (table)
        env:
          # Supplied via env (not inline ${{ }}) so the value is data, never shell code.
          TARGET_STAGE: ${{ inputs.target_stage }}
        run: |
          {
            echo "## Change Detection"
            echo ""
            echo "| Component         | Changed |"
            echo "|-------------------|---------|"
            echo "| main_package      | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |"
            echo "| sgl_kernel (raw)  | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |"
            echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |"
            echo "| jit_kernel        | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |"
            echo "| multimodal_gen    | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |"
            echo "| target_stage      | ${TARGET_STAGE:-(none)} |"
            echo "| detection_method  | ${{ inputs.target_stage && 'GitHub API' || 'dorny/paths-filter' }} |"
            echo "| max_parallel      | ${{ steps.set-parallel.outputs.max_parallel }} |"
            echo "| b200_runner       | ${{ steps.set-runner.outputs.b200_runner }} |"
            echo "| enable_retry      | ${{ steps.set-retry.outputs.enable_retry }} |"
            echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |"
          } >> "$GITHUB_STEP_SUMMARY"

  # =============================================== Wait Jobs for Sequential PR Execution ====================================================
  # These jobs poll GitHub API to wait for previous stages to complete.
  # For PR runs: wait jobs run and enforce sequential execution via polling.
  # For scheduled runs: wait jobs are skipped, enabling parallel execution for easier retry.

  wait-for-stage-a:
    # Barrier job for pull_request runs: it delegates to the local wait-for-jobs action and
    # exposes that action's `result` output as `stage_a_result`.
    # It is bypassed for non-PR events, for targeted reruns (target_stage), for parallel-dispatch
    # tests, when neither main_package nor sgl_kernel changed, and when call-gate did not end in
    # success/skipped (in which case the stage-a jobs do not run and there is nothing to wait for).
    runs-on: ubuntu-latest
    needs:
      - check-changes
      - call-gate
    # always() keeps the condition evaluated even when a dependency was skipped;
    # !cancelled() makes the job honour cancellation coming from the concurrency group.
    if: |
      always() &&
      !cancelled() &&
      github.event_name == 'pull_request' &&
      !inputs.target_stage &&
      inputs.test_parallel_dispatch != true &&
      (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
      (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
    outputs:
      stage_a_result: ${{ steps.wait.outputs.result }}
    steps:
      - uses: actions/checkout@v4

      - uses: ./.github/actions/check-maintenance

      - id: wait
        uses: ./.github/actions/wait-for-jobs
        with:
          jobs: '["stage-a-test-1-gpu-small", "stage-a-test-cpu"]'
          stage-name: stage-a
          max-wait-minutes: '240'

  wait-for-stage-b:
    # Second barrier for pull_request runs: waits on the stage-b jobs through the local
    # wait-for-jobs action and publishes its `result` output as `stage_b_result`.
    # Same gating as wait-for-stage-a, plus wait-for-stage-a itself must have ended in
    # success or skipped. If call-gate failed the stage-b jobs are skipped, so nothing to wait for.
    runs-on: ubuntu-latest
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-a
    if: |
      always() &&
      !cancelled() &&
      github.event_name == 'pull_request' &&
      !inputs.target_stage &&
      inputs.test_parallel_dispatch != true &&
      (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
      (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') &&
      (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
    outputs:
      stage_b_result: ${{ steps.wait.outputs.result }}
    steps:
      - uses: actions/checkout@v4

      - uses: ./.github/actions/check-maintenance

      - id: wait
        uses: ./.github/actions/wait-for-jobs
        with:
          # Each expected_count equals the number of matrix partitions of the matching stage-b job
          # defined in this workflow (the b200 job has no matrix, hence 1).
          jobs: |
            [
              {"prefix": "stage-b-test-1-gpu-small", "expected_count": 8},
              {"prefix": "stage-b-test-1-gpu-large", "expected_count": 14},
              {"prefix": "stage-b-test-2-gpu-large", "expected_count": 4},
              {"prefix": "stage-b-test-4-gpu-b200", "expected_count": 1}
            ]
          stage-name: stage-b
          max-wait-minutes: '480'

  # =============================================== PR Gate ====================================================
  call-gate:
    # Invokes the reusable pr-gate workflow, passing all caller secrets through.
    # Not run for scheduled events (those run all tests), for parallel-dispatch tests, or when a
    # target_stage is given; otherwise it runs as soon as any tracked component has changes.
    needs:
      - check-changes
    if: |
      github.event_name != 'schedule' &&
      inputs.test_parallel_dispatch != true &&
      !inputs.target_stage &&
      (
        needs.check-changes.outputs.main_package == 'true' ||
        needs.check-changes.outputs.sgl_kernel == 'true' ||
        needs.check-changes.outputs.jit_kernel == 'true' ||
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    secrets: inherit
    uses: ./.github/workflows/pr-gate.yml

  # =============================================== sgl-kernel ====================================================

  sgl-kernel-build-wheels:
    # Builds the sgl-kernel wheel for each (python, cuda) matrix entry and uploads it as the
    # artifact `wheel-python<ver>-cuda<ver>`, which the stage test jobs download when
    # check-changes reports sgl_kernel == 'true'.
    needs: [check-changes, call-gate]
    # Skip for scheduled runs (they run stages independently) and when target_stage is set
    if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
    runs-on: x64-kernel-build-node
    timeout-minutes: 240
    strategy:
      matrix:
        include:
          - python-version: "3.10"
            cuda-version: "12.9"
          # Add back when CUDA 13.0 is supported on CI
          # - python-version: "3.10"
          #   cuda-version: "13.0"
    name: Build Wheel
    steps:
      - name: Cleanup
        run: |
          # Guard and quote the workspace path (same logic as the ARM build job): an empty or
          # missing GITHUB_WORKSPACE must never let the glob expand to `/*` under sudo.
          if [ -d "$GITHUB_WORKSPACE" ]; then
            sudo rm -rf "$GITHUB_WORKSPACE"/* || true
          else
            echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
          fi

      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-maintenance

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        run: |
          cd sgl-kernel
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
        env:
          USE_CCACHE: 1

      - name: Verify wheel artifacts
        run: |
          # The second ls fails the step if no .whl file was produced.
          ls -alh sgl-kernel/dist
          ls -alh sgl-kernel/dist/*.whl

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*
          if-no-files-found: error

  sgl-kernel-build-wheels-arm:
    # aarch64 counterpart of the wheel build: same build steps on the arm-kernel-build-node
    # runner label, uploaded under an artifact name carrying the `-aarch64` suffix.
    name: Build Wheel Arm
    runs-on: arm-kernel-build-node
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
    # Not run on schedule (stages run independently there), for parallel-dispatch tests,
    # or when a target_stage is requested; requires detected sgl_kernel changes.
    if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
    strategy:
      matrix:
        include:
          - cuda-version: "12.9"
            python-version: "3.10"
    steps:
      - name: Cleanup
        run: |
          if [ -d "$GITHUB_WORKSPACE" ]; then
            sudo rm -rf "$GITHUB_WORKSPACE"/* || true
          else
            echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
          fi

      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
          submodules: "recursive"

      - uses: ./.github/actions/check-maintenance

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        env:
          USE_CCACHE: 1
        run: |
          cd sgl-kernel
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Verify wheel artifacts
        run: |
          ls -alh sgl-kernel/dist
          ls -alh sgl-kernel/dist/*.whl

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
          path: sgl-kernel/dist/*

  call-sgl-kernel-tests:
    # Hands off to the reusable sgl-kernel test workflow once the x64 wheel build has finished.
    # Gated like the wheel build: no schedule, no parallel-dispatch, no target_stage, and
    # sgl_kernel changes must have been detected.
    needs:
      - check-changes
      - call-gate
      - sgl-kernel-build-wheels
    if: |
      github.event_name != 'schedule' &&
      inputs.test_parallel_dispatch != true &&
      !inputs.target_stage &&
      needs.check-changes.outputs.sgl_kernel == 'true'
    secrets: inherit
    uses: ./.github/workflows/pr-test-sgl-kernel.yml
    with:
      # Values computed by check-changes.
      sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
      b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
      # Caller inputs forwarded as-is; unset ref inputs become empty strings.
      git_ref: ${{ inputs.git_ref || '' }}
      pr_head_sha: ${{ inputs.pr_head_sha || '' }}
      skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}

  # =============================================== jit-kernel ====================================================

  # Calls the reusable JIT-kernel test workflow when check-changes reports jit_kernel == 'true',
  # forwarding the ref, target_stage, parallel-dispatch and health-check inputs.
  # NOTE(review): this `if` has no status-check function (always()/!cancelled()), and call-gate
  # is skipped for schedule, target_stage and test_parallel_dispatch runs. GitHub skips a job
  # whose dependency was skipped unless its condition uses a status-check function, so this job
  # would presumably never run in exactly the modes whose inputs it forwards - confirm intent.
  call-jit-kernel-tests:
    needs: [check-changes, call-gate]
    if: needs.check-changes.outputs.jit_kernel == 'true'
    uses: ./.github/workflows/pr-test-jit-kernel.yml
    with:
      jit_kernel: ${{ needs.check-changes.outputs.jit_kernel }}
      pr_head_sha: ${{ inputs.pr_head_sha || '' }}
      git_ref: ${{ inputs.git_ref || '' }}
      target_stage: ${{ inputs.target_stage || '' }}
      # Normalised to the literal strings 'true' / 'false'.
      test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
      skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
    secrets: inherit

  # =============================================== primary ====================================================

  # Runs on 5090 (32GB, SM120)
  stage-a-test-1-gpu-small:
    # Runs the `stage-a-test-1-gpu-small` suite on the `1-gpu-5090` runner label.
    # Selected either explicitly through target_stage, or - when no target_stage is given -
    # whenever main_package or sgl_kernel changed; outside schedule / parallel-dispatch runs
    # it additionally requires that no dependency failed and the run was not cancelled.
    runs-on: 1-gpu-5090
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - sgl-kernel-build-wheels
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-a-test-1-gpu-small') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Fetch the freshly built wheel only when this run built one.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          pattern: wheel-python3.10-cuda12.9
          merge-multiple: true
          path: sgl-kernel/dist/

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 10
        env:
          # Expands to `--continue-on-error` or to an empty string.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test/
          python3 run_suite.py --hw cuda --suite stage-a-test-1-gpu-small $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of the earlier steps.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  stage-a-test-cpu:
    # Runs the `stage-a-test-cpu` suite on a GitHub-hosted ubuntu runner.
    # Selected explicitly through target_stage, or - with no target_stage - when main_package
    # changed; outside schedule / parallel-dispatch runs it also requires that no dependency
    # failed and the run was not cancelled.
    runs-on: ubuntu-latest
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-a-test-cpu') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          (needs.check-changes.outputs.main_package == 'true')
        )
      )
    steps:
      # Remove preinstalled toolchains to reclaim disk, then print the remaining capacity.
      - name: Free disk space
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
          df -h

      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install uv
        uses: astral-sh/setup-uv@v5

      # uv pip expects a virtualenv by default, and setup-python does not create one.
      # UV_SYSTEM_PYTHON makes uv install into the setup-python interpreter instead
      # (see https://docs.astral.sh/uv/guides/integration/github/).
      - name: Install dependencies
        timeout-minutes: 20
        env:
          UV_SYSTEM_PYTHON: "1"
        run: |
          uv pip install -e "python[dev]" --index-strategy unsafe-best-match --prerelease allow

      - name: Run test
        timeout-minutes: 10
        env:
          # Expands to `--continue-on-error` or to an empty string.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test/
          python3 run_suite.py --hw cpu --suite stage-a-test-cpu $CONTINUE_ON_ERROR_FLAG

  # Runs on 5090 (32GB, SM120)
  stage-b-test-1-gpu-small:
    # Runs the `stage-b-test-1-gpu-small` suite on the `1-gpu-5090` runner label, split into
    # 8 auto-partitions (one matrix entry per partition, all allowed to run concurrently).
    # Selection logic mirrors the stage-a GPU job, with wait-for-stage-a as an extra dependency.
    runs-on: 1-gpu-5090
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-a
      - sgl-kernel-build-wheels
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-b-test-1-gpu-small') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      # One failing partition must not cancel the others.
      fail-fast: false
      max-parallel: 8
      matrix:
        partition: [0, 1, 2, 3, 4, 5, 6, 7]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Fetch the freshly built wheel only when this run built one.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          pattern: wheel-python3.10-cuda12.9
          merge-multiple: true
          path: sgl-kernel/dist/

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          source /etc/profile.d/sglang-ci.sh
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e . --no-build-isolation

      - name: Run test
        timeout-minutes: 30
        env:
          # Expands to `--continue-on-error` or to an empty string.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          source /etc/profile.d/sglang-ci.sh
          cd test/
          python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-small --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of the earlier steps; suffix keeps partitions apart.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}

  # Runs on H100 (80GB, SM90) - tests that don't pass on 5090 (FA3, FP8, high VRAM, etc.)
  stage-b-test-1-gpu-large:
    # Runs the `stage-b-test-1-gpu-large` suite on the `1-gpu-h100` runner label, split into
    # 14 auto-partitions. Concurrency is capped by the max_parallel value from check-changes.
    # Selection logic is the same as for the other stage-b GPU jobs.
    runs-on: 1-gpu-h100
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-a
      - sgl-kernel-build-wheels
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-b-test-1-gpu-large') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      # One failing partition must not cancel the others.
      fail-fast: false
      # The job output is a string; fromJson turns it into the number max-parallel expects.
      max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
      matrix:
        partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Fetch the freshly built wheel only when this run built one.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          pattern: wheel-python3.10-cuda12.9
          merge-multiple: true
          path: sgl-kernel/dist/

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          # Expands to `--continue-on-error` or to an empty string.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test/
          python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of the earlier steps; suffix keeps partitions apart.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}

  stage-b-test-2-gpu-large:
    # Runs the `stage-b-test-2-gpu-large` suite on the `2-gpu-h100` runner label, split into
    # 4 auto-partitions. Selection logic is the same as for the other stage-b GPU jobs.
    runs-on: 2-gpu-h100
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-a
      - sgl-kernel-build-wheels
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-b-test-2-gpu-large') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      # One failing partition must not cancel the others.
      fail-fast: false
      matrix:
        partition: [0, 1, 2, 3]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Fetch the freshly built wheel only when this run built one.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          pattern: wheel-python3.10-cuda12.9
          merge-multiple: true
          path: sgl-kernel/dist/

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e . --no-build-isolation

      - name: Run test
        timeout-minutes: 30
        env:
          # Expands to `--continue-on-error` or to an empty string.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test/
          python3 run_suite.py --hw cuda --suite stage-b-test-2-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of the earlier steps; suffix keeps partitions apart.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.partition }}

  stage-b-test-4-gpu-b200:
    # Runs the `stage-b-test-4-gpu-b200` suite plus the FA4 jit_kernel pytest file on the B200
    # runner label chosen by check-changes (`4-gpu-b200` or `4-gpu-b200-kernel`).
    # Selected explicitly through target_stage, or - with no target_stage - when main_package or
    # sgl_kernel changed; outside schedule / parallel-dispatch runs it also requires that no
    # dependency failed and the run was not cancelled.
    # This job has no matrix, so it declares no `strategy` (fail-fast only governs matrix jobs).
    needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-b-test-4-gpu-b200') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ needs.check-changes.outputs.b200_runner }}
    timeout-minutes: 240
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Same action version as every other job that downloads this artifact
      # (the artifact is published with upload-artifact@v4).
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          # Expands to `--continue-on-error` or to an empty string.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG

      - name: Run FA4 jit_kernel tests (SM100+)
        timeout-minutes: 10
        run: |
          python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py

      # Runs regardless of the outcome of the earlier steps.
      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

  call-multimodal-gen-tests:
    # Delegates the multimodal-gen stages to the reusable pr-test-multimodal-gen workflow.
    uses: ./.github/workflows/pr-test-multimodal-gen.yml
    secrets: inherit
    needs:
      - check-changes
      - call-gate
      - sgl-kernel-build-wheels
    # Gate: never starts on a cancelled run. Runs when one of the three
    # multimodal-gen stages is requested through inputs.target_stage; with no
    # target stage, runs only if multimodal_gen changed and either the run is
    # scheduled / test_parallel_dispatch is set, or no upstream job failed.
    if: |
      always() &&
      !cancelled() &&
      (
        inputs.target_stage == 'multimodal-gen-test-1-gpu' ||
        inputs.target_stage == 'multimodal-gen-test-2-gpu' ||
        inputs.target_stage == 'multimodal-gen-unit-test' ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          needs.check-changes.outputs.multimodal_gen == 'true'
        )
      )
    with:
      # Change-detection results forwarded from check-changes.
      multimodal_gen: ${{ needs.check-changes.outputs.multimodal_gen }}
      sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
      continue_on_error: ${{ needs.check-changes.outputs.continue_on_error }}
      # Caller inputs forwarded as-is; unset values become empty strings.
      pr_head_sha: ${{ inputs.pr_head_sha || '' }}
      git_ref: ${{ inputs.git_ref || '' }}
      target_stage: ${{ inputs.target_stage || '' }}
      # Boolean inputs are forwarded as the strings 'true' / 'false'.
      test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
      # 'true' when call-gate, sgl-kernel-build-wheels or check-changes ended in failure.
      caller_needs_failure: ${{ (needs.call-gate.result == 'failure' || needs.sgl-kernel-build-wheels.result == 'failure' || needs.check-changes.result == 'failure') && 'true' || 'false' }}
      skip_stage_health_check: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}

  stage-c-test-4-gpu-h100:
    # Stage C suite on 4-gpu-h100, split into 3 auto-partitions (matrix.part).
    runs-on: 4-gpu-h100
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-b
    strategy:
      fail-fast: false
      matrix:
        part:
          - 0
          - 1
          - 2
    # Gate: run when this stage is requested through inputs.target_stage.
    # With no target stage, run only if main_package or sgl_kernel changed and
    # either the run is scheduled / test_parallel_dispatch is set, or no
    # upstream job failed and the run was not cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-c-test-4-gpu-h100') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit git ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Wheel artifacts are only fetched when sgl_kernel changed.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        with:
          pattern: wheel-python3.10-cuda12.9
          path: sgl-kernel/dist/
          merge-multiple: true

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        env:
          # '--continue-on-error' when check-changes reports continue_on_error == 'true', else empty.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of earlier steps; the suffix is the partition id.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}

  stage-c-test-8-gpu-h200:
    # Stage C suite on 8-gpu-h200, split into 4 auto-partitions (matrix.part),
    # preceded by DeepGEMM and server warmup scripts.
    runs-on: 8-gpu-h200
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-b
    strategy:
      fail-fast: false
      matrix:
        part:
          - 0
          - 1
          - 2
          - 3
    # Gate: run when this stage is requested through inputs.target_stage.
    # With no target stage, run only if main_package or sgl_kernel changed and
    # either the run is scheduled / test_parallel_dispatch is set, or no
    # upstream job failed and the run was not cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-c-test-8-gpu-h200') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit git ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Wheel artifacts are only fetched when sgl_kernel changed.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        with:
          pattern: wheel-python3.10-cuda12.9
          path: sgl-kernel/dist/
          merge-multiple: true

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      # NOTE(review): the two warmup steps list different second models — confirm this is intended.
      - name: Warmup DeepGEMM JIT Compilation
        timeout-minutes: 25
        run: |
          python3 scripts/ci/cuda/warmup_deep_gemm.py \
            deepseek-ai/DeepSeek-V3-0324:8 \
            deepseek-ai/DeepSeek-V3.2-Exp:8

      - name: Warmup Server CUDA Graphs
        timeout-minutes: 25
        run: |
          python3 scripts/ci/cuda/warmup_server.py \
            deepseek-ai/DeepSeek-V3-0324:8 \
            inclusionAI/Ring-2.5-1T:8

      - name: Run test
        timeout-minutes: 30
        env:
          # '--continue-on-error' when check-changes reports continue_on_error == 'true', else empty.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of earlier steps; the suffix is the partition id.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}

  stage-c-test-8-gpu-h20:
    # Stage C suite on 8-gpu-h20, split into 2 auto-partitions (matrix.part).
    # Dependencies are installed through the DeepEP install script.
    runs-on: 8-gpu-h20
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-b
    strategy:
      fail-fast: false
      matrix:
        part:
          - 0
          - 1
    env:
      # Job-wide list of RDMA device names exported to every step.
      SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    # Gate: run when this stage is requested through inputs.target_stage.
    # With no target stage, run only if main_package or sgl_kernel changed and
    # either the run is scheduled / test_parallel_dispatch is set, or no
    # upstream job failed and the run was not cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-c-test-8-gpu-h20') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit git ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Wheel artifacts are only fetched when sgl_kernel changed.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        with:
          pattern: wheel-python3.10-cuda12.9
          path: sgl-kernel/dist/
          merge-multiple: true

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

      - name: Run test
        timeout-minutes: 30
        env:
          # '--continue-on-error' when check-changes reports continue_on_error == 'true', else empty.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of earlier steps; the suffix is the partition id.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}

  stage-c-test-deepep-4-gpu-h100:
    # Stage C DeepEP suite on 4-gpu-h100 (single job, no partitioning),
    # preceded by DeepGEMM and server warmup scripts.
    runs-on: 4-gpu-h100
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-b
    # Gate: run when this stage is requested through inputs.target_stage.
    # With no target stage, run only if main_package or sgl_kernel changed and
    # either the run is scheduled / test_parallel_dispatch is set, or no
    # upstream job failed and the run was not cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-c-test-deepep-4-gpu-h100') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit git ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Wheel artifacts are only fetched when sgl_kernel changed.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        with:
          pattern: wheel-python3.10-cuda12.9
          path: sgl-kernel/dist/
          merge-multiple: true

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

      # Both warmup steps are given the same single model argument.
      - name: Warmup DeepGEMM JIT Compilation
        timeout-minutes: 25
        run: |
          python3 scripts/ci/cuda/warmup_deep_gemm.py \
            lmsys/sglang-ci-dsv3-test:4

      - name: Warmup Server CUDA Graphs
        timeout-minutes: 25
        run: |
          python3 scripts/ci/cuda/warmup_server.py \
            lmsys/sglang-ci-dsv3-test:4

      - name: Run test
        timeout-minutes: 30
        env:
          # '--continue-on-error' when check-changes reports continue_on_error == 'true', else empty.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu-h100 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of earlier steps.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  stage-c-test-deepep-8-gpu-h200:
    # Stage C DeepEP suite on 8-gpu-h200 (single job, no partitioning),
    # preceded by DeepGEMM and server warmup scripts.
    runs-on: 8-gpu-h200
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-b
    # Gate: run when this stage is requested through inputs.target_stage.
    # With no target stage, run only if main_package or sgl_kernel changed and
    # either the run is scheduled / test_parallel_dispatch is set, or no
    # upstream job failed and the run was not cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit git ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Wheel artifacts are only fetched when sgl_kernel changed.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        with:
          pattern: wheel-python3.10-cuda12.9
          path: sgl-kernel/dist/
          merge-multiple: true

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh

      # DeepGEMM warmup receives two model arguments; the server warmup only the first.
      - name: Warmup DeepGEMM JIT Compilation
        timeout-minutes: 25
        run: |
          python3 scripts/ci/cuda/warmup_deep_gemm.py \
            deepseek-ai/DeepSeek-V3-0324:8 \
            deepseek-ai/DeepSeek-V3.2-Exp:8

      - name: Warmup Server CUDA Graphs
        timeout-minutes: 25
        run: |
          python3 scripts/ci/cuda/warmup_server.py \
            deepseek-ai/DeepSeek-V3-0324:8

      # This step's budget is 45 minutes.
      - name: Run test
        timeout-minutes: 45
        env:
          # '--continue-on-error' when check-changes reports continue_on_error == 'true', else empty.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of earlier steps.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps

  stage-c-test-4-gpu-b200:
    # Stage C suite on the B200 runner chosen by check-changes, split into
    # 4 auto-partitions (matrix.part).
    runs-on: ${{ needs.check-changes.outputs.b200_runner }}
    timeout-minutes: 240
    needs:
      - check-changes
      - call-gate
      - wait-for-stage-b
    strategy:
      fail-fast: false
      matrix:
        part:
          - 0
          - 1
          - 2
          - 3
    # Gate: run when this stage is requested through inputs.target_stage.
    # With no target stage, run only if main_package or sgl_kernel changed and
    # either the run is scheduled / test_parallel_dispatch is set, or no
    # upstream job failed and the run was not cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == 'stage-c-test-4-gpu-b200') ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )

    steps:
      # Check out the PR head SHA, then the explicit git ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Wheel artifacts are only fetched when sgl_kernel changed.
      - name: Download artifacts
        uses: actions/download-artifact@v6
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        with:
          pattern: wheel-python3.10-cuda12.9
          path: sgl-kernel/dist/
          merge-multiple: true

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      # NOTE(review): --timeout-per-file 1800 is passed while the step itself is
      # limited to 30 minutes — confirm the two limits are meant to coincide.
      - name: Run test
        timeout-minutes: 30
        env:
          # '--continue-on-error' when check-changes reports continue_on_error == 'true', else empty.
          CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

      # Runs regardless of the outcome of earlier steps; the suffix is the partition id.
      - if: always()
        uses: ./.github/actions/upload-cuda-coredumps
        with:
          artifact-suffix: ${{ matrix.part }}

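  # NOTE: GB200 stage temporarily disabled — no company-owned GB200 runner is available yet.
  # Re-enable when a 4-gpu-gb200 runner is provisioned.
  # When re-enabling, bring the job in line with the active stage-c jobs:
  #   - run "Checkout code" before ./.github/actions/check-maintenance (local actions are
  #     loaded from the checked-out workspace);
  #   - consider adding ./.github/actions/check-stage-health;
  #   - restore the job in the pr-test-finish `needs` list.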
  # stage-c-test-4-gpu-gb200:
  #   needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels-arm]
  #   if: |
  #     always() &&
  #     (
  #       (inputs.target_stage == 'stage-c-test-4-gpu-gb200') ||
  #       (
  #         !inputs.target_stage &&
  #         ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
  #         ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
  #       )
  #     )
  #   runs-on: 4-gpu-gb200
  #   timeout-minutes: 240
  #   strategy:
  #     fail-fast: false
  #   steps:
  #     - uses: ./.github/actions/check-maintenance
  #       with:
  #         github-token: ${{ github.token }}
  #
  #     - name: Checkout code
  #       uses: actions/checkout@v4
  #       with:
  #         ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
  #
  #     - name: Download artifacts
  #       if: needs.check-changes.outputs.sgl_kernel == 'true'
  #       uses: actions/download-artifact@v4
  #       with:
  #         path: sgl-kernel/dist/
  #         merge-multiple: true
  #         pattern: wheel-python3.10-cuda12.9-aarch64
  #
  #     - name: Install dependencies
  #       timeout-minutes: 20
  #       run: |
  #         CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} GRACE_BLACKWELL=1 bash scripts/ci/cuda/ci_install_deepep.sh
  #
  #     - name: Run test
  #       timeout-minutes: 45
  #       env:
  #         CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
  #       run: |
  #         cd test
  #         python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG
  #
  #     - uses: ./.github/actions/upload-cuda-coredumps
  #       if: always()

  pr-test-finish:
    # Final gate job: always runs, and fails if any job listed in `needs`
    # finished with result "failure" or "cancelled". Other results (e.g.
    # "success", "skipped") are accepted.
    needs:
      [
        call-gate,
        check-changes,

        sgl-kernel-build-wheels,
        sgl-kernel-build-wheels-arm,
        call-sgl-kernel-tests,

        wait-for-stage-a,
        wait-for-stage-b,

        call-jit-kernel-tests,

        call-multimodal-gen-tests,

        stage-a-test-1-gpu-small,
        stage-a-test-cpu,
        stage-b-test-1-gpu-small,
        stage-b-test-1-gpu-large,
        stage-b-test-2-gpu-large,
        stage-b-test-4-gpu-b200,
        stage-c-test-4-gpu-h100,
        stage-c-test-8-gpu-h20,
        stage-c-test-8-gpu-h200,
        stage-c-test-deepep-4-gpu-h100,
        stage-c-test-deepep-8-gpu-h200,
        stage-c-test-4-gpu-b200,
        # stage-c-test-4-gpu-gb200,  # Temporarily disabled — no GB200 runner
      ]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        env:
          # Hand the 'needs' context to the script through the environment.
          # Interpolating ${{ ... }} directly into the script text would let a
          # job output containing a single quote break out of the shell literal.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # One "<job> <result>" line per needed job, in declaration order.
          # Done as a plain assignment so a jq failure aborts the step.
          statuses=$(jq -r 'to_entries[] | "\(.key) \(.value.result)"' <<< "$NEEDS_JSON")

          failed=0
          while read -r job result; do
            # Skip the empty line produced when there are no entries
            [[ -z "$job" ]] && continue

            # Print the job name and its result
            echo "$job: $result"

            # Remember failures/cancellations, but keep printing the remaining jobs
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              failed=1
            fi
          done <<< "$statuses"

          if [[ "$failed" -ne 0 ]]; then
            echo "The above jobs failed."
            exit 1
          fi

          # No job reported failure or cancellation
          echo "All jobs completed successfully"
          exit 0

pr-test-amd matrix .github/workflows/pr-test-amd.yml

Triggers

push, pull_request, workflow_dispatch, workflow_call

Runs on

ubuntu-latest, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ${{matrix.runner}}, ubuntu-latest

Jobs

call-gate, check-changes, sgl-kernel-unit-test-amd, sgl-kernel-unit-test-2-gpu-amd, stage-a-test-1-gpu-small-amd, jit-kernel-unit-test-amd, stage-b-test-1-gpu-small-amd, stage-b-test-1-gpu-small-amd-nondeterministic, stage-b-test-1-gpu-small-amd-mi35x, stage-b-test-1-gpu-large-amd, stage-b-test-2-gpu-large-amd, multimodal-gen-test-1-gpu-amd, multimodal-gen-test-2-gpu-amd, stage-c-test-4-gpu-amd, stage-c-test-large-8-gpu-amd, stage-c-test-large-8-gpu-amd-mi35x, stage-b-test-large-8-gpu-35x-disaggregation-amd, pr-test-amd-finish

Matrix

part, runner→ 0, 1, 10, 11, 12, 13, 2, 3, 4, 5, 6, 7, 8, 9, linux-mi325-1gpu-sglang, linux-mi325-2gpu-sglang, linux-mi325-4gpu-sglang, linux-mi325-8gpu-sglang, linux-mi35x-gpu-1, linux-mi35x-gpu-8, linux-mi35x-gpu-8.fabric

Actions

dorny/paths-filter

Commands

# Run all tests for workflow_call (when ref input is provided) # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then echo "run_all_tests=true" >> $GITHUB_OUTPUT echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})" else echo "run_all_tests=false" >> $GITHUB_OUTPUT echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" fi
bash scripts/ci/amd/ensure_vram_clear.sh rocm
bash scripts/ci/amd/amd_ci_start_container.sh
bash scripts/ci/amd/amd_ci_install_dependency.sh
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_topk.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_sigmoid.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_torch_defaults_reset.py
bash scripts/ci/amd/ensure_vram_clear.sh rocm
bash scripts/ci/amd/amd_ci_start_container.sh
bash scripts/ci/amd/amd_ci_install_dependency.sh

View raw YAML

name: PR Test (AMD)
# Dynamic run-name for /rerun-stage commands to enable URL lookup.
# With a target stage (inputs.target_stage, else inputs.target_stage_select):
#   "[stage-name] sha" when inputs.pr_head_sha is set (used for fork PRs),
#   "[stage-name]" otherwise.
# Without a target stage the expression yields '' (default title for normal runs).
run-name: ${{ (inputs.target_stage || inputs.target_stage_select) && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage || inputs.target_stage_select, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage || inputs.target_stage_select)) || '' }}

on:
  # Pushes to main and PRs against main trigger on the same set of paths.
  push:
    branches:
      - main
    paths:
      - 'python/**'
      - 'scripts/ci/**'
      - 'test/**'
      - 'sgl-kernel/**'
      - '.github/workflows/pr-test-amd.yml'
      - 'docker/rocm.Dockerfile'
  pull_request:
    branches:
      - main
    paths:
      - 'python/**'
      - 'scripts/ci/**'
      - 'test/**'
      - 'sgl-kernel/**'
      - '.github/workflows/pr-test-amd.yml'
      - 'docker/rocm.Dockerfile'
  # Manual runs: pick one stage from the dropdown or type a comma-separated list.
  workflow_dispatch:
    inputs:
      target_stage_select:
        type: choice
        required: false
        default: ""
        description: "Select a stage to run from dropdown (leave empty for auto-detect)"
        options:
          - ""
          - sgl-kernel-unit-test-amd
          - sgl-kernel-unit-test-2-gpu-amd
          - stage-a-test-1-gpu-small-amd
          - jit-kernel-unit-test-amd
          - stage-b-test-1-gpu-small-amd
          - stage-b-test-1-gpu-small-amd-nondeterministic
          - stage-b-test-1-gpu-small-amd-mi35x
          - stage-b-test-1-gpu-large-amd
          - stage-b-test-2-gpu-large-amd
          - multimodal-gen-test-1-gpu-amd
          - multimodal-gen-test-2-gpu-amd
          - stage-c-test-4-gpu-amd
          - stage-c-test-large-8-gpu-amd
          - stage-c-test-large-8-gpu-amd-mi35x
          # NOTE(review): the corresponding job id appears to be
          # "stage-b-test-large-8-gpu-35x-disaggregation-amd" — confirm this
          # option matches the string that job's `if` condition checks.
          - stage-b-test-large-8-gpu-disaggregation-amd
      target_stage:
        type: string
        required: false
        default: ""
        description: "Or type comma-separated stage names (overrides dropdown if non-empty)"
      pr_head_sha:
        type: string
        required: false
        default: ""
        description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"
      aiter_ref:
        type: string
        required: false
        default: ""
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
      continue_on_error:
        type: boolean
        required: false
        default: false
        description: 'Continue on error (do not fail the workflow on test failures)'
  # Reusable-workflow entry point; run_all_tests is only available here.
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ""
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
      run_all_tests:
        type: boolean
        required: false
        default: false
        description: "Run all tests (for releasing or testing purpose)"
      aiter_ref:
        type: string
        required: false
        default: ""
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
      continue_on_error:
        type: boolean
        required: false
        default: false
        description: 'Continue on error (do not fail the workflow on test failures)'

env:
  # Workflow-wide: the aiter_ref input (empty when not provided).
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}

concurrency:
  # In-progress runs in the same group are cancelled unless run_all_tests is set.
  # NOTE(review): since github.event_name is inherited from the caller under
  # workflow_call (see below), the `!= 'workflow_call'` term looks always-true — confirm.
  cancel-in-progress: ${{ !inputs.run_all_tests && github.event_name != 'workflow_call' }}
  # Group key: a unique "full-<run_id>" per run when run_all_tests is true (avoids
  # collisions with direct push/PR triggers); otherwise the first non-empty of
  # pr_head_sha, ref, github.ref. run_all_tests is used for detection instead of
  # github.event_name because the latter inherits from the caller in workflow_call.
  group: pr-test-amd-${{ inputs.run_all_tests && format('full-{0}', github.run_id) || inputs.pr_head_sha || inputs.ref || github.ref }}

jobs:
  call-gate:
    # Runs the reusable pr-gate workflow, passing the caller's secrets through.
    secrets: inherit
    uses: ./.github/workflows/pr-gate.yml
  check-changes:
    # Decides which test groups run. Each output is the paths-filter result for
    # that group, or the run-mode step's run_all_tests value when the filter
    # step was skipped (its outputs are then empty).
    needs: [call-gate]
    runs-on: ubuntu-latest
    outputs:
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
      sgl_kernel: ${{ steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}
      jit_kernel: ${{ steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests when the run_all_tests input is true (a workflow_call-only input).
          # Note: github.event_name is inherited from the caller, so it cannot be used to
          # detect workflow_call; the explicit run_all_tests input is checked instead.
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi

      # Skipped in ALL TESTS mode; the job outputs then fall back to run_all_tests.
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_rocm.toml"
              - "python/pyproject_other.toml"
              - "scripts/ci/amd/*"
              - "scripts/ci/utils/*"
              - "test/**/!(*.md)"
              - ".github/workflows/pr-test-amd.yml"
            sgl_kernel:
              - "sgl-kernel/**/*.!(md|txt)"
              - ".github/workflows/pr-test-amd.yml"
            jit_kernel:
              - "python/sglang/jit_kernel/**"
              - ".github/workflows/pr-test-amd.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/cli/**"
              - "python/sglang/jit_kernel/diffusion/**"
              - "python/sglang/jit_kernel/tests/diffusion/**"
              - "python/sglang/jit_kernel/benchmark/diffusion/**"
              - "python/pyproject_rocm.toml"
              - "python/pyproject_other.toml"

  # =============================================== sgl-kernel ====================================================
  sgl-kernel-unit-test-amd:
    # Runs the listed sgl-kernel pytest files inside the ci_sglang container.
    runs-on: ${{matrix.runner}}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    needs:
      - check-changes
    # Gate: run when this job's name appears in the comma-separated target stage
    # (inputs.target_stage, else inputs.target_stage_select); with no target
    # stage, run only if sgl_kernel changed.
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      # One pytest invocation per test file; test_eagle_utils.py lives under tests/speculative.
      - name: Run test
        timeout-minutes: 14
        run: |
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
          docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_topk.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_sigmoid.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_torch_defaults_reset.py

  sgl-kernel-unit-test-2-gpu-amd:
    # Runs the two AMD allreduce determinism pytest files inside the ci_sglang
    # container on the 2-GPU MI325 runner.
    runs-on: ${{matrix.runner}}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-2gpu-sglang
    needs:
      - check-changes
    # Gate: run when this job's name appears in the comma-separated target stage
    # (inputs.target_stage, else inputs.target_stage_select); with no target
    # stage, run only if sgl_kernel changed.
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-2-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_amd_deterministic_custom_allreduce.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_amd_nccl_allreduce_determinism.py

  # =============================================== primary ====================================================

  stage-a-test-1-gpu-small-amd:
    # Stage A suite on the 1-GPU MI325 runner, executed via amd_ci_exec.sh.
    runs-on: ${{matrix.runner}}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    needs:
      - check-changes
    # Gate: run when this job's name appears in the comma-separated target stage
    # (inputs.target_stage, else inputs.target_stage_select); with no target
    # stage, run only if no upstream job failed, the run was not cancelled,
    # and main_package or sgl_kernel changed.
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-a-test-1-gpu-small-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    steps:
      # Check out the PR head SHA, then the explicit ref, then the triggering commit.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      # --continue-on-error is appended only when the continue_on_error input is true.
      - name: Run test
        timeout-minutes: 10
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-a-test-1-gpu-small-amd ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs python/sglang/jit_kernel/tests/test_store_cache.py with pytest through
  # scripts/ci/amd/amd_ci_exec.sh on a linux-mi325-1gpu-sglang runner.
  #
  # Scheduling: runs when `jit-kernel-unit-test-amd` is explicitly named in
  # target_stage / target_stage_select, or - when no stage is targeted - when
  # check-changes reports jit_kernel == 'true'.
  #
  # The `!failure() && !cancelled()` guard mirrors the stage-* jobs of this
  # workflow: `always()` on its own lets the job start even after the run has
  # been cancelled, which would still occupy a GPU runner.
  jit-kernel-unit-test-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',jit-kernel-unit-test-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          needs.check-changes.outputs.jit_kernel == 'true'
        )
      )
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi325-1gpu-sglang]
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run JIT kernel unit tests
        timeout-minutes: 10
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout" python3 -m pytest -q python/sglang/jit_kernel/tests/test_store_cache.py

  # Runs the `stage-b-test-1-gpu-small-amd` suite on linux-mi325-1gpu-sglang
  # runners, fanned out over matrix parts 0-13 (paired with
  # --auto-partition-size 14 in the Run test step). Depends on stage A.
  stage-b-test-1-gpu-small-amd:
    needs:
      - check-changes
      - stage-a-test-1-gpu-small-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-small-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 14 \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the `stage-b-test-1-gpu-small-amd-nondeterministic` suite (unpartitioned)
  # on a linux-mi325-1gpu-sglang runner. Depends on stage A.
  stage-b-test-1-gpu-small-amd-nondeterministic:
    needs:
      - check-changes
      - stage-a-test-1-gpu-small-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-nondeterministic,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-small-amd-nondeterministic \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the `stage-b-test-1-gpu-small-amd-mi35x` suite on a linux-mi35x-gpu-1
  # runner. Depends on stage A.
  stage-b-test-1-gpu-small-amd-mi35x:
    needs:
      - check-changes
      - stage-a-test-1-gpu-small-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi35x-gpu-1
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-mi35x,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-small-amd-mi35x \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the `stage-b-test-1-gpu-large-amd` suite on linux-mi325-1gpu-sglang
  # runners, split over matrix parts 0-1 (paired with --auto-partition-size 2).
  # Depends on stage A.
  stage-b-test-1-gpu-large-amd:
    needs:
      - check-changes
      - stage-a-test-1-gpu-small-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
        part:
          - 0
          - 1
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-large-amd,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-large-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 2 \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the `stage-b-test-2-gpu-large-amd` suite on linux-mi325-2gpu-sglang
  # runners, split over matrix parts 0-1 (paired with --auto-partition-size 2).
  # Depends on stage A.
  stage-b-test-2-gpu-large-amd:
    needs:
      - check-changes
      - stage-a-test-1-gpu-small-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-2gpu-sglang
        part:
          - 0
          - 1
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-2-gpu-large-amd,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-2-gpu-large-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 2 \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the multimodal-generation (diffusion) `1-gpu` suite in four sequential
  # partitions on a linux-mi325-1gpu-sglang runner.
  #
  # Scheduling: runs when `multimodal-gen-test-1-gpu-amd` is explicitly named in
  # target_stage / target_stage_select, or - when no stage is targeted - when
  # check-changes reports multimodal_gen == 'true'.
  #
  # The `!failure() && !cancelled()` guard mirrors the stage-* jobs of this
  # workflow: `always()` on its own lets the job start even after the run has
  # been cancelled, which would still occupy a GPU runner.
  multimodal-gen-test-1-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-1-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          needs.check-changes.outputs.multimodal_gen == 'true'
        )
      )
    strategy:
      fail-fast: false
      max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
      matrix:
        runner: [linux-mi325-1gpu-sglang]
        part: [0, 1, 2, 3]  # 4 partitions; keep in sync with --total-partitions below
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      # NOTE(review): the pattern names a CUDA wheel although this is an AMD
      # job, and no wheel-building job is listed in `needs` - confirm this step
      # is still required.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion

      - name: Setup kernel caches
        run: |
          # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
          # This directory persists across container restarts on the self-hosted runner
          docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub

          # Clear pre-built AITER kernels from Docker image to avoid segfaults
          # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
          #
          # The glob must be expanded by a shell *inside* the container:
          # `docker exec <ctr> rm -rf <path>/*.so` lets the host shell expand
          # (or fail to expand) the pattern, and rm then receives a literal
          # "*.so" that matches nothing.
          echo "Clearing pre-built AITER kernels from Docker image..."
          docker exec ci_sglang bash -c 'rm -rf /sgl-workspace/aiter/aiter/jit/*.so' 2>/dev/null || true
          docker exec ci_sglang bash -c 'rm -rf /sgl-data/aiter-kernels/*.so' 2>/dev/null || true
          echo "AITER kernels cleared - will be rebuilt on first use"

          # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
          # This tells the test cleanup code to NOT delete downloaded models
          if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
            docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
            echo "Created .persistent_cache marker - HF cache will persist"
          else
            echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
          fi

          # Check MIOpen cache (VAE convolution kernels)
          miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
          echo "Found ${miopen_files} MIOpen cache files"

      - name: Diagnose HF cache and system resources
        run: |
          echo "=== System Memory Status ==="
          free -h
          echo ""
          echo "=== Disk Space ==="
          df -h /home/runner/sgl-data 2>/dev/null || df -h
          echo ""
          echo "=== HF Cache Directory Structure ==="
          docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
          docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
          echo ""
          echo "=== Checking for cached diffusion models (1-GPU tests) ==="
          # Models used in 1-GPU tests: Wan2.1-T2V-1.3B, HunyuanVideo, Qwen-Image, FLUX.1, FLUX.2
          for model in "Wan-AI--Wan2.1-T2V-1.3B-Diffusers" "tencent--HunyuanVideo" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev" "black-forest-labs--FLUX.2-dev"; do
            cache_path="/sgl-data/hf-cache/hub/models--${model}"
            if docker exec ci_sglang test -d "$cache_path"; then
              size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
              echo "✓ CACHED: $model ($size)"
            else
              echo "✗ NOT CACHED: $model"
            fi
          done
          echo ""
          echo "=== GPU Memory Status ==="
          docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"

      - name: Run diffusion server tests (1-GPU)
        timeout-minutes: 90
        run: |
          # AMD CI: All 1-GPU tests except FLUX.2 (FLUX.1 covers same code path)
          # Tests: T2V, T2I, I2V, LoRA
          #
          # HF download env vars:
          # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
          # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
          #
          # --total-partitions must equal the number of entries in matrix.part.
          docker exec \
            -e SGLANG_E2E_TOLERANCE=0.3 \
            -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
            -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
            -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
            -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
            -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
            -e HF_HUB_ENABLE_HF_TRANSFER=1 \
            -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
            -w /sglang-checkout/python \
            ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
              --suite 1-gpu \
              --partition-id ${{ matrix.part }} \
              --total-partitions 4 \
              -k "not flux_2"

          # Post-test diagnostics
          echo "=== Post-test System Memory Status ==="
          free -h

  # Runs the multimodal-generation (diffusion) `2-gpu` suite in two sequential
  # partitions on a linux-mi325-2gpu-sglang runner.
  #
  # Scheduling: runs when `multimodal-gen-test-2-gpu-amd` is explicitly named in
  # target_stage / target_stage_select, or - when no stage is targeted - when
  # check-changes reports multimodal_gen == 'true'.
  #
  # The `!failure() && !cancelled()` guard mirrors the stage-* jobs of this
  # workflow: `always()` on its own lets the job start even after the run has
  # been cancelled, which would still occupy a GPU runner.
  multimodal-gen-test-2-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-2-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          needs.check-changes.outputs.multimodal_gen == 'true'
        )
      )
    strategy:
      fail-fast: false
      max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
      matrix:
        runner: [linux-mi325-2gpu-sglang]
        part: [0, 1]  # 2 partitions; keep in sync with --total-partitions below
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      # NOTE(review): the pattern names a CUDA wheel although this is an AMD
      # job, and no wheel-building job is listed in `needs` - confirm this step
      # is still required.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion

      - name: Setup kernel caches
        run: |
          # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
          docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub

          # Clear pre-built AITER kernels from Docker image to avoid segfaults
          # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
          #
          # The glob must be expanded by a shell *inside* the container:
          # `docker exec <ctr> rm -rf <path>/*.so` lets the host shell expand
          # (or fail to expand) the pattern, and rm then receives a literal
          # "*.so" that matches nothing.
          echo "Clearing pre-built AITER kernels from Docker image..."
          docker exec ci_sglang bash -c 'rm -rf /sgl-workspace/aiter/aiter/jit/*.so' 2>/dev/null || true
          docker exec ci_sglang bash -c 'rm -rf /sgl-data/aiter-kernels/*.so' 2>/dev/null || true
          echo "AITER kernels cleared - will be rebuilt on first use"

          # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
          # This tells the test cleanup code to NOT delete downloaded models
          if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
            docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
            echo "Created .persistent_cache marker - HF cache will persist"
          else
            echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
          fi

          # Check MIOpen cache (VAE convolution kernels)
          miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
          echo "Found ${miopen_files} MIOpen cache files"

      - name: Diagnose HF cache and system resources
        run: |
          echo "=== System Memory Status ==="
          free -h
          echo ""
          echo "=== Disk Space ==="
          df -h /home/runner/sgl-data 2>/dev/null || df -h
          echo ""
          echo "=== HF Cache Directory Structure ==="
          docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
          docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
          echo ""
          echo "=== Checking for cached diffusion models (2-GPU tests) ==="
          # Models used in 2-GPU tests: Wan2.2-T2V-A14B, Wan2.1-T2V-14B, Qwen-Image, FLUX.1
          for model in "Wan-AI--Wan2.2-T2V-A14B-Diffusers" "Wan-AI--Wan2.1-T2V-14B-Diffusers" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev"; do
            cache_path="/sgl-data/hf-cache/hub/models--${model}"
            if docker exec ci_sglang test -d "$cache_path"; then
              size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
              echo "✓ CACHED: $model ($size)"
            else
              echo "✗ NOT CACHED: $model"
            fi
          done
          echo ""
          echo "=== GPU Memory Status ==="
          docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"

      - name: Run diffusion server tests (2-GPU)
        timeout-minutes: 80
        run: |
          # AMD CI: All 2-GPU tests including LoRA
          # Tests: T2V, T2I, I2V, LoRA
          #
          # HF download env vars:
          # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
          # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
          #
          # --total-partitions must equal the number of entries in matrix.part.
          docker exec \
            -e SGLANG_E2E_TOLERANCE=0.3 \
            -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
            -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
            -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
            -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
            -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
            -e HF_HUB_ENABLE_HF_TRANSFER=1 \
            -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
            -w /sglang-checkout/python \
            ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
              --suite 2-gpu \
              --partition-id ${{ matrix.part }} \
              --total-partitions 2

          # Post-test diagnostics
          echo "=== Post-test System Memory Status ==="
          free -h


  # Runs the `stage-c-test-4-gpu-amd` suite (single partition, with one retry
  # per the --enable-retry / --max-attempts 2 flags) on a
  # linux-mi325-4gpu-sglang runner. Depends on call-gate and two stage B jobs.
  stage-c-test-4-gpu-amd:
    needs:
      - check-changes
      - call-gate
      - stage-b-test-1-gpu-small-amd
      - stage-b-test-2-gpu-large-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-4gpu-sglang
        part:
          - 0
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-4-gpu-amd,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          # Argument order is unchanged; related flags are grouped per line.
          bash scripts/ci/amd/amd_ci_exec.sh \
            -e NCCL_CUMEM_ENABLE=0 -e NCCL_NVLS_ENABLE=0 \
            -e RCCL_MSCCL_ENABLE=0 -e SGLANG_USE_ROCM700A=1 \
            -w "/sglang-checkout/test" \
            python3 run_suite.py --hw amd --suite stage-c-test-4-gpu-amd \
              --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 \
              --timeout-per-file 1800 \
              --enable-retry --max-attempts 2 \
              --retry-wait-seconds 120 --retry-timeout-increase 0 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the `stage-c-test-large-8-gpu-amd` suite on linux-mi325-8gpu-sglang
  # runners, split over matrix parts 0-2 (paired with --auto-partition-size 3),
  # after an 8-process torchrun RCCL communication check.
  stage-c-test-large-8-gpu-amd:
    needs:
      - check-changes
      - call-gate
      - stage-b-test-1-gpu-small-amd
      - stage-b-test-2-gpu-large-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-8gpu-sglang
        part:
          - 0
          - 1
          - 2
    env:
      RUNNER_LABELS: linux-mi325-8gpu-sglang
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Test RCCL multi-GPU communication
        timeout-minutes: 5
        run: |
          echo "Testing RCCL multi-GPU communication with debug info..."
          docker exec ci_sglang bash -c \
            "cd /sglang-checkout && NCCL_DEBUG=INFO RCCL_DEBUG=INFO torchrun --nproc_per_node=8 scripts/ci/amd/test_rccl_multi_gpu.py"

      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-c-test-large-8-gpu-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 3 \
              --timeout-per-file 3600 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Runs the `stage-c-test-large-8-gpu-amd-mi35x` suite on linux-mi35x-gpu-8
  # runners, split over matrix parts 0-1 (paired with --auto-partition-size 2).
  stage-c-test-large-8-gpu-amd-mi35x:
    needs:
      - check-changes
      - call-gate
      - stage-b-test-1-gpu-small-amd
      - stage-b-test-2-gpu-large-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi35x-gpu-8
        part:
          - 0
          - 1
    # Run when this stage is explicitly targeted; otherwise (no stage targeted)
    # only when nothing upstream failed or was cancelled and check-changes
    # reported main_package or sgl_kernel changes.
    if: |
      always() && (
        contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd-mi35x,') ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          !failure() && !cancelled() &&
          (
            needs.check-changes.outputs.main_package == 'true' ||
            needs.check-changes.outputs.sgl_kernel == 'true'
          )
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: |
          bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: |
          bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-c-test-large-8-gpu-amd-mi35x \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 2 \
              --timeout-per-file 3600 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # =============================================== Disaggregation ====================================================
  # Runs the `stage-b-test-large-8-gpu-35x-disaggregation-amd` suite on a
  # linux-mi35x-gpu-8.fabric runner. Before the suite it reports the host RDMA
  # environment, exports the detected RDMA device list as
  # SGLANG_TEST_RDMA_DEVICE, starts the disaggregation container, and runs an
  # AITER RMSNorm op test as a pre-check.
  #
  # Targeting: the job can be selected through target_stage /
  # target_stage_select by its real name (with `35x`) or by the older
  # `stage-b-test-large-8-gpu-disaggregation-amd` spelling. Previously only the
  # older spelling matched, so selecting the job by its own name did nothing.
  stage-b-test-large-8-gpu-35x-disaggregation-amd:
    needs: [check-changes, stage-a-test-1-gpu-small-amd]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-35x-disaggregation-amd,')) ||
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-disaggregation-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi35x-gpu-8.fabric]

    runs-on: ${{ matrix.runner }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer the explicit PR head SHA, then the requested ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Check Host RDMA Environment
        id: rdma_detect
        run: |
          # Diagnostics are best-effort: a missing tool or device must not fail the step.
          set +e
          echo "=== Checking Host RDMA Environment ==="

          echo ""
          echo "=== 1. Ionic driver library check ==="
          ls -l /usr/lib/x86_64-linux-gnu/libibverbs/libionic* 2>/dev/null || echo "libionic not found in standard path"

          echo ""
          echo "=== 2. Infiniband devices ==="
          ls -la /dev/infiniband/ 2>/dev/null || echo "/dev/infiniband not found"
          ls -la /sys/class/infiniband/ 2>/dev/null || echo "/sys/class/infiniband not found"

          echo ""
          echo "=== 3. ibv_devinfo ==="
          which ibv_devinfo 2>/dev/null && ibv_devinfo 2>&1 || echo "ibv_devinfo not available"

          echo ""
          echo "=== 4. Kernel modules ==="
          lsmod 2>/dev/null | grep -E "ib_|rdma|ionic" || echo "No RDMA kernel modules loaded"

          echo ""
          echo "=== 5. Detect RDMA Devices for test environment ==="
          # Export a comma-separated device list (or an empty value) for later steps.
          if [ -d "/sys/class/infiniband" ]; then
            RDMA_DEVS=$(ls /sys/class/infiniband | paste -sd "," -)
            echo "Detected RDMA Devices: $RDMA_DEVS"
            echo "SGLANG_TEST_RDMA_DEVICE=$RDMA_DEVS" >> "$GITHUB_ENV"
          else
            echo "No RDMA devices found in /sys/class/infiniband"
            echo "SGLANG_TEST_RDMA_DEVICE=" >> "$GITHUB_ENV"
          fi

          echo ""
          echo "=== Host RDMA Check Complete ==="

      - name: Start Special Container
        run: bash scripts/ci/amd/amd_ci_start_container_disagg.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Verify RDMA in Container
        run: |
          # Informational only: a missing HCA prints a warning, it does not fail the step.
          docker exec -u root ci_sglang bash -c '
            echo "=== Container RDMA Verification ==="
            echo "Device nodes:"
            ls -la /dev/infiniband/
            echo ""
            echo "Provider libraries:"
            ls /usr/lib/x86_64-linux-gnu/libibverbs/ | grep -E "ionic|mlx" || echo "No Ionic/Mellanox providers"
            echo ""
            echo "HCA devices:"
            HCA_COUNT=$(ibv_devinfo -list 2>&1 | grep -oE "^[0-9]+ HCAs? found" | grep -oE "^[0-9]+" || echo "0")
            ibv_devinfo -list
            if [ "$HCA_COUNT" -gt 0 ]; then
              echo ""
              echo "=== SUCCESS: RDMA setup complete. Found $HCA_COUNT HCA(s) ==="
            else
              echo ""
              echo "=== WARNING: No HCAs detected. RDMA tests may fail ==="
            fi
          '

      - name: Run Aiter Op Test (RMSNorm)
        timeout-minutes: 10
        run: |
          echo "Running pre-check: test_rmsnorm2d.py"
          docker exec \
            -e MAX_JOBS=192 \
            ci_sglang \
            python /sgl-workspace/aiter/op_tests/test_rmsnorm2d.py

      - name: Run test_disaggregation
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -e SGLANG_TEST_RDMA_DEVICE="${{ env.SGLANG_TEST_RDMA_DEVICE }}" \
            -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-8-gpu-35x-disaggregation-amd --timeout-per-file 1800 ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Aggregation gate: always runs after every job listed in `needs` and fails
  # if any of them ended in `failure` or `cancelled`. Any other result
  # (for example `success` or `skipped`) is accepted.
  pr-test-amd-finish:
    needs:
      - call-gate
      - check-changes

      - sgl-kernel-unit-test-amd
      - sgl-kernel-unit-test-2-gpu-amd
      - multimodal-gen-test-1-gpu-amd
      - multimodal-gen-test-2-gpu-amd

      - stage-a-test-1-gpu-small-amd
      - jit-kernel-unit-test-amd
      - stage-b-test-1-gpu-small-amd
      - stage-b-test-1-gpu-small-amd-nondeterministic
      - stage-b-test-1-gpu-small-amd-mi35x
      - stage-b-test-1-gpu-large-amd
      - stage-b-test-2-gpu-large-amd
      - stage-b-test-large-8-gpu-35x-disaggregation-amd
      - stage-c-test-4-gpu-amd
      - stage-c-test-large-8-gpu-amd
      - stage-c-test-large-8-gpu-amd-mi35x
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        env:
          # Hand the `needs` context to the script through the environment
          # instead of interpolating it into the script text: a quote or other
          # shell metacharacter inside any job output would otherwise terminate
          # the single-quoted literal and break (or alter) the script.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # Get a list of all job names from the JSON keys (declaration order)
          job_names=$(printf '%s\n' "$NEEDS_JSON" | jq -r 'keys_unsorted[]')

          for job in $job_names; do
            # For each job, extract its result
            result=$(printf '%s\n' "$NEEDS_JSON" | jq -r --arg j "$job" '.[$j].result')

            # Print the job name and its result
            echo "$job: $result"

            # Check for failure or cancellation and exit if found
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              echo "The above jobs failed."
              exit 1
            fi
          done

          # If the loop completes, no job failed or was cancelled
          echo "All jobs completed successfully"
          exit 0

pr-test-amd-rocm720 matrix .github/workflows/pr-test-amd-rocm720.yml

Triggers

schedule, workflow_dispatch, workflow_call

Runs on

Jobs

Matrix

Actions

dorny/paths-filter

Commands

# Run all tests for workflow_call (when ref input is provided) # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then echo "run_all_tests=true" >> $GITHUB_OUTPUT echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})" else echo "run_all_tests=false" >> $GITHUB_OUTPUT echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" fi
bash scripts/ci/amd/ensure_vram_clear.sh rocm
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
bash scripts/ci/amd/amd_ci_install_dependency.sh
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_topk.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_sigmoid.py docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_torch_defaults_reset.py
bash scripts/ci/amd/ensure_vram_clear.sh rocm
bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
bash scripts/ci/amd/amd_ci_install_dependency.sh

View raw YAML

# AMD test workflow whose jobs start the CI container with `--rocm-version rocm720`.
name: PR Test ROCm 7.2 (AMD)
# Dynamic run-name for /rerun-stage commands to enable URL lookup.
# The requested stage is `inputs.target_stage` (free text) or, if that is empty,
# `inputs.target_stage_select` (dropdown). The expression yields:
#   - "[stage-name] sha"  when a stage is requested and pr_head_sha is set (fork PRs)
#   - "[stage-name]"      when a stage is requested without pr_head_sha (non-fork)
#   - ''                  when no stage is requested
# NOTE(review): presumably GitHub falls back to its default run name for '' — confirm.
run-name: ${{ (inputs.target_stage || inputs.target_stage_select) && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage || inputs.target_stage_select, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage || inputs.target_stage_select)) || '' }}

# Triggers:
#   - schedule:          daily cron '30 17 * * *'
#   - workflow_dispatch: manual run, optionally restricted to one or more stages
#   - workflow_call:     invoked from another workflow
# The push / pull_request triggers are currently commented out.
on:
  schedule:
    - cron: '30 17 * * *'
  # push:
  #   branches: [ main ]
  #   paths:
  #     - "python/**"
  #     - "scripts/ci/**"
  #     - "test/**"
  #     - "sgl-kernel/**"
  #     - ".github/workflows/pr-test-amd-rocm720.yml"
  #     - "docker/rocm.Dockerfile"
  # pull_request:
  #   branches: [ main ]
  #   paths:
  #     - "python/**"
  #     - "scripts/ci/**"
  #     - "test/**"
  #     - "sgl-kernel/**"
  #     - ".github/workflows/pr-test-amd-rocm720.yml"
  #     - "docker/rocm.Dockerfile"
  workflow_dispatch:
    inputs:
      # Dropdown stage selector; the empty entry means "no explicit stage".
      # NOTE(review): each option is expected to match a job id in this workflow — confirm.
      target_stage_select:
        description: 'Select a stage to run from dropdown (leave empty for auto-detect)'
        type: choice
        required: false
        default: ''
        options:
          - ''
          - sgl-kernel-unit-test-amd
          - sgl-kernel-unit-test-2-gpu-amd
          - stage-a-test-1-gpu-small-amd
          - jit-kernel-unit-test-amd
          - stage-b-test-1-gpu-small-amd
          - stage-b-test-1-gpu-small-amd-nondeterministic
          - stage-b-test-1-gpu-small-amd-mi35x
          - stage-b-test-1-gpu-large-amd
          - stage-b-test-2-gpu-large-amd
          - multimodal-gen-test-1-gpu-amd
          - multimodal-gen-test-2-gpu-amd
          - stage-c-test-large-8-gpu-amd
          - stage-c-test-large-8-gpu-amd-mi35x
          - stage-b-test-large-8-gpu-disaggregation-amd
          - stage-c-test-4-gpu-amd
      # Free-text alternative to the dropdown; wins over it when non-empty.
      target_stage:
        description: 'Or type comma-separated stage names (overrides dropdown if non-empty)'
        type: string
        required: false
        default: ''
      pr_head_sha:
        description: 'PR head SHA to checkout (for /rerun-stage on fork PRs)'
        type: string
        required: false
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      run_all_tests:
        description: 'Run all tests (for releasing or testing purpose)'
        type: boolean
        required: false
        default: false
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true

# Workflow-level environment shared by every job.
env:
  # Value of the optional `aiter_ref` input (empty when the input is not supplied).
  AITER_COMMIT_OVERRIDE: "${{ inputs.aiter_ref }}"

concurrency:
  # When called via workflow_call with run_all_tests=true, use a unique group per run to
  # avoid collisions with direct schedule/workflow_dispatch triggers. We use run_all_tests
  # (not github.event_name) to detect this, because github.event_name inherits from the caller.
  # For every other run the group suffix is the first non-empty value of:
  # inputs.pr_head_sha, inputs.ref, github.ref.
  group: pr-test-amd-rocm720-${{ inputs.run_all_tests && format('full-{0}', github.run_id) || inputs.pr_head_sha || inputs.ref || github.ref }}
  # Runs with run_all_tests=true never cancel an in-progress run of the same group.
  # NOTE(review): per the comment above, github.event_name comes from the caller, so the
  # `!= 'workflow_call'` clause looks redundant — confirm before relying on it.
  cancel-in-progress: ${{ !inputs.run_all_tests && github.event_name != 'workflow_call' }}

jobs:
  # Gate job: delegates to the repository's reusable pr-gate workflow and forwards
  # all of the caller's secrets to it.
  call-gate:
    secrets: inherit
    uses: ./.github/workflows/pr-gate.yml
  # Decides which test groups should run and exposes the decision as job outputs:
  # main_package, sgl_kernel, jit_kernel, multimodal_gen.
  #   - run_all_tests == 'true': the path filter step is skipped, so its outputs are
  #     empty and every job output falls back to the run-mode value ('true').
  #   - otherwise: each job output is the matching dorny/paths-filter result.
  check-changes:
    needs: [call-gate]
    runs-on: ubuntu-latest
    outputs:
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
      sgl_kernel: ${{ steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}
      jit_kernel: ${{ steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Prefer an explicit PR head SHA, then the caller-provided ref, then the triggering commit.
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Determine run mode
        id: run-mode
        env:
          # Expression values are passed through the environment rather than being
          # interpolated into the script text.
          RUN_ALL_TESTS: ${{ inputs.run_all_tests }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          # Run every test group only when the run_all_tests input is true.
          # Note: github.event_name is inherited from the caller, so it cannot be used to
          # detect workflow_call; the explicit run_all_tests input is checked instead.
          if [[ "${RUN_ALL_TESTS}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (run_all_tests=${RUN_ALL_TESTS})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${EVENT_NAME})"
          fi

      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        # Skipped in run-all mode; the job outputs then fall back to run_all_tests.
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_rocm.toml"
              - "python/pyproject_other.toml"
              - "scripts/ci/amd/*"
              - "scripts/ci/utils/*"
              - "test/**/!(*.md)"
              - ".github/workflows/pr-test-amd-rocm720.yml"
            sgl_kernel:
              - "sgl-kernel/**/*.!(md|txt)"
              - ".github/workflows/pr-test-amd-rocm720.yml"
            jit_kernel:
              - "python/sglang/jit_kernel/**"
              - ".github/workflows/pr-test-amd-rocm720.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/cli/**"
              - "python/sglang/jit_kernel/diffusion/**"
              - "python/sglang/jit_kernel/tests/diffusion/**"
              - "python/sglang/jit_kernel/benchmark/diffusion/**"
              - "python/pyproject_rocm.toml"
              - "python/pyproject_other.toml"

  # =============================================== sgl-kernel ====================================================
  # sgl-kernel pytest files, executed inside the `ci_sglang` container on the
  # `linux-mi325-1gpu-sglang` runner.
  # Runs when the stage is explicitly requested, or — when no stage is requested —
  # when check-changes reports sgl_kernel == 'true'.
  sgl-kernel-unit-test-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 14
        run: |
          # Run one pytest file in the CI container.
          #   $1: working directory inside the container
          #   $2: test file to pass to pytest
          run_pytest() {
            docker exec -w "$1" ci_sglang python3 -m pytest "$2"
          }

          kernel_tests=/sglang-checkout/sgl-kernel/tests

          run_pytest "${kernel_tests}" test_moe_align.py
          run_pytest "${kernel_tests}" test_moe_topk_softmax.py
          run_pytest "${kernel_tests}/speculative" test_eagle_utils.py
          run_pytest "${kernel_tests}" test_apply_token_bitmask_inplace.py
          run_pytest "${kernel_tests}" test_activation.py
          run_pytest "${kernel_tests}" test_topk.py
          run_pytest "${kernel_tests}" test_kvcacheio.py
          run_pytest "${kernel_tests}" test_moe_topk_sigmoid.py
          run_pytest "${kernel_tests}" test_torch_defaults_reset.py

  # sgl-kernel allreduce determinism pytest files, executed inside the `ci_sglang`
  # container on the `linux-mi325-2gpu-sglang` runner.
  # Runs when the stage is explicitly requested, or — when no stage is requested —
  # when check-changes reports sgl_kernel == 'true'.
  sgl-kernel-unit-test-2-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-2-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-2gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          # Both test files live in the same directory inside the container.
          kernel_tests=/sglang-checkout/sgl-kernel/tests
          docker exec -w "${kernel_tests}" ci_sglang python3 -m pytest test_amd_deterministic_custom_allreduce.py
          docker exec -w "${kernel_tests}" ci_sglang python3 -m pytest test_amd_nccl_allreduce_determinism.py

  # =============================================== primary ====================================================

  # Stage A: runs the `stage-a-test-1-gpu-small-amd` suite via run_suite.py on the
  # `linux-mi325-1gpu-sglang` runner.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  stage-a-test-1-gpu-small-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-a-test-1-gpu-small-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 10
        run: |
          # --continue-on-error is appended only when the continue_on_error input is true.
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-a-test-1-gpu-small-amd \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # JIT kernel unit test (test_store_cache.py) on the `linux-mi325-1gpu-sglang` runner.
  # Runs when the stage is explicitly requested, or — when no stage is requested —
  # when check-changes reports jit_kernel == 'true'.
  jit-kernel-unit-test-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',jit-kernel-unit-test-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          needs.check-changes.outputs.jit_kernel == 'true'
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run JIT kernel unit tests
        timeout-minutes: 10
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout" \
            python3 -m pytest -q \
              python/sglang/jit_kernel/tests/test_store_cache.py

  # Stage B (small, 1 GPU): runs the `stage-b-test-1-gpu-small-amd` suite split into
  # 14 auto-partitions, one matrix entry per partition id.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  stage-b-test-1-gpu-small-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
        # Partition ids; the count must stay in sync with --auto-partition-size below.
        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        # NOTE(review): the step timeout (30 min) equals --timeout-per-file (1800 s) — confirm intended.
        timeout-minutes: 30
        run: |
          # --continue-on-error is appended only when the continue_on_error input is true.
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-small-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 14 \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Stage B (small, 1 GPU, nondeterministic suite): runs the
  # `stage-b-test-1-gpu-small-amd-nondeterministic` suite without partitioning.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  stage-b-test-1-gpu-small-amd-nondeterministic:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-nondeterministic,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          # --continue-on-error is appended only when the continue_on_error input is true.
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-small-amd-nondeterministic \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Stage B (small, 1 GPU) on the `linux-mi35x-gpu-1` runner: runs the
  # `stage-b-test-1-gpu-small-amd-mi35x` suite.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  stage-b-test-1-gpu-small-amd-mi35x:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-mi35x,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi35x-gpu-1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          # --continue-on-error is appended only when the continue_on_error input is true.
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-small-amd-mi35x \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Stage B (large, 1 GPU): runs the `stage-b-test-1-gpu-large-amd` suite split into
  # 2 auto-partitions, one matrix entry per partition id.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  stage-b-test-1-gpu-large-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-large-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-1gpu-sglang
        # Partition ids; the count must stay in sync with --auto-partition-size below.
        part:
          - 0
          - 1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          # --continue-on-error is appended only when the continue_on_error input is true.
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-1-gpu-large-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 2 \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Stage B (large, 2 GPU): runs the `stage-b-test-2-gpu-large-amd` suite split into
  # 2 auto-partitions on the `linux-mi325-2gpu-sglang` runner.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  stage-b-test-2-gpu-large-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-2-gpu-large-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-2gpu-sglang
        # Partition ids; the count must stay in sync with --auto-partition-size below.
        part:
          - 0
          - 1
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          # --continue-on-error is appended only when the continue_on_error input is true.
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-b-test-2-gpu-large-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 2 \
              --timeout-per-file 1800 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Diffusion (multimodal_gen) server tests, 1-GPU suite, split into 4 partitions that
  # run one at a time on the `linux-mi325-1gpu-sglang` runner.
  # Runs when the stage is explicitly requested, or — when no stage is requested and
  # nothing upstream failed or was cancelled — when main_package or sgl_kernel changed.
  # NOTE(review): check-changes also exposes a `multimodal_gen` output that this
  # condition does not consult — confirm that is intended.
  multimodal-gen-test-1-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-1-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      fail-fast: false
      max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
      matrix:
        runner: [linux-mi325-1gpu-sglang]
        part: [0, 1, 2, 3]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      # NOTE(review): the pattern names a CUDA wheel artifact although this is a ROCm job;
      # verify that some job actually uploads it, otherwise this step downloads nothing.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion
          docker exec ci_sglang pip install amdsmi

      - name: Setup kernel caches
        run: |
          # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
          # This directory persists across container restarts on the self-hosted runner
          docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub

          # Clear pre-built AITER kernels from Docker image to avoid segfaults
          # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
          # The rm runs through `sh -c` so that the *.so glob is expanded inside the
          # container; `docker exec` itself starts no shell, and an unquoted glob would be
          # expanded (or left literal) by the host shell instead.
          echo "Clearing pre-built AITER kernels from Docker image..."
          docker exec ci_sglang sh -c 'rm -rf /sgl-workspace/aiter/aiter/jit/*.so' 2>/dev/null || true
          docker exec ci_sglang sh -c 'rm -rf /sgl-data/aiter-kernels/*.so' 2>/dev/null || true
          echo "AITER kernels cleared - will be rebuilt on first use"

          # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
          # This tells the test cleanup code to NOT delete downloaded models
          if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
            docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
            echo "Created .persistent_cache marker - HF cache will persist"
          else
            echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
          fi

          # Check MIOpen cache (VAE convolution kernels)
          miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
          echo "Found ${miopen_files} MIOpen cache files"

      # Purely informational: every probe below falls back to a message instead of failing.
      - name: Diagnose HF cache and system resources
        run: |
          echo "=== System Memory Status ==="
          free -h
          echo ""
          echo "=== Disk Space ==="
          df -h /home/runner/sgl-data 2>/dev/null || df -h
          echo ""
          echo "=== HF Cache Directory Structure ==="
          docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
          docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
          echo ""
          echo "=== Checking for cached diffusion models (1-GPU tests) ==="
          # Models used in 1-GPU tests: Wan2.1-T2V-1.3B, HunyuanVideo, Qwen-Image, FLUX.1, FLUX.2
          for model in "Wan-AI--Wan2.1-T2V-1.3B-Diffusers" "tencent--HunyuanVideo" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev" "black-forest-labs--FLUX.2-dev"; do
            cache_path="/sgl-data/hf-cache/hub/models--${model}"
            if docker exec ci_sglang test -d "$cache_path"; then
              size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
              echo "✓ CACHED: $model ($size)"
            else
              echo "✗ NOT CACHED: $model"
            fi
          done
          echo ""
          echo "=== GPU Memory Status ==="
          docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"

      - name: Run diffusion server tests (1-GPU)
        timeout-minutes: 60
        run: |
          # AMD CI: All 1-GPU tests except FLUX.2 (FLUX.1 covers same code path)
          # Tests: T2V, T2I, I2V, LoRA
          #
          # HF download env vars:
          # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
          # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
          #
          # --total-partitions must match the number of entries in matrix.part.
          docker exec \
            -e SGLANG_E2E_TOLERANCE=0.3 \
            -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
            -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
            -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
            -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
            -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
            -e HF_HUB_ENABLE_HF_TRANSFER=1 \
            -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
            -w /sglang-checkout/python \
            ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
              --suite 1-gpu \
              --partition-id ${{ matrix.part }} \
              --total-partitions 4 \
              -k "not flux_2"

          # Post-test diagnostics
          echo "=== Post-test System Memory Status ==="
          free -h

  multimodal-gen-test-2-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-2-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      fail-fast: false
      max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
      matrix:
        runner: [linux-mi325-2gpu-sglang]
        part: [0, 1]  # 2 partitions: 9 tests ÷ 2 = ~4-5 tests each
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion
          docker exec ci_sglang pip install amdsmi

      - name: Setup kernel caches
        run: |
          # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
          docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub

          # Clear pre-built AITER kernels from Docker image to avoid segfaults
          # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
          echo "Clearing pre-built AITER kernels from Docker image..."
          docker exec ci_sglang rm -rf /sgl-workspace/aiter/aiter/jit/*.so 2>/dev/null || true
          docker exec ci_sglang rm -rf /sgl-data/aiter-kernels/*.so 2>/dev/null || true
          echo "AITER kernels cleared - will be rebuilt on first use"

          # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
          # This tells the test cleanup code to NOT delete downloaded models
          if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
            docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
            echo "Created .persistent_cache marker - HF cache will persist"
          else
            echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
          fi

          # Check MIOpen cache (VAE convolution kernels)
          miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
          echo "Found ${miopen_files} MIOpen cache files"

      - name: Diagnose HF cache and system resources
        run: |
          echo "=== System Memory Status ==="
          free -h
          echo ""
          echo "=== Disk Space ==="
          df -h /home/runner/sgl-data 2>/dev/null || df -h
          echo ""
          echo "=== HF Cache Directory Structure ==="
          docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
          docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
          echo ""
          echo "=== Checking for cached diffusion models (2-GPU tests) ==="
          # Models used in 2-GPU tests: Wan2.2-T2V-A14B, Wan2.1-T2V-14B, Qwen-Image, FLUX.1
          for model in "Wan-AI--Wan2.2-T2V-A14B-Diffusers" "Wan-AI--Wan2.1-T2V-14B-Diffusers" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev"; do
            cache_path="/sgl-data/hf-cache/hub/models--${model}"
            if docker exec ci_sglang test -d "$cache_path"; then
              size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
              echo "✓ CACHED: $model ($size)"
            else
              echo "✗ NOT CACHED: $model"
            fi
          done
          echo ""
          echo "=== GPU Memory Status ==="
          docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"

      # Runs the multimodal_gen `2-gpu` suite inside the ci_sglang container.
      # matrix.part selects which of the 2 partitions this runner executes; the
      # tolerance/cache settings are injected with `docker exec -e`.
      # The trailing `free -h` lives in the same script as the test command.
      # NOTE(review): if the step shell runs with errexit, the post-test
      # diagnostics are skipped whenever the suite fails — confirm that is intended.
      - name: Run diffusion server tests (2-GPU)
        timeout-minutes: 80
        run: |
          # AMD CI: All 2-GPU tests including LoRA
          # Tests: T2V, T2I, I2V, LoRA
          #
          # HF download env vars:
          # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
          # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
          docker exec \
            -e SGLANG_E2E_TOLERANCE=0.3 \
            -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
            -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
            -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
            -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
            -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
            -e HF_HUB_ENABLE_HF_TRANSFER=1 \
            -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
            -w /sglang-checkout/python \
            ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
              --suite 2-gpu \
              --partition-id ${{ matrix.part }} \
              --total-partitions 2

          # Post-test diagnostics
          echo "=== Post-test System Memory Status ==="
          free -h


  # Stage C: 4-GPU suite on an MI325 runner.
  # Runs when explicitly selected through target_stage / target_stage_select, or,
  # when no stage is targeted, once the stage-B prerequisites did not fail and
  # check-changes reports changes in main_package or sgl_kernel.
  stage-c-test-4-gpu-amd:
    needs: [check-changes, stage-b-test-1-gpu-small-amd, stage-b-test-2-gpu-large-amd]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-4-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi325-4gpu-sglang]
        part: [0]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      # Same helper path as the other AMD jobs in this workflow
      # (previously pointed at scripts/ensure_vram_clear.sh).
      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Single partition (part 0 of 1); each test file may be retried once
      # (2 attempts total) after a 120 s wait, without extending the timeout.
      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -e NCCL_CUMEM_ENABLE=0 \
            -e NCCL_NVLS_ENABLE=0 \
            -e RCCL_MSCCL_ENABLE=0 \
            -e SGLANG_USE_ROCM700A=1 \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-c-test-4-gpu-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 1 \
              --timeout-per-file 1800 \
              --enable-retry \
              --max-attempts 2 \
              --retry-wait-seconds 120 \
              --retry-timeout-increase 0 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Stage C: large 8-GPU suite on MI325, split across three matrix partitions.
  # Selected explicitly via target_stage / target_stage_select, or by default
  # when check-changes reports main_package or sgl_kernel changes.
  stage-c-test-large-8-gpu-amd:
    needs:
      - check-changes
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    env:
      RUNNER_LABELS: linux-mi325-8gpu-sglang
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-8gpu-sglang
        part:
          - 0
          - 1
          - 2
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Launches the RCCL check script across 8 processes with debug logging
      # before the real suite runs.
      - name: Test RCCL multi-GPU communication
        timeout-minutes: 5
        run: |
          echo "Testing RCCL multi-GPU communication with debug info..."
          docker exec ci_sglang bash -c "cd /sglang-checkout && NCCL_DEBUG=INFO RCCL_DEBUG=INFO torchrun --nproc_per_node=8 scripts/ci/amd/test_rccl_multi_gpu.py"

      # matrix.part picks one of the 3 auto-partitions.
      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-c-test-large-8-gpu-amd \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 3 \
              --timeout-per-file 3600 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Stage C: large 8-GPU suite on MI35x, split across two matrix partitions.
  # Selected explicitly via target_stage / target_stage_select, or by default
  # when check-changes reports main_package or sgl_kernel changes.
  stage-c-test-large-8-gpu-amd-mi35x:
    needs:
      - check-changes
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd-mi35x,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi35x-gpu-8
        part:
          - 0
          - 1
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # matrix.part picks one of the 2 auto-partitions.
      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -w "/sglang-checkout/test" \
            python3 run_suite.py \
              --hw amd \
              --suite stage-c-test-large-8-gpu-amd-mi35x \
              --auto-partition-id ${{ matrix.part }} \
              --auto-partition-size 2 \
              --timeout-per-file 3600 \
              ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # =============================================== Disaggregation ====================================================
  # Disaggregation suite on an 8-GPU MI35x runner with the `.fabric` label.
  # Probes the host for RDMA devices, starts the dedicated disaggregation
  # container, verifies RDMA inside it, runs an AITER op pre-check, then runs
  # the suite.
  #
  # The target-stage filter accepts the job's real id (with `35x`) as well as
  # the legacy spelling without it, so selecting the job by its actual name works.
  stage-b-test-large-8-gpu-35x-disaggregation-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-35x-disaggregation-amd,')) ||
        (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-disaggregation-amd,')) ||
        (
          !(inputs.target_stage || inputs.target_stage_select) &&
          (!failure() && !cancelled()) &&
          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
        )
      )
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi35x-gpu-8.fabric]

    runs-on: ${{matrix.runner}}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ci/amd/ensure_vram_clear.sh rocm

      # Best-effort host probe (`set +e`): every check falls back to a message
      # instead of failing. The comma-joined device list, or an empty value, is
      # exported as SGLANG_TEST_RDMA_DEVICE for later steps.
      - name: Check Host RDMA Environment
        id: rdma_detect
        run: |
          set +e
          echo "=== Checking Host RDMA Environment ==="

          echo ""
          echo "=== 1. Ionic driver library check ==="
          ls -l /usr/lib/x86_64-linux-gnu/libibverbs/libionic* 2>/dev/null || echo "libionic not found in standard path"

          echo ""
          echo "=== 2. Infiniband devices ==="
          ls -la /dev/infiniband/ 2>/dev/null || echo "/dev/infiniband not found"
          ls -la /sys/class/infiniband/ 2>/dev/null || echo "/sys/class/infiniband not found"

          echo ""
          echo "=== 3. ibv_devinfo ==="
          which ibv_devinfo 2>/dev/null && ibv_devinfo 2>&1 || echo "ibv_devinfo not available"

          echo ""
          echo "=== 4. Kernel modules ==="
          lsmod 2>/dev/null | grep -E "ib_|rdma|ionic" || echo "No RDMA kernel modules loaded"

          echo ""
          echo "=== 5. Detect RDMA Devices for test environment ==="
          if [ -d "/sys/class/infiniband" ]; then
            RDMA_DEVS=$(ls /sys/class/infiniband | paste -sd "," -)
            echo "Detected RDMA Devices: $RDMA_DEVS"
            echo "SGLANG_TEST_RDMA_DEVICE=$RDMA_DEVS" >> "$GITHUB_ENV"
          else
            echo "No RDMA devices found in /sys/class/infiniband"
            echo "SGLANG_TEST_RDMA_DEVICE=" >> "$GITHUB_ENV"
          fi

          echo ""
          echo "=== Host RDMA Check Complete ==="

      - name: Start Special Container
        run: bash scripts/ci/amd/amd_ci_start_container_disagg.sh --rocm-version rocm720
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Informational only: reports the HCA count seen inside the container and
      # prints a warning (not an error) when none are found.
      - name: Verify RDMA in Container
        run: |
          docker exec -u root ci_sglang bash -c '
            echo "=== Container RDMA Verification ==="
            echo "Device nodes:"
            ls -la /dev/infiniband/
            echo ""
            echo "Provider libraries:"
            ls /usr/lib/x86_64-linux-gnu/libibverbs/ | grep -E "ionic|mlx" || echo "No Ionic/Mellanox providers"
            echo ""
            echo "HCA devices:"
            HCA_COUNT=$(ibv_devinfo -list 2>&1 | grep -oE "^[0-9]+ HCAs? found" | grep -oE "^[0-9]+" || echo "0")
            ibv_devinfo -list
            if [ "$HCA_COUNT" -gt 0 ]; then
              echo ""
              echo "=== SUCCESS: RDMA setup complete. Found $HCA_COUNT HCA(s) ==="
            else
              echo ""
              echo "=== WARNING: No HCAs detected. RDMA tests may fail ==="
            fi
          '

      # Quick AITER op pre-check before the long-running suite.
      - name: Run Aiter Op Test (RMSNorm)
        timeout-minutes: 10
        run: |
          echo "Running pre-check: test_rmsnorm2d.py"
          docker exec \
            -e MAX_JOBS=192 \
            ci_sglang \
            python /sgl-workspace/aiter/op_tests/test_rmsnorm2d.py

      # Forwards the RDMA device list detected on the host into the container.
      - name: Run test_disaggregation
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh \
            -e SGLANG_TEST_RDMA_DEVICE="${{ env.SGLANG_TEST_RDMA_DEVICE }}" \
            -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-8-gpu-35x-disaggregation-amd --timeout-per-file 1800 ${{ inputs.continue_on_error && '--continue-on-error' || '' }}

  # Aggregation gate: always runs after every AMD job listed in `needs` and
  # fails if any of them ended in `failure` or `cancelled`. Skipped jobs are
  # treated as acceptable.
  pr-test-amd-finish:
    needs:
      [
        call-gate,
        check-changes,

        sgl-kernel-unit-test-amd,
        sgl-kernel-unit-test-2-gpu-amd,
        multimodal-gen-test-1-gpu-amd,
        multimodal-gen-test-2-gpu-amd,

        stage-a-test-1-gpu-small-amd,
        jit-kernel-unit-test-amd,
        stage-b-test-1-gpu-small-amd,
        stage-b-test-1-gpu-small-amd-nondeterministic,
        stage-b-test-1-gpu-small-amd-mi35x,
        stage-b-test-1-gpu-large-amd,
        stage-b-test-2-gpu-large-amd,
        stage-b-test-large-8-gpu-35x-disaggregation-amd,
        stage-c-test-4-gpu-amd,
        stage-c-test-large-8-gpu-amd,
        stage-c-test-large-8-gpu-amd-mi35x,
      ]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        env:
          # Pass the `needs` context through the environment rather than
          # splicing it into the script text: a single quote inside any job
          # output would otherwise terminate the shell string literal.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # The 'needs' context as a JSON string
          json_needs="$NEEDS_JSON"

          # Get a list of all job names from the JSON keys
          job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')

          for job in $job_names; do
            # For each job, extract its result
            result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')

            # Print the job name and its result
            echo "$job: $result"

            # Check for failure or cancellation and exit if found
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              echo "The above jobs failed."
              exit 1
            fi
          done

          # If the loop completes, all jobs were successful
          echo "All jobs completed successfully"
          exit 0

pr-test-jit-kernel .github/workflows/pr-test-jit-kernel.yml

Triggers

workflow_call

Runs on

1-gpu-h100, 8-gpu-h200, 1-gpu-h100

Jobs

jit-kernel-unit-test, jit-kernel-multigpu-unit-test, jit-kernel-benchmark-test

Commands

bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd test/ python3 run_suite.py --hw cuda --suite stage-b-kernel-unit-1-gpu-large
bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd test/ python3 run_suite.py --hw cuda --suite stage-b-kernel-unit-8-gpu-h200
bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd test/ python3 run_suite.py --hw cuda --suite stage-b-kernel-benchmark-1-gpu-large

View raw YAML

# Reusable workflow (workflow_call only) that runs the JIT-kernel unit,
# multi-GPU unit and benchmark suites on CUDA runners.
name: PR Test - JIT Kernel

# Inputs:
#   jit_kernel              - required string.
#                             NOTE(review): no job in this workflow reads it;
#                             confirm the gating is done by the caller.
#   pr_head_sha / git_ref   - checkout ref candidates, tried in that order
#                             before falling back to github.sha.
#   target_stage            - when non-empty, every job here is skipped.
#   test_parallel_dispatch  - when 'true', every job here is skipped.
#   skip_stage_health_check - boolean, exported as SKIP_STAGE_HEALTH_CHECK.
on:
  workflow_call:
    inputs:
      jit_kernel:
        required: true
        type: string
      pr_head_sha:
        required: false
        type: string
        default: ''
      git_ref:
        required: false
        type: string
        default: ''
      target_stage:
        required: false
        type: string
        default: ''
      test_parallel_dispatch:
        required: false
        type: string
        default: 'false'
      skip_stage_health_check:
        required: false
        type: boolean
        default: false

# Workflow-level env is NOT inherited from the caller in reusable workflows (verified by CI test).
# The github context (including github.event_name) IS inherited from the caller.
env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
  # 'true' only when the run's ref is the main branch.
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  # Normalizes the boolean input to the strings 'true' / 'false'.
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}

jobs:
  # Runs the `stage-b-kernel-unit-1-gpu-large` suite on a 1-GPU H100 runner.
  # Skipped for scheduled runs, for parallel-dispatch runs, and whenever a
  # specific target_stage is requested.
  jit-kernel-unit-test:
    if: |
      github.event_name != 'schedule' &&
      inputs.test_parallel_dispatch != 'true' &&
      !inputs.target_stage
    runs-on: 1-gpu-h100
    timeout-minutes: 240
    steps:
      # Ref precedence: pr_head_sha, then git_ref, then the triggering SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      # Repo-local composite actions; they must come after checkout because
      # they are resolved from the checked-out tree.
      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/
          python3 run_suite.py --hw cuda --suite stage-b-kernel-unit-1-gpu-large

  # Runs the `stage-b-kernel-unit-8-gpu-h200` suite on an 8-GPU H200 runner.
  # Skipped for scheduled runs, for parallel-dispatch runs, and whenever a
  # specific target_stage is requested.
  jit-kernel-multigpu-unit-test:
    if: |
      github.event_name != 'schedule' &&
      inputs.test_parallel_dispatch != 'true' &&
      !inputs.target_stage
    runs-on: 8-gpu-h200
    timeout-minutes: 240
    steps:
      # Ref precedence: pr_head_sha, then git_ref, then the triggering SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      # Added for parity with the other two jobs in this workflow, which both
      # run the stage health check before the maintenance check.
      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run multi-GPU test
        timeout-minutes: 45
        run: |
          cd test/
          python3 run_suite.py --hw cuda --suite stage-b-kernel-unit-8-gpu-h200

  # Runs the `stage-b-kernel-benchmark-1-gpu-large` suite on a 1-GPU H100 runner.
  # Skipped for scheduled runs, for parallel-dispatch runs, and whenever a
  # specific target_stage is requested.
  jit-kernel-benchmark-test:
    if: |
      github.event_name != 'schedule' &&
      inputs.test_parallel_dispatch != 'true' &&
      !inputs.target_stage
    runs-on: 1-gpu-h100
    timeout-minutes: 240
    steps:
      # Ref precedence: pr_head_sha, then git_ref, then the triggering SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run benchmark tests
        timeout-minutes: 45
        run: |
          cd test/
          python3 run_suite.py --hw cuda --suite stage-b-kernel-benchmark-1-gpu-large

pr-test-multimodal-gen matrix .github/workflows/pr-test-multimodal-gen.yml

Triggers

workflow_call

Runs on

1-gpu-h100, 2-gpu-h100, 1-gpu-h100

Jobs

multimodal-gen-test-1-gpu, multimodal-gen-test-2-gpu, multimodal-gen-unit-test

Matrix

part→ 0, 1

Commands

CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd python python3 sglang/multimodal_gen/test/run_suite.py \ --suite 1-gpu \ --partition-id ${{ matrix.part }} \ --total-partitions 2 \ $CONTINUE_ON_ERROR_FLAG
CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd python python3 sglang/multimodal_gen/test/run_suite.py \ --suite 2-gpu \ --partition-id ${{ matrix.part }} \ --total-partitions 2 \ $CONTINUE_ON_ERROR_FLAG
CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd python python3 sglang/multimodal_gen/test/run_suite.py --suite unit

View raw YAML

# Reusable workflow (workflow_call only) that runs the multimodal_gen
# (diffusion) 1-GPU, 2-GPU and unit suites on CUDA runners.
name: PR Test - Multimodal Gen

# All inputs are strings; flag-like inputs are compared against 'true'.
#   multimodal_gen          - jobs run by default only when this is 'true'.
#   sgl_kernel              - 'true' makes the jobs download wheel artifacts; the
#                             value is also passed as CUSTOM_BUILD_SGL_KERNEL.
#   continue_on_error       - 'true' adds --continue-on-error to the test runner.
#   pr_head_sha / git_ref   - checkout ref candidates, tried in that order
#                             before falling back to github.sha.
#   target_stage            - when set, only the job with that exact name runs.
#   test_parallel_dispatch  - 'true' lets jobs run regardless of caller failure.
#   caller_needs_failure    - 'true' suppresses the default (non-targeted) run.
#   skip_stage_health_check - exported as SKIP_STAGE_HEALTH_CHECK.
on:
  workflow_call:
    inputs:
      multimodal_gen:
        required: true
        type: string
      sgl_kernel:
        required: true
        type: string
      continue_on_error:
        required: false
        type: string
        default: 'false'
      pr_head_sha:
        required: false
        type: string
        default: ''
      git_ref:
        required: false
        type: string
        default: ''
      target_stage:
        required: false
        type: string
        default: ''
      test_parallel_dispatch:
        required: false
        type: string
        default: 'false'
      caller_needs_failure:
        required: false
        type: string
        default: 'false'
      skip_stage_health_check:
        required: false
        type: string
        default: 'false'

# Workflow-level env is NOT inherited from the caller in reusable workflows.
# The github context (including github.event_name) IS inherited from the caller.
env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  # 'true' only when the run's ref is the main branch.
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  # Result of a string comparison against 'true'.
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == 'true' }}

jobs:
  # 1-GPU diffusion server tests, split across two matrix partitions.
  # Runs when target_stage names this job exactly; otherwise, with no
  # target_stage set, runs when multimodal_gen is 'true' and either the run is
  # scheduled / parallel-dispatch or the caller reported no failure and the run
  # was not cancelled.
  multimodal-gen-test-1-gpu:
    if: |
      (inputs.target_stage == 'multimodal-gen-test-1-gpu') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (inputs.caller_needs_failure != 'true' && !cancelled())) &&
        inputs.multimodal_gen == 'true'
      )
    runs-on: 1-gpu-h100
    timeout-minutes: 240
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Only fetch wheel artifacts when sgl_kernel is 'true'; they land in
      # sgl-kernel/dist/.
      - name: Download artifacts
        if: inputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
      # CONTINUE_ON_ERROR_FLAG expands to --continue-on-error or to nothing;
      # matrix.part picks one of the 2 partitions.
      - name: Run diffusion server tests
        timeout-minutes: 240
        env:
          RUNAI_STREAMER_MEMORY_LIMIT: 0
          CONTINUE_ON_ERROR_FLAG: ${{ inputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            $CONTINUE_ON_ERROR_FLAG

      # Runs even after a test failure; the partition index is passed as the
      # artifact suffix.
      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
        with:
          artifact-suffix: ${{ matrix.part }}

  # 2-GPU diffusion server tests, split across two matrix partitions.
  # Runs when target_stage names this job exactly; otherwise, with no
  # target_stage set, runs when multimodal_gen is 'true' and either the run is
  # scheduled / parallel-dispatch or the caller reported no failure and the run
  # was not cancelled.
  multimodal-gen-test-2-gpu:
    if: |
      (inputs.target_stage == 'multimodal-gen-test-2-gpu') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (inputs.caller_needs_failure != 'true' && !cancelled())) &&
        inputs.multimodal_gen == 'true'
      )
    runs-on: 2-gpu-h100
    timeout-minutes: 240
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Only fetch wheel artifacts when sgl_kernel is 'true'; they land in
      # sgl-kernel/dist/.
      - name: Download artifacts
        if: inputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      # CONTINUE_ON_ERROR_FLAG expands to --continue-on-error or to nothing;
      # matrix.part picks one of the 2 partitions.
      - name: Run diffusion server tests
        timeout-minutes: 240
        env:
          RUNAI_STREAMER_MEMORY_LIMIT: 0
          CONTINUE_ON_ERROR_FLAG: ${{ inputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            $CONTINUE_ON_ERROR_FLAG

      # Runs even after a test failure; the partition index is passed as the
      # artifact suffix.
      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()
        with:
          artifact-suffix: ${{ matrix.part }}

  # Diffusion unit tests (`--suite unit`) on a single 1-GPU H100 runner, with no
  # matrix partitioning. Uses the same run condition as the server-test jobs,
  # keyed on this job's own name.
  multimodal-gen-unit-test:
    if: |
      (inputs.target_stage == 'multimodal-gen-unit-test') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (inputs.caller_needs_failure != 'true' && !cancelled())) &&
        inputs.multimodal_gen == 'true'
      )
    runs-on: 1-gpu-h100
    timeout-minutes: 120
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Only fetch wheel artifacts when sgl_kernel is 'true'; they land in
      # sgl-kernel/dist/.
      - name: Download artifacts
        if: inputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run diffusion unit tests
        timeout-minutes: 60
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite unit

pr-test-npu matrix .github/workflows/pr-test-npu.yml

Triggers

push, pull_request, workflow_dispatch, workflow_call

Runs on

ubuntu-latest, linux-aarch64-a2-1, linux-aarch64-a2-2, linux-aarch64-a3-4, linux-aarch64-a3-16, linux-aarch64-a3-2, linux-aarch64-a3-16, linux-aarch64-a3-16

Jobs

check-changes, pr-gate, per-commit-1-npu-a2, per-commit-2-npu-a2, per-commit-4-npu-a3, per-commit-16-npu-a3, multimodal-gen-test-1-npu-a3, multimodal-gen-test-2-npu-a3, multimodal-gen-test-8-npu-a3

Matrix

part→ 0, 1

Actions

dorny/paths-filter

Commands

# Run all tests for workflow_call (when ref input is provided) # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then echo "run_all_tests=true" >> $GITHUB_OUTPUT echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})" else echo "run_all_tests=false" >> $GITHUB_OUTPUT echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" fi
git config --system --add safe.directory ${GITHUB_WORKSPACE}
# speed up by using infra cache services CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list pip config set global.index-url http://${CACHING_URL}/pypi/simple pip config set global.trusted-host "${CACHING_URL}" bash scripts/ci/npu/npu_ci_install_dependency.sh 910b # copy required file from our daily cache cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp # copy download through proxy curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
cd test python3 run_suite.py --hw npu --suite per-commit-1-npu-a2 --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
cd test/srt python3 run_suite.py --suite per-commit-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
git config --system --add safe.directory ${GITHUB_WORKSPACE}
# speed up by using infra cache services CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local" sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list pip config set global.index-url http://${CACHING_URL}/pypi/simple pip config set global.trusted-host "${CACHING_URL}" bash scripts/ci/npu/npu_ci_install_dependency.sh 910b # copy required file from our daily cache cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp # copy download through proxy curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
cd test/srt python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

View raw YAML

# NPU PR test workflow. Triggered by pushes and pull requests targeting main,
# manually, or as a reusable workflow.
name: PR Test (NPU)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      # NOTE(review): the jobs below check out `inputs.ref || github.ref`, i.e.
      # the ref of the triggering run rather than the default branch — confirm
      # the description.
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      # When true, path filtering is bypassed and every suite runs.
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

# One concurrency group per tested ref.
# NOTE(review): in a called (reusable) workflow `github.event_name` reports the
# caller's event, so the comparison against 'workflow_call' may never be false —
# confirm that cancel-in-progress behaves as intended for callers.
concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
  # ==================== Check Changes ==================== #
  # Decides which test groups need to run.
  # Outputs are 'true' when the matching path filter fired or when the caller
  # forced a full run with `run_all_tests`:
  #   main_package   - gates the per-commit NPU suites
  #   multimodal_gen - set when the multimodal_gen path filter matched
  #   changes_exist  - either of the above; gates pr-gate
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests only when the caller sets the run_all_tests input.
          # Note: github.event_name is inherited from the caller, so it cannot be used
          # to detect workflow_call; the explicit run_all_tests input is checked instead.
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi

      # Skipped in run-all mode; the job outputs above then rely solely on
      # steps.run-mode.outputs.run_all_tests.
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - "test/srt/ascend/**"
              - ".github/workflows/pr-test-npu.yml"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
              - "python/sglang/srt/**"
              - "python/pyproject_npu.toml"
              - "scripts/ci/npu/npu_ci_install_dependency.sh"
              - ".github/workflows/pr-test-npu.yml"

  # ==================== PR Gate ==================== #
  # Calls the shared pr-gate reusable workflow, but only when check-changes
  # found something to test. The caller's secrets are forwarded.
  pr-gate:
    needs: check-changes
    if: needs.check-changes.outputs.changes_exist == 'true'
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit

  # Per-commit 1-NPU suite on an A2 runner, inside the CANN 8.5.0 / 910b
  # container image. Runs after pr-gate when main_package changed, split across
  # two matrix partitions that do not cancel each other (fail-fast: false).
  per-commit-1-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-1
    strategy:
      fail-fast: false
      matrix:
        part: [ 0, 1 ]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          git config --system --add safe.directory ${GITHUB_WORKSPACE}

      # Points apt and pip at the in-cluster cache service, installs the NPU
      # dependencies, then stages two dataset files under /tmp.
      # NOTE(review): the script comment says the JSONL is downloaded "through
      # proxy", but the curl URL targets raw.githubusercontent.com directly and
      # GITHUB_PROXY_URL is not referenced in this script — confirm whether the
      # install script consumes it.
      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # copy download through proxy
          curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      # First of two test passes: the runner under test/ with `--hw npu`,
      # continuing past failures, 3600 s per file.
      - name: Run registered test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd test
          python3 run_suite.py --hw npu --suite per-commit-1-npu-a2 --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

      # Second pass: the runner under test/srt with the same suite name and
      # partitioning.
      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Runs the `per-commit-2-npu-a2` suite from test/srt on the
  # linux-aarch64-a2-2 runner. The suite is split into two partitions; each
  # matrix entry runs the partition whose id equals matrix.part.
  per-commit-2-npu-a2:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a2-2
    strategy:
      fail-fast: true
      matrix:
        part: [0, 1]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          # Quoted so a workspace path containing spaces stays one argument.
          git config --system --add safe.directory "${GITHUB_WORKSPACE}"

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Download the grade-school-math test set. --fail makes curl exit
          # non-zero on an HTTP error instead of saving the error page as
          # test.jsonl; --retry covers transient network failures.
          # NOTE(review): this fetches directly from GitHub although
          # GITHUB_PROXY_URL is set above - confirm whether the proxy was intended.
          curl --fail --retry 3 -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Runs the `per-commit-4-npu-a3` suite from test/srt on the
  # linux-aarch64-a3-4 runner, with a 3600 s limit per test file.
  per-commit-4-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a3-4
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          # Quoted so a workspace path containing spaces stays one argument.
          git config --system --add safe.directory "${GITHUB_WORKSPACE}"

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Download the grade-school-math test set. --fail makes curl exit
          # non-zero on an HTTP error instead of saving the error page as
          # test.jsonl; --retry covers transient network failures.
          # NOTE(review): this fetches directly from GitHub although
          # GITHUB_PROXY_URL is set above - confirm whether the proxy was intended.
          curl --fail --retry 3 -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-npu-a3 --timeout-per-file 3600

  # Runs the `per-commit-16-npu-a3` suite from test/srt on the
  # linux-aarch64-a3-16 runner, with a 3600 s limit per test file.
  per-commit-16-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          # Quoted so a workspace path containing spaces stays one argument.
          git config --system --add safe.directory "${GITHUB_WORKSPACE}"

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Download the grade-school-math test set through the GitHub proxy.
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as test.jsonl; --retry covers transient failures.
          curl --fail --retry 3 -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-16-npu-a3 --timeout-per-file 3600

  # Runs the multimodal-gen `1-npu` suite on the linux-aarch64-a3-2 runner.
  # Only scheduled when check-changes reports multimodal_gen == 'true'.
  multimodal-gen-test-1-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-2
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Same ref selection as the per-commit NPU jobs in this workflow, so a
          # caller-supplied `ref` input is honoured here too.
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          # Quoted so a workspace path containing spaces stays one argument.
          git config --system --add safe.directory "${GITHUB_WORKSPACE}"

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Download the grade-school-math test set through the GitHub proxy.
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as test.jsonl; --retry covers transient failures.
          curl --fail --retry 3 -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          # NOTE(review): this path names 8.3.RC1 while the container image tag
          # is 8.3.rc2 - confirm the directory exists in the image.
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu

  # Runs the multimodal-gen `2-npu` suite on the linux-aarch64-a3-16 runner.
  # Only scheduled when check-changes reports multimodal_gen == 'true'.
  multimodal-gen-test-2-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Same ref selection as the per-commit NPU jobs in this workflow, so a
          # caller-supplied `ref` input is honoured here too.
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          # Quoted so a workspace path containing spaces stays one argument.
          git config --system --add safe.directory "${GITHUB_WORKSPACE}"

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Download the grade-school-math test set through the GitHub proxy.
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as test.jsonl; --retry covers transient failures.
          curl --fail --retry 3 -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          # NOTE(review): this path names 8.3.RC1 while the container image tag
          # is 8.3.rc2 - confirm the directory exists in the image.
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu

  # Runs the multimodal-gen `8-npu` suite on the linux-aarch64-a3-16 runner.
  # Only scheduled when check-changes reports multimodal_gen == 'true'.
  multimodal-gen-test-8-npu-a3:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.multimodal_gen == 'true'
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Same ref selection as the per-commit NPU jobs in this workflow, so a
          # caller-supplied `ref` input is honoured here too.
          ref: ${{ inputs.ref || github.ref }}

      - name: Mark repository safe
        run: |
          # Quoted so a workspace path containing spaces stays one argument.
          git config --system --add safe.directory "${GITHUB_WORKSPACE}"

      - name: Install dependencies
        env:
          TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
          PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
          GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.trusted-host "${CACHING_URL}"

          bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # Download the grade-school-math test set through the GitHub proxy.
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as test.jsonl; --retry covers transient failures.
          curl --fail --retry 3 -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Run test
        timeout-minutes: 60
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          cd python
          python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu

pr-test-rust matrix .github/workflows/pr-test-rust.yml

Triggers

push, pull_request, workflow_dispatch

Runs on

4-gpu-a10, ubuntu-latest, ubuntu-latest, 4-gpu-a10, ubuntu-24.04, ubuntu-latest, ubuntu-latest

Jobs

build-wheel, python-unit-tests, unit-tests, gateway-e2e, docker-build-test, finish, summarize-benchmarks

Matrix

include, include.env_vars, include.extra_deps, include.name, include.parallel_opts, include.reruns, include.setup_brave, include.setup_oracle, include.test_dirs, include.timeout, include.upload_benchmarks→ , --reruns 2 --reruns-delay 5, --workers 1 --tests-per-worker 4, 32, 45, SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1, True, benchmarks, chat-completions, e2e, e2e_test/benchmarks, e2e_test/chat_completions, e2e_test/responses, e2e_test/router e2e_test/embeddings, genai-bench==0.0.3, pytest-parallel py, responses

Actions

mozilla-actions/sccache-action, Swatinem/rust-cache, mozilla-actions/sccache-action, Swatinem/rust-cache, docker/setup-buildx-action, docker/build-push-action

Commands

bash scripts/ci/cuda/ci_install_gateway_dependencies.sh
source "$HOME/.cargo/env" export RUSTC_WRAPPER=sccache cd sgl-model-gateway/bindings/python python3 -m pip install --upgrade pip maturin maturin build --profile ci --features vendored-openssl --out dist
ls -lh sgl-model-gateway/bindings/python/dist/
pip install sgl-model-gateway/bindings/python/dist/*.whl python3 -c "import sglang_router; print('Python package: OK')" python3 -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')" python3 -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
mv sglang-repo/sgl-model-gateway/* . rm -rf sglang-repo
pip install dist/*.whl
cd bindings/python python3 -m pip install pytest pytest-cov pytest-xdist pytest -q tests --cov=sglang_router --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80
bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

View raw YAML

# Workflow for the sgl-model-gateway directory: runs on pushes and pull
# requests against main that touch that directory, and on manual dispatch.
name: PR Test (SMG)

on:
  push:
    branches:
      - main
    paths:
      - "sgl-model-gateway/**"
  pull_request:
    branches:
      - main
    # `labeled` is included so the jobs' `run-ci` label checks are re-evaluated
    # when a label is added.
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    paths:
      - "sgl-model-gateway/**"
  workflow_dispatch:

# One active run per ref; a newer run cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: gateway-tests-${{ github.ref }}

env:
  SGLANG_IS_IN_CI: true
  SCCACHE_GHA_ENABLED: "true"
  RUSTC_WRAPPER: sccache

jobs:
  # Builds the Python binding wheel with maturin, publishes it as the
  # `smg-wheel` artifact, and smoke-tests that the wheel installs and imports.
  build-wheel:
    runs-on: 4-gpu-a10
    # Always run outside pull requests; on pull requests run only when the
    # `run-ci` label is present or is the label that was just added.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          disable_annotations: true
          version: "v0.12.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: "rust-cache"
          workspaces: sgl-model-gateway
          save-if: true
          cache-on-failure: true
          cache-all-crates: true

      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-model-gateway/bindings/python
          python3 -m pip install --upgrade pip maturin
          maturin build --profile ci --features vendored-openssl --out dist

      - name: List built wheel
        run: |
          ls -lh sgl-model-gateway/bindings/python/dist/

      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: smg-wheel
          retention-days: 1
          path: sgl-model-gateway/bindings/python/dist/*.whl

      # Checks the package import, the Rust extension import, and the CLI
      # entry point, in that order.
      - name: Test wheel install
        run: |
          pip install sgl-model-gateway/bindings/python/dist/*.whl
          python3 -c "import sglang_router; print('Python package: OK')"
          python3 -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python3 -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"

  # Installs the wheel produced by build-wheel and runs the Python binding's
  # pytest suite with an 80% coverage floor.
  python-unit-tests:
    runs-on: ubuntu-latest
    needs: build-wheel
    steps:
      # Check out into a subdirectory so the gateway folder can be promoted to
      # the workspace root in the next step.
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      - name: Move sgl-model-gateway folder to root
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          path: dist/
          name: smg-wheel

      - name: Install wheel
        run: |
          pip install dist/*.whl

      - name: Run Python unit tests
        run: |
          cd bindings/python
          python3 -m pip install pytest pytest-cov pytest-xdist
          pytest -q tests --cov=sglang_router --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80

  # Rust-side checks for sgl-model-gateway: clippy, nightly rustfmt, vision
  # golden-fixture generation, and `cargo test`.
  unit-tests:
    # Always run outside pull requests; on pull requests run only when the
    # `run-ci` label is present or is the label that was just added.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.12.0"
          disable_annotations: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: "rust-cache"
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Run lint
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          rustup component add clippy
          cargo clippy --all-targets --all-features -- -D warnings

      - name: Run fmt
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          # The nightly toolchain must exist before a component can be added
          # to it, so install the toolchain first.
          rustup toolchain install nightly --profile minimal
          rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
          cargo +nightly fmt -- --check

      - name: Generate vision golden fixtures
        run: |
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu

          pip install transformers pillow numpy scipy
          cd sgl-model-gateway/
          python scripts/generate_vision_golden.py

      - name: Run Rust tests
        timeout-minutes: 20
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          cargo test

      # Runs even when an earlier step failed so cache statistics are always
      # printed.
      - name: Show sccache stats
        if: always()
        run: sccache --show-stats

  # End-to-end gateway tests. Each matrix entry selects a test directory set
  # plus its pytest options, extra pip packages, environment prefix, and
  # optional Oracle / Brave helper containers.
  gateway-e2e:
    name: ${{ matrix.name }}
    needs: build-wheel
    # Always run outside pull requests; on pull requests run only when the
    # `run-ci` label is present or is the label that was just added.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: benchmarks
            timeout: 32
            test_dirs: "e2e_test/benchmarks"
            extra_deps: "genai-bench==0.0.3"
            env_vars: ""
            reruns: ""
            upload_benchmarks: true
            parallel_opts: ""  # No parallel for benchmarks (performance measurement)
          - name: responses
            timeout: 45
            test_dirs: "e2e_test/responses"
            extra_deps: ""
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            setup_oracle: true
            setup_brave: true
            parallel_opts: ""  # Cloud backend tests not compatible with parallel execution
          - name: e2e
            timeout: 45
            test_dirs: "e2e_test/router e2e_test/embeddings"
            extra_deps: "pytest-parallel py"  # py is required for pytest-parallel with newer pytest
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            parallel_opts: "--workers 1 --tests-per-worker 4"  # Thread-based parallelism
          - name: chat-completions
            timeout: 45
            test_dirs: "e2e_test/chat_completions"
            extra_deps: ""
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            parallel_opts: ""
    runs-on: 4-gpu-a10
    timeout-minutes: ${{ matrix.timeout }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Setup Oracle Instant Client
        if: matrix.setup_oracle
        run: |
          sudo apt-get install -y unzip
          INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
          INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"

          # The client directory is reused across runs when it already exists.
          if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
            echo "Downloading Oracle Instant Client..."
            mkdir -p "$INSTANT_CLIENT_DIR"
            cd "$INSTANT_CLIENT_DIR"
            wget "https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP"
            unzip "$INSTANT_CLIENT_ZIP"
            rm "$INSTANT_CLIENT_ZIP"
          else
            echo "Oracle Instant Client already exists, skipping download"
          fi

          # Values written to $GITHUB_ENV are taken literally and never
          # shell-expanded later, so the current LD_LIBRARY_PATH has to be
          # expanded here. The ":+" form appends it only when it is non-empty,
          # which avoids a trailing ":" entry.
          echo "LD_LIBRARY_PATH=${INSTANT_CLIENT_DIR}/instantclient_23_9${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"

      - name: Start Oracle Database
        if: matrix.setup_oracle
        run: |
          docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
          echo "Starting Oracle DB..."

          # Export Oracle connection environment variables
          echo "ATP_USER=system" >> "$GITHUB_ENV"
          echo "ATP_PASSWORD=oracle" >> "$GITHUB_ENV"
          echo "ATP_DSN=localhost:1521/XEPDB1" >> "$GITHUB_ENV"

      - name: Start Brave MCP Server
        if: matrix.setup_brave
        run: |
          # `-e BRAVE_API_KEY` forwards the variable from this step's
          # environment. NOTE(review): nothing in this job sets it, so it is
          # presumably provided by the runner - confirm.
          docker run -d --rm \
            -p 8001:8080 \
            -e BRAVE_API_KEY \
            --name brave-search-server \
            shoofio/brave-search-mcp-sse:1.0.10
          echo "Starting Brave MCP Server..."
          sleep 2
          # Best-effort probe: either branch prints a message, so this line
          # never fails the step.
          curl -f --max-time 1 http://localhost:8001/sse > /dev/null 2>&1 && echo "Brave MCP Server is healthy!" || echo "Brave MCP Server responded"

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: smg-wheel
          path: wheel/

      - name: Install wheel
        run: |
          pip uninstall -y sglang-router || true
          pip install wheel/*.whl

      - name: Install e2e test dependencies
        run: |
          python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy
          if [ -n "${{ matrix.extra_deps }}" ]; then
            python3 -m pip --no-cache-dir install --upgrade ${{ matrix.extra_deps }}
          fi

      - name: Run E2E tests
        run: |
          python3 python/sglang/cli/killall.py
          cd sgl-model-gateway
          ${{ matrix.env_vars }} ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest ${{ matrix.reruns }} ${{ matrix.parallel_opts }} ${{ matrix.test_dirs }} -s -vv -o log_cli=true --log-cli-level=INFO

      - name: Upload benchmark results
        if: matrix.upload_benchmarks && success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: sgl-model-gateway/benchmark_**/

      # The cleanup steps run even after a failure so the named containers do
      # not linger.
      - name: Cleanup Brave MCP Server
        if: always() && matrix.setup_brave
        run: |
          docker stop brave-search-server || true
          docker rm brave-search-server || true

      - name: Cleanup Oracle Database
        if: always() && matrix.setup_oracle
        run: |
          docker stop oracle-db || true
          docker rm oracle-db || true

  # Verifies that docker/gateway.Dockerfile builds; the image is tagged
  # locally and never pushed.
  docker-build-test:
    runs-on: ubuntu-24.04
    # Always run outside pull requests; on pull requests run only when the
    # `run-ci` label is present or is the label that was just added.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image (no push)
        uses: docker/build-push-action@v5
        with:
          file: docker/gateway.Dockerfile
          context: "."
          tags: "sgl-model-gateway:test"
          push: false
          # Layer cache is read from and written to the GitHub Actions cache.
          cache-to: type=gha,mode=max
          cache-from: type=gha

  # Aggregation job: it depends on every other test job in this workflow and
  # only prints a message.
  finish:
    runs-on: ubuntu-latest
    needs:
      - build-wheel
      - python-unit-tests
      - unit-tests
      - gateway-e2e
      - docker-build-test
    steps:
      - name: Finish
        run: |
          echo "This is an empty step to ensure that all jobs are completed."

  # Downloads the benchmark artifact uploaded by gateway-e2e and passes the
  # workspace directory to the summarize script.
  summarize-benchmarks:
    runs-on: ubuntu-latest
    needs: gateway-e2e
    if: success()

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: Create benchmark summary
        run: |
          python3 sgl-model-gateway/e2e_test/benchmarks/summarize.py .

pr-test-sgl-kernel .github/workflows/pr-test-sgl-kernel.yml

Triggers

workflow_call

Runs on

1-gpu-h100, 1-gpu-h100, 1-gpu-h100, ${{ inputs.b200_runner }}

Jobs

sgl-kernel-unit-test, sgl-kernel-mla-test, sgl-kernel-benchmark-test, sgl-kernel-b200-test

Commands

ls -alh sgl-kernel/dist || true rm -rf sgl-kernel/dist/* || true
CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
cd sgl-kernel pytest tests/
ls -alh sgl-kernel/dist || true rm -rf sgl-kernel/dist/* || true
CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
cd test/registered/mla python3 test_mla_deepseek_v3.py
ls -alh sgl-kernel/dist || true rm -rf sgl-kernel/dist/* || true
CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

View raw YAML

# Reusable workflow (workflow_call only) that runs the sgl-kernel test jobs.
name: PR Test - SGL Kernel

on:
  workflow_call:
    inputs:
      # Forwarded to the dependency install script as CUSTOM_BUILD_SGL_KERNEL.
      sgl_kernel:
        type: string
        required: true
      # Runner label used by the B200 job.
      b200_runner:
        type: string
        required: true
      # Checkout ref candidates, tried in this order before github.sha.
      pr_head_sha:
        type: string
        required: false
        default: ""
      git_ref:
        type: string
        required: false
        default: ""
      skip_stage_health_check:
        type: boolean
        required: false
        default: false

# Workflow-level env is NOT inherited from the caller in reusable workflows.
# The github context (including github.event_name) IS inherited from the caller.
env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  # Both values below are rendered as the strings 'true' / 'false'.
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}

jobs:
  # Runs `pytest tests/` inside sgl-kernel on a 1-gpu-h100 runner, after
  # downloading the wheel artifact and installing dependencies.
  sgl-kernel-unit-test:
    timeout-minutes: 240
    runs-on: 1-gpu-h100
    steps:
      # Prefer the PR head SHA, then the explicit git ref, then the trigger SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Empty sgl-kernel/dist before the artifact download; both commands are
      # allowed to fail.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: "wheel-python3.10-cuda12.9"
          merge-multiple: true
          path: "sgl-kernel/dist/"

      - name: Install dependencies
        timeout-minutes: 20
        run: CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run test
        timeout-minutes: 30
        run: |
          cd sgl-kernel
          pytest tests/

  # Runs test_mla_deepseek_v3.py from test/registered/mla on a 1-gpu-h100
  # runner, after downloading the wheel artifact and installing dependencies.
  sgl-kernel-mla-test:
    timeout-minutes: 240
    runs-on: 1-gpu-h100
    steps:
      # Prefer the PR head SHA, then the explicit git ref, then the trigger SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Empty sgl-kernel/dist before the artifact download; both commands are
      # allowed to fail.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: "wheel-python3.10-cuda12.9"
          merge-multiple: true
          path: "sgl-kernel/dist/"

      - name: Install dependencies
        timeout-minutes: 20
        run: CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/registered/mla
          python3 test_mla_deepseek_v3.py

  # Smoke-runs every bench_*.py script in sgl-kernel/benchmark on a
  # 1-gpu-h100 runner. Each script gets 60 seconds; a timeout or failure is
  # logged as a warning and does not fail the step.
  sgl-kernel-benchmark-test:
    timeout-minutes: 240
    runs-on: 1-gpu-h100
    steps:
      # Prefer the PR head SHA, then the explicit git ref, then the trigger SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Empty sgl-kernel/dist before the artifact download; both commands are
      # allowed to fail.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: "wheel-python3.10-cuda12.9"
          merge-multiple: true
          path: "sgl-kernel/dist/"

      - name: Install dependencies
        timeout-minutes: 20
        run: CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run benchmark tests
        timeout-minutes: 45
        run: |
          cd sgl-kernel/benchmark
          echo "Running sgl-kernel benchmark tests in CI mode..."

          echo "CI environment variable: $CI"
          echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS"

          # The `|| echo` keeps the loop going when a script fails or times out.
          for bench_file in bench_*.py; do
            echo "Testing $bench_file..."
            timeout 60 python3 "$bench_file" || echo "Warning: $bench_file timed out or failed, continuing..."
            echo "Completed $bench_file"
            echo "---"
          done

          echo "All benchmark tests completed!"

  # Runs `pytest tests/` inside sgl-kernel on the runner named by the
  # `b200_runner` input.
  sgl-kernel-b200-test:
    timeout-minutes: 240
    runs-on: ${{ inputs.b200_runner }}
    steps:
      # Prefer the PR head SHA, then the explicit git ref, then the trigger SHA.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # Empty sgl-kernel/dist before the artifact download; both commands are
      # allowed to fail.
      - name: Cleanup
        run: |
          ls -alh sgl-kernel/dist || true
          rm -rf sgl-kernel/dist/* || true

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: "wheel-python3.10-cuda12.9"
          merge-multiple: true
          path: "sgl-kernel/dist/"

      - name: Install dependencies
        timeout-minutes: 20
        run: CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

      - name: Run sgl-kernel unit tests on B200
        timeout-minutes: 30
        run: |
          cd sgl-kernel
          pytest tests/

  # Single CUDA 13 smoke test that verifies the kernel builds and runs.
  # TODO: Re-enable this job once it passes on CI.
  # cuda13-kernel-smoke-test:
  #   if: inputs.sgl_kernel == 'true'
  #   runs-on: x64-cu13-kernel-tests
  #   steps:
  #     - uses: actions/checkout@v4

  #     - name: Cleanup
  #       run: |
  #         ls -alh sgl-kernel/dist || true
  #         rm -rf sgl-kernel/dist/* || true

  #     - name: Download CUDA 13.0 artifacts
  #       uses: actions/download-artifact@v4
  #       with:
  #         path: sgl-kernel/dist/
  #         merge-multiple: true
  #         pattern: wheel-python3.10-cuda13.0

  #     - name: Install dependencies
  #       run: |
  #         CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

  #     - name: Run kernel unit tests
  #       timeout-minutes: 30
  #       run: |
  #         cd sgl-kernel
  #         pytest tests/

pr-test-xeon matrix .github/workflows/pr-test-xeon.yml

Triggers

push, pull_request, workflow_dispatch, workflow_call

Runs on

ubuntu-latest, xeon-gnr

Jobs

check-changes, pr-gate, build-test

Matrix

build_type→ all

Actions

dorny/paths-filter

Commands

# Run all tests for workflow_call (when ref input is provided) # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then echo "run_all_tests=true" >> $GITHUB_OUTPUT echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})" else echo "run_all_tests=false" >> $GITHUB_OUTPUT echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" fi
version=$(cat python/sglang/version.py | cut -d'"' -f2) tag=v${version}-xeon PR_REPO=${{ github.event.pull_request.head.repo.clone_url }} PR_HEAD_REF=${{ github.head_ref }} docker build \ ${PR_REPO:+--build-arg SGLANG_REPO=$PR_REPO} \ ${PR_HEAD_REF:+--build-arg VER_SGLANG=$PR_HEAD_REF} \ . -f docker/xeon.Dockerfile -t sglang_xeon --no-cache
docker run -dt \ -v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \ -v ${HF_HOME}:/root/.cache/huggingface \ --name ci_sglang_xeon \ sglang_xeon
docker exec -w /sglang-checkout/ ci_sglang_xeon \ bash -c "source /opt/.venv/bin/activate && python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '"
docker exec -w /sglang-checkout/ ci_sglang_xeon \ bash -c "source /opt/.venv/bin/activate && cd ./test/srt && python3 run_suite.py --suite per-commit-cpu --timeout-per-file 1500"
docker exec -u root ci_sglang_xeon bash -c " rm -rf /tmp/ci-home && chown -R $(id -u):$(id -g) /sglang-checkout/ 2>/dev/null || true "
docker rm -f ci_sglang_xeon || true

View raw YAML

name: PR Test (Xeon)

# Triggered by pushes and pull requests targeting main, by manual dispatch,
# and by other workflows through workflow_call.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ""
        description: "Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch."
      run_all_tests:
        type: boolean
        required: false
        default: false
        description: "Run all tests (for releasing or testing purpose)"

# One group per tested ref; a newer run never cancels a run that is already in progress.
concurrency:
  cancel-in-progress: false
  group: pr-test-xeon-${{ inputs.ref || github.ref }}

jobs:
  # ==================== Check Changes ==================== #
  # Decides whether the rest of the workflow has to run: either a full run was
  # requested through the `run_all_tests` input, or the path filter matched.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      # The filter step is skipped for full runs, so its output is empty and the
      # expression falls back to the run-mode output ('true').
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests only when the run_all_tests input is true.
          # Note: github.event_name is inherited from the caller, so it cannot be
          # used to tell a workflow_call invocation apart from a direct trigger.
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi

      # Only consulted for filtered runs; sets main_package when a listed path changed.
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_cpu.toml"
              - "test/**/!(*.md)"
              - "sgl-kernel/**/*.!(md|txt)"
              - ".github/workflows/pr-test-xeon.yml"
              - "docker/xeon.Dockerfile"

  # ==================== PR Gate ==================== #
  pr-gate:
    # Delegates to the shared gate workflow, but only when check-changes
    # reported that this workflow is relevant for the change.
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit
    needs: [check-changes]
    if: ${{ needs.check-changes.outputs.main_package == 'true' }}

  # Builds the Xeon image, starts a container from it with the checkout mounted,
  # verifies AMX support and runs the per-commit CPU test suite inside it.
  build-test:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: xeon-gnr
    env:
      HF_HOME: /home/sdp/.cache/huggingface
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Build and Push
        # The PR clone URL and head branch name are contributor-controlled. They are
        # handed over as environment variables so their content is never parsed as
        # shell code (expanding ${{ }} inside the script would allow injection).
        env:
          PR_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
          PR_HEAD_REF: ${{ github.head_ref }}
        run: |
          # Both values are empty outside pull_request events; `${VAR:+...}` then
          # expands to nothing and the Dockerfile defaults are used.
          docker build \
            ${PR_REPO:+--build-arg "SGLANG_REPO=$PR_REPO"} \
            ${PR_HEAD_REF:+--build-arg "VER_SGLANG=$PR_HEAD_REF"} \
            . -f docker/xeon.Dockerfile -t sglang_xeon --no-cache

      - name: Run container
        run: |
          docker run -dt \
            -v "${{ github.workspace }}:/sglang-checkout/" --ipc=host \
            -v "${HF_HOME}:/root/.cache/huggingface" \
            --name ci_sglang_xeon \
            sglang_xeon

      - name: Check AMX support
        id: check_amx
        timeout-minutes: 5
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "source /opt/.venv/bin/activate && python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '"

      - name: Run unit tests
        timeout-minutes: 36
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "source /opt/.venv/bin/activate && cd ./test/srt && python3 run_suite.py --suite per-commit-cpu --timeout-per-file 1500"

      - name: Change permission
        # Runs even after a failed or timed-out test step, so the mounted checkout
        # is handed back to the runner user before the container is removed.
        if: always()
        timeout-minutes: 2
        run: |
          # $(id -u) / $(id -g) are expanded on the host, i.e. they name the runner user.
          docker exec -u root ci_sglang_xeon bash -c "
            rm -rf /tmp/ci-home &&
            chown -R $(id -u):$(id -g) /sglang-checkout/ 2>/dev/null || true
          "

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f ci_sglang_xeon || true

pr-test-xpu .github/workflows/pr-test-xpu.yml

Triggers

push, pull_request, workflow_dispatch, workflow_call

Runs on

ubuntu-latest, intel-bmg, ubuntu-latest

Jobs

check-changes, pr-gate, build-and-test, finish

Actions

dorny/paths-filter, docker/setup-buildx-action

Commands

# Run all tests for workflow_call (when ref input is provided) # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then echo "run_all_tests=true" >> $GITHUB_OUTPUT echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})" else echo "run_all_tests=false" >> $GITHUB_OUTPUT echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" fi
PR_REPO=${{ github.event.pull_request.head.repo.clone_url }} PR_HEAD_REF=${{ github.head_ref }} docker build \ ${PR_REPO:+--build-arg SG_LANG_REPO=$PR_REPO} \ ${PR_HEAD_REF:+--build-arg SG_LANG_BRANCH=$PR_HEAD_REF} \ --no-cache --progress=plain -f docker/xpu.Dockerfile -t xpu_sglang_main:bmg .
container_id=$(docker run -dt \ --group-add 992 \ --group-add $(getent group video | cut -d: -f3) \ -v ${HF_HOME}:/root/.cache/huggingface \ --device /dev/dri \ -e HF_TOKEN="$(cat ~/huggingface_token.txt)" \ xpu_sglang_main:bmg) echo "Started container: $container_id" echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
cid="${{ steps.start_container.outputs.container_id }}" docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip uninstall -y flashinfer-python docker exec "$cid" /bin/bash -c '/home/sdp/miniforge3/envs/py3.10/bin/hf auth login --token ${HF_TOKEN} '
cid="${{ steps.start_container.outputs.container_id }}" docker exec "$cid" bash -c "source /home/sdp/miniforge3/bin/activate && conda activate py3.10 && cd /home/sdp/sglang/test/srt && python3 run_suite.py --suite per-commit-xpu"
cid="${{ steps.start_container.outputs.container_id }}" docker rm -f "$cid" || true
result="${{ needs.build-and-test.result }}" if [ "$result" != "success" ] && [ "$result" != "skipped" ]; then echo "Job failed with result: $result" exit 1 fi echo "All jobs completed successfully (result: $result)" exit 0

View raw YAML

name: PR Test (XPU)

# Triggered by pushes and pull requests targeting main, by manual dispatch,
# and by other workflows through workflow_call.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  group: pr-test-xpu-${{ inputs.ref || github.ref }}
  # A called workflow sees the caller's github.event_name, so comparing it with
  # 'workflow_call' is always true. Runs started with an explicit `ref` input
  # (i.e. from a caller) are detected through that input instead and are never
  # cancelled; direct push / pull_request / dispatch runs keep cancelling.
  cancel-in-progress: ${{ !inputs.ref }}

jobs:
  # ==================== Check Changes ==================== #
  # Decides whether the rest of the workflow has to run: either a full run was
  # requested through the `run_all_tests` input, or the path filter matched.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      # The filter step is skipped for full runs, so its output is empty and the
      # expression falls back to the run-mode output ('true').
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}

      - name: Determine run mode
        id: run-mode
        run: |
          # Run all tests only when the run_all_tests input is true.
          # Note: github.event_name is inherited from the caller, so it cannot be
          # used to tell a workflow_call invocation apart from a direct trigger.
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi

      # Only consulted for filtered runs; sets main_package when a listed path changed.
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        if: steps.run-mode.outputs.run_all_tests != 'true'
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**/!(*.md)"
              - "python/pyproject_xpu.toml"
              - "test/**/!(*.md)"
              - "sgl-kernel/**/*.!(md|txt)"
              - ".github/workflows/pr-test-xpu.yml"
              - "docker/xpu.Dockerfile"

  # ==================== PR Gate ==================== #
  pr-gate:
    # Delegates to the shared gate workflow, but only when check-changes
    # reported that this workflow is relevant for the change.
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit
    needs: [check-changes]
    if: ${{ needs.check-changes.outputs.main_package == 'true' }}

  # Builds the XPU image, starts a container from it, installs the test
  # dependencies inside the container and runs the per-commit XPU suite.
  build-and-test:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: intel-bmg
    env:
      HF_HOME: /home/sdp/.cache/huggingface
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ inputs.ref || github.ref }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image
        # The PR clone URL and head branch name are contributor-controlled. They are
        # handed over as environment variables so their content is never parsed as
        # shell code (expanding ${{ }} inside the script would allow injection).
        env:
          PR_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
          PR_HEAD_REF: ${{ github.head_ref }}
        run: |
          # Both values are empty outside pull_request events; `${VAR:+...}` then
          # expands to nothing and the Dockerfile defaults are used.
          docker build \
            ${PR_REPO:+--build-arg "SG_LANG_REPO=$PR_REPO"} \
            ${PR_HEAD_REF:+--build-arg "SG_LANG_BRANCH=$PR_HEAD_REF"} \
            --no-cache --progress=plain -f docker/xpu.Dockerfile -t xpu_sglang_main:bmg .

      # The container id is published as a step output so later steps can address it.
      - name: Run container
        id: start_container
        run: |
          container_id=$(docker run -dt \
            --group-add 992 \
            --group-add $(getent group video | cut -d: -f3) \
            -v ${HF_HOME}:/root/.cache/huggingface \
            --device /dev/dri \
            -e HF_TOKEN="$(cat ~/huggingface_token.txt)" \
            xpu_sglang_main:bmg)
          echo "Started container: $container_id"
          echo "container_id=$container_id" >> "$GITHUB_OUTPUT"

      - name: Install Dependency
        timeout-minutes: 20
        run: |
          cid="${{ steps.start_container.outputs.container_id }}"
          docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
          docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
          docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip uninstall -y flashinfer-python
          docker exec "$cid" /bin/bash -c '/home/sdp/miniforge3/envs/py3.10/bin/hf auth login --token ${HF_TOKEN} '

      - name: Run E2E Bfloat16 tests
        timeout-minutes: 20
        run: |
          cid="${{ steps.start_container.outputs.container_id }}"
          docker exec "$cid" bash -c "source /home/sdp/miniforge3/bin/activate && conda activate py3.10 && cd /home/sdp/sglang/test/srt && python3 run_suite.py --suite per-commit-xpu"

      # Always remove the container; `|| true` covers the case where it never started.
      - name: Cleanup container
        if: always()
        run: |
          cid="${{ steps.start_container.outputs.container_id }}"
          docker rm -f "$cid" || true

  # Single summarising job: passes when build-and-test succeeded or was skipped,
  # fails for any other result. Runs regardless of how the needed jobs ended.
  finish:
    needs: [build-and-test, pr-gate]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check job status
        run: |
          outcome="${{ needs.build-and-test.result }}"
          case "$outcome" in
            success|skipped)
              echo "All jobs completed successfully (result: $outcome)"
              exit 0
              ;;
            *)
              echo "Job failed with result: $outcome"
              exit 1
              ;;
          esac

release-branch-cut perms .github/workflows/release-branch-cut.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

cut-release-branch, run-pr-tests-nvidia, run-pr-tests-amd, run-pr-test-npu, run-pr-tests-xeon, run-pr-tests-xpu, run-nightly-tests-nvidia, run-nightly-tests-amd, run-nightly-tests-npu, run-nightly-tests-intel

Commands

BRANCH_NAME="${{ github.event.inputs.branch_name }}" if [ -z "$BRANCH_NAME" ]; then echo "::error::Branch name is required" exit 1 fi # Validate branch name format (should start with release/) if [[ ! "$BRANCH_NAME" =~ ^release/ ]]; then echo "::warning::Branch name '$BRANCH_NAME' does not follow convention 'release/vX.Y.Z'" fi echo "Branch name: $BRANCH_NAME"
COMMIT_SHA="${{ github.event.inputs.commit_sha }}" # If no commit SHA provided, use latest main if [ -z "$COMMIT_SHA" ]; then COMMIT_SHA=$(git rev-parse HEAD) echo "No commit SHA provided, using latest main: $COMMIT_SHA" fi # Verify the commit exists and is on main if ! git cat-file -t "$COMMIT_SHA" > /dev/null 2>&1; then echo "::error::Commit SHA '$COMMIT_SHA' does not exist" exit 1 fi # Check if commit is an ancestor of main (i.e., is on main branch) if ! git merge-base --is-ancestor "$COMMIT_SHA" main; then echo "::error::Commit SHA '$COMMIT_SHA' is not on the main branch" exit 1 fi echo "COMMIT_SHA=$COMMIT_SHA" >> $GITHUB_OUTPUT echo "Validated commit SHA: $COMMIT_SHA"
BRANCH_NAME="${{ github.event.inputs.branch_name }}" if git ls-remote --heads origin "$BRANCH_NAME" | grep -q "$BRANCH_NAME"; then echo "::error::Branch '$BRANCH_NAME' already exists" exit 1 fi echo "Branch '$BRANCH_NAME' does not exist, proceeding with creation"
COMMIT_SHA="${{ steps.validate.outputs.COMMIT_SHA }}" BRANCH_NAME="${{ github.event.inputs.branch_name }}" git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com" # Create branch from the specified commit git checkout -b "$BRANCH_NAME" "$COMMIT_SHA" echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT echo "Successfully created branch '$BRANCH_NAME' from commit '$COMMIT_SHA'"
BRANCH_NAME="${{ github.event.inputs.branch_name }}" # Extract version from branch name (e.g., release/v0.5.8 -> v0.5.8) VERSION=$(echo "$BRANCH_NAME" | sed 's/release\///') # Update git clone version references in docs sed -i "s/git clone -b v[0-9]\+\.[0-9]\+\.[0-9]\+\.\?post\?[0-9]*/git clone -b $VERSION/" docs/get_started/install.md sed -i "s/git clone -b v[0-9]\+\.[0-9]\+\.[0-9]\+\.\?post\?[0-9]*/git clone -b $VERSION/" docs/platforms/amd_gpu.md # Check if any changes were made if git diff --quiet; then echo "No version references needed updating" else git add docs/get_started/install.md docs/platforms/amd_gpu.md git commit -m "docs: update version references to $VERSION" echo "Updated version references to $VERSION" fi
BRANCH_NAME="${{ steps.set_output.outputs.branch_name }}" git push origin "$BRANCH_NAME" echo "Successfully pushed branch '$BRANCH_NAME'"
COMMIT_SHA="${{ steps.validate.outputs.COMMIT_SHA }}" BRANCH_NAME="${{ github.event.inputs.branch_name }}" echo "## Release Branch Cut Summary" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "| Property | Value |" >> $GITHUB_STEP_SUMMARY echo "|----------|-------|" >> $GITHUB_STEP_SUMMARY echo "| Branch | \`$BRANCH_NAME\` |" >> $GITHUB_STEP_SUMMARY echo "| Commit | \`$COMMIT_SHA\` |" >> $GITHUB_STEP_SUMMARY echo "| Triggered by | @${{ github.actor }} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "### Next Steps" >> $GITHUB_STEP_SUMMARY echo "1. Tests are automatically triggered on the release branch" >> $GITHUB_STEP_SUMMARY echo "2. Apply any hotfixes if needed" >> $GITHUB_STEP_SUMMARY echo "3. Create a tag to trigger release: \`gh workflow run release-tag.yml -f version=X.Y.Z -f ref=$BRANCH_NAME\`" >> $GITHUB_STEP_SUMMARY

View raw YAML

name: Release Branch Cut

# Manual-only workflow: an operator supplies the branch to create and,
# optionally, the commit on main to cut it from.
on:
  workflow_dispatch:
    inputs:
      branch_name:
        type: string
        required: true
        description: "Branch name to create (e.g., release/v0.5.7)"
      commit_sha:
        type: string
        required: false
        default: ""
        description: "Commit SHA from main to cut the release branch from (defaults to latest main)"

# `contents: write` is what allows the job to push the new branch.
# NOTE(review): the reason for `actions: write` is not visible here - confirm it is still needed.
permissions:
  actions: write
  contents: write
  issues: read
  pull-requests: read

jobs:
  # Creates the release branch from a validated commit on main, rewrites the
  # `git clone -b vX.Y.Z` references in two docs pages, pushes the branch and
  # writes a run summary. Exposes the branch name to the downstream test jobs.
  cut-release-branch:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: 'prod'
    outputs:
      branch_name: ${{ steps.set_output.outputs.branch_name }}
    # Dispatch inputs reach the scripts as environment variables rather than being
    # expanded into the script text, so their content is never parsed as shell code.
    env:
      BRANCH_NAME: ${{ github.event.inputs.branch_name }}
      REQUESTED_SHA: ${{ github.event.inputs.commit_sha }}
    steps:
      # Full history of main is needed for the ancestry check below.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: main
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Validate branch name
        run: |
          if [ -z "$BRANCH_NAME" ]; then
            echo "::error::Branch name is required"
            exit 1
          fi

          # Reject names git itself would refuse (also rules out a leading '-'
          # and glob characters that later git commands could misinterpret).
          if ! git check-ref-format --branch "$BRANCH_NAME" > /dev/null 2>&1; then
            echo "::error::Branch name '$BRANCH_NAME' is not a valid git branch name"
            exit 1
          fi

          # Validate branch name format (should start with release/)
          if [[ ! "$BRANCH_NAME" =~ ^release/ ]]; then
            echo "::warning::Branch name '$BRANCH_NAME' does not follow convention 'release/vX.Y.Z'"
          fi

          echo "Branch name: $BRANCH_NAME"

      - name: Validate commit SHA
        id: validate
        run: |
          COMMIT_SHA="$REQUESTED_SHA"

          # If no commit SHA provided, use latest main
          if [ -z "$COMMIT_SHA" ]; then
            COMMIT_SHA=$(git rev-parse HEAD)
            echo "No commit SHA provided, using latest main: $COMMIT_SHA"
          fi

          # Resolve the input to a full commit SHA; this fails for unknown
          # revisions and for objects that are not commits (trees, blobs).
          if ! RESOLVED_SHA=$(git rev-parse --verify --quiet "${COMMIT_SHA}^{commit}"); then
            echo "::error::Commit SHA '$COMMIT_SHA' does not exist"
            exit 1
          fi
          COMMIT_SHA="$RESOLVED_SHA"

          # Check if commit is an ancestor of main (i.e., is on main branch)
          if ! git merge-base --is-ancestor "$COMMIT_SHA" main; then
            echo "::error::Commit SHA '$COMMIT_SHA' is not on the main branch"
            exit 1
          fi

          echo "COMMIT_SHA=$COMMIT_SHA" >> "$GITHUB_OUTPUT"
          echo "Validated commit SHA: $COMMIT_SHA"

      - name: Check if branch already exists
        run: |
          # Ask for the exact ref; --exit-code makes ls-remote return non-zero when
          # nothing matches, so no regex/substring match on the output is needed.
          if git ls-remote --exit-code --heads origin "refs/heads/$BRANCH_NAME" > /dev/null; then
            echo "::error::Branch '$BRANCH_NAME' already exists"
            exit 1
          fi

          echo "Branch '$BRANCH_NAME' does not exist, proceeding with creation"

      - name: Create release branch
        id: set_output
        env:
          COMMIT_SHA: ${{ steps.validate.outputs.COMMIT_SHA }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"

          # Create branch from the specified commit
          git checkout -b "$BRANCH_NAME" "$COMMIT_SHA"

          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
          echo "Successfully created branch '$BRANCH_NAME' from commit '$COMMIT_SHA'"

      - name: Update version references in documentation
        run: |
          # Extract version from branch name (e.g., release/v0.5.8 -> v0.5.8)
          VERSION=$(echo "$BRANCH_NAME" | sed 's/release\///')
          # Escape the characters that are special in a sed replacement ('/' is the
          # delimiter, '&' means "whole match") so unconventional names cannot break it.
          VERSION_ESCAPED=$(printf '%s' "$VERSION" | sed 's/[\/&]/\\&/g')

          # Update git clone version references in docs
          sed -i "s/git clone -b v[0-9]\+\.[0-9]\+\.[0-9]\+\.\?post\?[0-9]*/git clone -b $VERSION_ESCAPED/" docs/get_started/install.md
          sed -i "s/git clone -b v[0-9]\+\.[0-9]\+\.[0-9]\+\.\?post\?[0-9]*/git clone -b $VERSION_ESCAPED/" docs/platforms/amd_gpu.md

          # Check if any changes were made
          if git diff --quiet; then
            echo "No version references needed updating"
          else
            git add docs/get_started/install.md docs/platforms/amd_gpu.md
            git commit -m "docs: update version references to $VERSION"
            echo "Updated version references to $VERSION"
          fi

      - name: Push release branch
        run: |
          git push origin "$BRANCH_NAME"
          echo "Successfully pushed branch '$BRANCH_NAME'"

      - name: Summary
        env:
          COMMIT_SHA: ${{ steps.validate.outputs.COMMIT_SHA }}
        run: |
          # GITHUB_ACTOR is the runner-provided equivalent of github.actor.
          {
            echo "## Release Branch Cut Summary"
            echo ""
            echo "| Property | Value |"
            echo "|----------|-------|"
            echo "| Branch | \`$BRANCH_NAME\` |"
            echo "| Commit | \`$COMMIT_SHA\` |"
            echo "| Triggered by | @${GITHUB_ACTOR} |"
            echo ""
            echo "### Next Steps"
            echo "1. Tests are automatically triggered on the release branch"
            echo "2. Apply any hotfixes if needed"
            echo "3. Create a tag to trigger release: \`gh workflow run release-tag.yml -f version=X.Y.Z -f ref=$BRANCH_NAME\`"
          } >> "$GITHUB_STEP_SUMMARY"

  run-pr-tests-nvidia:
    # Calls pr-test.yml on the new branch with every test enabled and the
    # stage health check skipped.
    uses: ./.github/workflows/pr-test.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      run_all_tests: true
      skip_stage_health_check: true
      git_ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-pr-tests-amd:
    # Calls pr-test-amd.yml on the new branch with every test enabled.
    uses: ./.github/workflows/pr-test-amd.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      run_all_tests: true
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-pr-test-npu:
    # Calls pr-test-npu.yml on the new branch with every test enabled.
    uses: ./.github/workflows/pr-test-npu.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      run_all_tests: true
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-pr-tests-xeon:
    # Calls pr-test-xeon.yml on the new branch with every test enabled.
    uses: ./.github/workflows/pr-test-xeon.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      run_all_tests: true
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-pr-tests-xpu:
    # Calls pr-test-xpu.yml on the new branch with every test enabled.
    uses: ./.github/workflows/pr-test-xpu.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      run_all_tests: true
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-nightly-tests-nvidia:
    # Calls nightly-test-nvidia.yml against the new branch.
    uses: ./.github/workflows/nightly-test-nvidia.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-nightly-tests-amd:
    # Calls nightly-test-amd.yml against the new branch.
    uses: ./.github/workflows/nightly-test-amd.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-nightly-tests-npu:
    # Calls nightly-test-npu.yml against the new branch.
    uses: ./.github/workflows/nightly-test-npu.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

  run-nightly-tests-intel:
    # Calls nightly-test-intel.yml against the new branch.
    uses: ./.github/workflows/nightly-test-intel.yml
    needs: [cut-release-branch]
    secrets: inherit
    with:
      ref: ${{ needs.cut-release-branch.outputs.branch_name }}

release-docker matrix .github/workflows/release-docker.yml

Triggers

push, workflow_dispatch

Runs on

x64-docker-build-node, arm-docker-build-node, ubuntu-22.04

Jobs

publish-x86, publish-arm64, create-manifests

Matrix

variant, variant.build_type, variant.cuda_version, variant.grace_blackwell→ 0, 1, 12.9.1, all

Actions

jlumbroso/free-disk-space, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action

Commands

rm -rf /opt/hostedtoolcache
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target framework \ --platform linux/amd64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu129-framework.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-framework.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "digest=${DIGEST}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target framework \ --platform linux/amd64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=13.0.1 \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg GRACE_BLACKWELL=0 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu130-framework.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-framework.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "digest=${DIGEST}" >> $GITHUB_OUTPUT
rm -rf /opt/hostedtoolcache
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target framework \ --platform linux/arm64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu129-framework.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-framework.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "digest=${DIGEST}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target framework \ --platform linux/arm64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=13.0.1 \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg GRACE_BLACKWELL=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu130-framework.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-framework.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "digest=${DIGEST}" >> $GITHUB_OUTPUT

View raw YAML

name: Release Docker Images
#
# Builds and publishes the framework Docker images (full development environment):
#   - lmsysorg/sglang:v{version}, lmsysorg/sglang:latest
#   - lmsysorg/sglang:v{version}-cu130, lmsysorg/sglang:latest-cu130
#
# Starts on version tag pushes, or manually with an explicit version.
on:
  workflow_dispatch:
    inputs:
      version:
        required: true
        description: "Version to build (without v prefix, e.g., 0.5.7)"
  push:
    tags:
      - 'v[0-9]+.*'

jobs:
  # Builds the linux/amd64 framework images (CUDA version from the matrix, plus
  # a hard-coded CUDA 13.0.1 build) and pushes each one by digest only. The two
  # digests are exported as job outputs so `create-manifests` can tag them.
  publish-x86:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    outputs:
      digest-cu129: ${{ steps.build-cu129.outputs.digest }}
      digest-cu130: ${{ steps.build-cu130.outputs.digest }}
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 0
    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free disk space
        # Pinned to a release tag instead of the mutable `main` branch: this job
        # later logs in to Docker Hub, so third-party code must not change
        # underneath it without a reviewed bump.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # Handed over through the environment instead of being interpolated
          # into the script text, so the input can never be parsed as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # The whole string must match: X.Y.Z plus an optional suffix made only
          # of characters that are legal in a Docker tag (e.g. 0.5.8.post1).
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z._-]*$'
          if ! [[ "$VERSION" =~ $version_re ]]; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build AMD64 Framework
        id: build-cu129
        env:
          SGL_VERSION: ${{ steps.version.outputs.version }}
        run: |
          metadata_file=/tmp/metadata-cu129-framework.json

          # push-by-digest: the image is uploaded without a tag; tagging happens
          # in the create-manifests job.
          docker buildx build \
            --target framework \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg "SGL_VERSION=${SGL_VERSION}" \
            --metadata-file "${metadata_file}" \
            --no-cache \
            .

          # Read the pushed image digest back from the buildx metadata file.
          DIGEST=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['containerimage.digest'])" "${metadata_file}")
          echo "Pushed digest: ${DIGEST}"
          echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"

      - name: Build and Push AMD64 Framework (CUDA 13)
        id: build-cu130
        env:
          SGL_VERSION: ${{ steps.version.outputs.version }}
        run: |
          metadata_file=/tmp/metadata-cu130-framework.json

          docker buildx build \
            --target framework \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=0 \
            --build-arg "SGL_VERSION=${SGL_VERSION}" \
            --metadata-file "${metadata_file}" \
            --no-cache \
            .

          # Read the pushed image digest back from the buildx metadata file.
          DIGEST=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['containerimage.digest'])" "${metadata_file}")
          echo "Pushed digest: ${DIGEST}"
          echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"

  # Builds the linux/arm64 framework images (CUDA version from the matrix, plus
  # a hard-coded CUDA 13.0.1 build) and pushes each one by digest only. The two
  # digests are exported as job outputs so `create-manifests` can tag them.
  publish-arm64:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    outputs:
      digest-cu129: ${{ steps.build-cu129.outputs.digest }}
      digest-cu130: ${{ steps.build-cu130.outputs.digest }}
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 1
    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # Handed over through the environment instead of being interpolated
          # into the script text, so the input can never be parsed as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # The whole string must match: X.Y.Z plus an optional suffix made only
          # of characters that are legal in a Docker tag (e.g. 0.5.8.post1).
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z._-]*$'
          if ! [[ "$VERSION" =~ $version_re ]]; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build ARM64 Framework
        id: build-cu129
        env:
          SGL_VERSION: ${{ steps.version.outputs.version }}
        run: |
          metadata_file=/tmp/metadata-cu129-framework.json

          # push-by-digest: the image is uploaded without a tag; tagging happens
          # in the create-manifests job.
          docker buildx build \
            --target framework \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg "SGL_VERSION=${SGL_VERSION}" \
            --metadata-file "${metadata_file}" \
            --no-cache \
            .

          # Read the pushed image digest back from the buildx metadata file.
          DIGEST=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['containerimage.digest'])" "${metadata_file}")
          echo "Pushed digest: ${DIGEST}"
          echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"

      - name: Build and Push ARM64 Framework (CUDA 13)
        id: build-cu130
        env:
          SGL_VERSION: ${{ steps.version.outputs.version }}
        run: |
          metadata_file=/tmp/metadata-cu130-framework.json

          docker buildx build \
            --target framework \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=1 \
            --build-arg "SGL_VERSION=${SGL_VERSION}" \
            --metadata-file "${metadata_file}" \
            --no-cache \
            .

          # Read the pushed image digest back from the buildx metadata file.
          DIGEST=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1]))['containerimage.digest'])" "${metadata_file}")
          echo "Pushed digest: ${DIGEST}"
          echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT"

  # Combines the per-architecture digests pushed by publish-x86 and
  # publish-arm64 into the four multi-arch tags: v{version}, latest,
  # v{version}-cu130 and latest-cu130.
  create-manifests:
    runs-on: ubuntu-22.04
    needs: [publish-x86, publish-arm64]
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # Handed over through the environment instead of being interpolated
          # into the script text, so the input can never be parsed as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # The whole string must match: X.Y.Z plus an optional suffix made only
          # of characters that are legal in a Docker tag (e.g. 0.5.8.post1).
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z._-]*$'
          if ! [[ "$VERSION" =~ $version_re ]]; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Create multi-arch manifests
        env:
          SGL_VERSION: ${{ steps.version.outputs.version }}
          CU129_AMD64_FW: ${{ needs.publish-x86.outputs.digest-cu129 }}
          CU130_AMD64_FW: ${{ needs.publish-x86.outputs.digest-cu130 }}
          CU129_ARM64_FW: ${{ needs.publish-arm64.outputs.digest-cu129 }}
          CU130_ARM64_FW: ${{ needs.publish-arm64.outputs.digest-cu130 }}
        run: |
          # Stop with a clear message if an upstream job did not export a
          # digest, rather than handing "lmsysorg/sglang@" to imagetools.
          for digest_var in CU129_AMD64_FW CU130_AMD64_FW CU129_ARM64_FW CU130_ARM64_FW; do
            if [ -z "${!digest_var}" ]; then
              echo "::error::Missing image digest: ${digest_var}"
              exit 1
            fi
          done

          # $1: tag to create; $2 / $3: amd64 and arm64 digests it points at.
          create_manifest() {
            docker buildx imagetools create \
              -t "lmsysorg/sglang:$1" \
              "lmsysorg/sglang@$2" \
              "lmsysorg/sglang@$3"
          }

          # Create versioned framework manifest (default)
          create_manifest "v${SGL_VERSION}" "${CU129_AMD64_FW}" "${CU129_ARM64_FW}"

          # Create latest framework manifest (default)
          create_manifest "latest" "${CU129_AMD64_FW}" "${CU129_ARM64_FW}"

          # Create versioned CUDA 13 framework manifest
          create_manifest "v${SGL_VERSION}-cu130" "${CU130_AMD64_FW}" "${CU130_ARM64_FW}"

          # Create latest CUDA 13 framework manifest
          create_manifest "latest-cu130" "${CU130_AMD64_FW}" "${CU130_ARM64_FW}"

release-docker-amd matrix .github/workflows/release-docker-amd.yml

Triggers

push, workflow_dispatch

Runs on

amd-docker-scale

Jobs

publish

Matrix

build_type, gpu_arch, rocm_version→ all, gfx942, gfx950, rocm700, rocm720

Actions

docker/login-action

Commands

if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} echo "Version: ${version}" gpu_arch_suffix="" if [ "${{ matrix.rocm_version }}" = "rocm700" ]; then if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then rocm_tag="rocm700-mi30x" elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then rocm_tag="rocm700-mi35x" else echo "Unsupported gfx arch" exit 1 fi elif [ "${{ matrix.rocm_version }}" = "rocm720" ]; then gpu_arch_suffix="-${{ matrix.rocm_version }}" if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then rocm_tag="rocm720-mi30x" elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then rocm_tag="rocm720-mi35x" else echo "Unsupported gfx arch" exit 1 fi else echo "Unsupported rocm version" exit 1 fi tag=v${version}-${rocm_tag} # rocm.Dockerfile expects SGL_BRANCH with 'v' prefix for git tag checkout docker build . -f docker/rocm.Dockerfile --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }}${gpu_arch_suffix} --build-arg SGL_BRANCH=v${version} --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic -t lmsysorg/sglang:${tag} --no-cache docker push lmsysorg/sglang:${tag}

View raw YAML

# Builds and pushes the ROCm release images, one per ROCm version / GPU
# architecture combination of the `publish` job matrix, tagged
# lmsysorg/sglang:v{version}-{rocm}-{mi30x|mi35x}.
name: Release Docker Images (AMD)
on:
  # Automatic release: runs when a version tag (e.g. v0.5.7) is pushed.
  push:
    tags:
      - 'v[0-9]+.*'
  # Manual release: the version is supplied by the person starting the run.
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
        required: true

jobs:
  # Builds docker/rocm.Dockerfile once per (rocm_version, gpu_arch) pair and
  # pushes the result as lmsysorg/sglang:v{version}-{rocm_version}-{mi30x|mi35x}.
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      matrix:
        rocm_version: ['rocm700', 'rocm720']
        gpu_arch: ['gfx942', 'gfx950']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # Handed over through the environment instead of being interpolated
          # into the script text, so the input can never be parsed as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
            VERSION="${DISPATCH_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # The whole string must match: X.Y.Z plus an optional suffix made only
          # of characters that are legal in a Docker tag (e.g. 0.5.8.post1).
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z._-]*$'
          if ! [[ "$VERSION" =~ $version_re ]]; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build and Push
        env:
          SGL_VERSION: ${{ steps.version.outputs.version }}
        run: |
          echo "Version: ${SGL_VERSION}"

          # rocm700 builds pass the bare GPU arch to the Dockerfile; rocm720
          # builds pass "<arch>-rocm720".
          gpu_arch_suffix=""
          case "${{ matrix.rocm_version }}" in
            rocm700)
              ;;
            rocm720)
              gpu_arch_suffix="-${{ matrix.rocm_version }}"
              ;;
            *)
              echo "Unsupported rocm version"
              exit 1
              ;;
          esac

          # The image tag names the product family rather than the gfx arch.
          case "${{ matrix.gpu_arch }}" in
            gfx942)
              rocm_tag="${{ matrix.rocm_version }}-mi30x"
              ;;
            gfx950)
              rocm_tag="${{ matrix.rocm_version }}-mi35x"
              ;;
            *)
              echo "Unsupported gfx arch"
              exit 1
              ;;
          esac

          tag="v${SGL_VERSION}-${rocm_tag}"

          # rocm.Dockerfile expects SGL_BRANCH with 'v' prefix for git tag checkout
          docker build . \
            -f docker/rocm.Dockerfile \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg "GPU_ARCH=${{ matrix.gpu_arch }}${gpu_arch_suffix}" \
            --build-arg "SGL_BRANCH=v${SGL_VERSION}" \
            --build-arg ENABLE_MORI=1 \
            --build-arg NIC_BACKEND=ainic \
            -t "lmsysorg/sglang:${tag}" \
            --no-cache
          docker push "lmsysorg/sglang:${tag}"

release-docker-amd-nightly matrix .github/workflows/release-docker-amd-nightly.yml

Triggers

workflow_dispatch, schedule

Runs on

amd-docker-scale, linux-mi300-gpu-1

Jobs

publish, cache

Matrix

build_type, gpu_arch→ all, gfx942, gfx950

Actions

docker/login-action, docker/login-action, docker/login-action

Commands

echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
# Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7) VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//') if [ -z "$VERSION" ]; then echo "::error::Could not determine version from git tags" exit 1 fi # Get short commit hash of current HEAD COMMIT_HASH=$(git rev-parse --short HEAD) # Compose pretend version for setuptools_scm: e.g., 0.5.8.dev20260129+g1a2b3c4 PRETEND_VERSION="${VERSION}.dev${{ env.DATE }}+g${COMMIT_HASH}" echo "version=${VERSION}" >> $GITHUB_OUTPUT echo "pretend_version=${PRETEND_VERSION}" >> $GITHUB_OUTPUT echo "Detected version: ${VERSION}" echo "Pretend version for pip: ${PRETEND_VERSION}"
version=${{ steps.version.outputs.version }} pretend_version=${{ steps.version.outputs.pretend_version }} echo "Version: ${version}" echo "Pretend version: ${pretend_version}" if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then rocm_tag="rocm700-mi30x" elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then rocm_tag="rocm700-mi35x" else echo "Unsupported gfx arch" exit 1 fi tag=v${version}-${rocm_tag} echo "IMAGE_TAG=${tag}-${{ env.DATE }}" >> $GITHUB_ENV docker build . -f docker/rocm.Dockerfile --build-arg SGL_BRANCH=${{ github.ref_name }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }} --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic --build-arg SETUPTOOLS_SCM_PRETEND_VERSION=${pretend_version} -t rocm/sgl-dev:${tag}-${{ env.DATE }} --no-cache docker push rocm/sgl-dev:${tag}-${{ env.DATE }}
docker tag rocm/sgl-dev:${{ env.IMAGE_TAG }} lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }} docker push lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }}
echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
# Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7) VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//') if [ -z "$VERSION" ]; then echo "::error::Could not determine version from git tags" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT echo "Detected version: ${VERSION}"
set -euxo pipefail version=${{ steps.version.outputs.version }} echo "Version: ${version}" if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then rocm_tag="rocm700-mi30x" else echo "Unsupported gfx arch" exit 1 fi tag=v${version}-${rocm_tag} if [ "${{ matrix.build_type }}" = "all" ]; then tag_suffix="" else echo "Unsupported build type" exit 1 fi image="rocm/sgl-dev:${tag}-${{ env.DATE }}${tag_suffix}" # Determine target cache file name based on ROCm variant if [[ "${rocm_tag}" == rocm700* ]]; then final_path="/home/runner/sgl-data/docker/image-700.tar" else echo "Unexpected ROCm tag: ${rocm_tag}" exit 1 fi tmp_path="${final_path}.tmp" echo "Pulling image: ${image}" docker pull "${image}" echo "Saving to temp file: ${tmp_path}" docker save "${image}" -o "${tmp_path}" echo "Moving to final path: ${final_path}" mv -f "${tmp_path}" "${final_path}" echo "Cache populated successfully at ${final_path}"

View raw YAML

# Nightly ROCm 7.0.0 image build: pushes a date-stamped image to rocm/sgl-dev
# and re-tags it as lmsysorg/sglang-rocm.
name: Release Docker Images Nightly (AMD)
on:
  workflow_dispatch:
  schedule:
    # Every day at 12:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 12 * * *'

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  # NOTE(review): this workflow has no pull_request trigger, so the PR-number
  # half of the expression looks unreachable and the group is effectively
  # keyed on the commit SHA - confirm and simplify if so.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  # Builds docker/rocm.Dockerfile for each GPU architecture, pushes the image
  # to rocm/sgl-dev as v{latest tag}-rocm700-{mi30x|mi35x}-{YYYYMMDD}, then
  # re-tags and pushes the same image to lmsysorg/sglang-rocm.
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      fail-fast: false
      matrix:
        gpu_arch: ['gfx942', 'gfx950']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history and tags; the version step lists tags with `git tag -l`

      - name: "Set Date"
        # DATE is exported through GITHUB_ENV, so every later step of this job
        # sees it as a plain environment variable.
        run: |
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      - name: Get version from latest tag
        id: version
        run: |
          # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
          VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')

          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi

          # Get short commit hash of current HEAD
          COMMIT_HASH=$(git rev-parse --short HEAD)

          # Compose pretend version for setuptools_scm: e.g., 0.5.8.dev20260129+g1a2b3c4
          PRETEND_VERSION="${VERSION}.dev${DATE}+g${COMMIT_HASH}"

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "pretend_version=${PRETEND_VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"
          echo "Pretend version for pip: ${PRETEND_VERSION}"

      - name: Login to Docker Hub (AMD)
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Build and Push to rocm/sgl-dev
        env:
          # Tag-derived values reach the script as environment variables and
          # are quoted on use, so unusual tag or branch names cannot be parsed
          # as shell syntax.
          SGL_VERSION: ${{ steps.version.outputs.version }}
          PRETEND_VERSION: ${{ steps.version.outputs.pretend_version }}
        run: |
          echo "Version: ${SGL_VERSION}"
          echo "Pretend version: ${PRETEND_VERSION}"

          if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
            rocm_tag="rocm700-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
            rocm_tag="rocm700-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          image_tag="v${SGL_VERSION}-${rocm_tag}-${DATE}"
          # Shared with the "Push to lmsysorg/sglang-rocm" step below.
          echo "IMAGE_TAG=${image_tag}" >> "$GITHUB_ENV"

          # GITHUB_REF_NAME is the branch or tag this run was started on.
          docker build . \
            -f docker/rocm.Dockerfile \
            --build-arg "SGL_BRANCH=${GITHUB_REF_NAME}" \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg GPU_ARCH=${{ matrix.gpu_arch }} \
            --build-arg ENABLE_MORI=1 \
            --build-arg NIC_BACKEND=ainic \
            --build-arg "SETUPTOOLS_SCM_PRETEND_VERSION=${PRETEND_VERSION}" \
            -t "rocm/sgl-dev:${image_tag}" \
            --no-cache
          docker push "rocm/sgl-dev:${image_tag}"

      - name: Login to Docker Hub (lmsys)
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Push to lmsysorg/sglang-rocm
        run: |
          docker tag "rocm/sgl-dev:${IMAGE_TAG}" "lmsysorg/sglang-rocm:${IMAGE_TAG}"
          docker push "lmsysorg/sglang-rocm:${IMAGE_TAG}"

  # Temporarily disable docker cache seeding until performant storage is in place
  #
  # When enabled, this job pulls the nightly image pushed by `publish` and
  # saves it as a tarball under /home/runner/sgl-data/docker on the runner.
  cache:
    if: false
    # if: always() && github.repository == 'sgl-project/sglang'
    runs-on: linux-mi300-gpu-1
    environment: 'prod'
    needs: publish
    strategy:
      fail-fast: false
      matrix:
        gpu_arch: ['gfx942']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history and tags; the version step lists tags with `git tag -l`

      - name: "Set Date"
        # NOTE(review): DATE is recomputed here instead of being taken from the
        # `publish` job. If the two jobs run on different calendar days the
        # image name built below would not match what was pushed - confirm
        # before re-enabling this job.
        run: |
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      - name: Get version from latest tag
        id: version
        run: |
          # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
          VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')

          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Pull and Save Docker Image to Cache
        env:
          # Passed as an environment variable and quoted on use so an unusual
          # tag name cannot be parsed as shell syntax.
          SGL_VERSION: ${{ steps.version.outputs.version }}
        run: |
          set -euxo pipefail

          echo "Version: ${SGL_VERSION}"

          if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
            rocm_tag="rocm700-mi30x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          tag="v${SGL_VERSION}-${rocm_tag}"

          if [ "${{ matrix.build_type }}" = "all" ]; then
            tag_suffix=""
          else
            echo "Unsupported build type"
            exit 1
          fi

          image="rocm/sgl-dev:${tag}-${DATE}${tag_suffix}"

          # Determine target cache file name based on ROCm variant
          if [[ "${rocm_tag}" == rocm700* ]]; then
            final_path="/home/runner/sgl-data/docker/image-700.tar"
          else
            echo "Unexpected ROCm tag: ${rocm_tag}"
            exit 1
          fi

          # Save next to the final file first, then rename, so the final path
          # is only replaced once `docker save` has finished.
          tmp_path="${final_path}.tmp"

          echo "Pulling image: ${image}"
          docker pull "${image}"

          echo "Saving to temp file: ${tmp_path}"
          docker save "${image}" -o "${tmp_path}"

          echo "Moving to final path: ${final_path}"
          mv -f "${tmp_path}" "${final_path}"

          echo "Cache populated successfully at ${final_path}"

release-docker-amd-rocm720-nightly matrix .github/workflows/release-docker-amd-rocm720-nightly.yml

Triggers

workflow_dispatch, schedule

Runs on

amd-docker-scale

Jobs

publish

Matrix

build_type, gpu_arch→ all, gfx942-rocm720, gfx950-rocm720

Actions

docker/login-action, docker/login-action

Commands

echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
# Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7) VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//') if [ -z "$VERSION" ]; then echo "::error::Could not determine version from git tags" exit 1 fi # Get short commit hash of current HEAD COMMIT_HASH=$(git rev-parse --short HEAD) # Compose pretend version for setuptools_scm: e.g., 0.5.8.post1.dev20260211+g1a2b3c4 PRETEND_VERSION="${VERSION}.dev${{ env.DATE }}+g${COMMIT_HASH}" echo "version=${VERSION}" >> $GITHUB_OUTPUT echo "pretend_version=${PRETEND_VERSION}" >> $GITHUB_OUTPUT echo "Detected version: ${VERSION}" echo "Pretend version for pip: ${PRETEND_VERSION}"
version=${{ steps.version.outputs.version }} pretend_version=${{ steps.version.outputs.pretend_version }} echo "Version: ${version}" echo "Pretend version: ${pretend_version}" if [ "${{ matrix.gpu_arch }}" = "gfx942-rocm720" ]; then rocm_tag="rocm720-mi30x" elif [ "${{ matrix.gpu_arch }}" = "gfx950-rocm720" ]; then rocm_tag="rocm720-mi35x" else echo "Unsupported gfx arch" exit 1 fi tag=v${version}-${rocm_tag} echo "IMAGE_TAG=${tag}-${{ env.DATE }}" >> $GITHUB_ENV docker build . -f docker/rocm.Dockerfile --build-arg SGL_BRANCH=${{ github.ref_name }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }} --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic --build-arg SETUPTOOLS_SCM_PRETEND_VERSION=${pretend_version} -t rocm/sgl-dev:${tag}-${{ env.DATE }} --no-cache docker push rocm/sgl-dev:${tag}-${{ env.DATE }}
docker tag rocm/sgl-dev:${{ env.IMAGE_TAG }} lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }} docker push lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }}

View raw YAML

# Nightly ROCm 7.2.0 preview image build: pushes a date-stamped image to
# rocm/sgl-dev and re-tags it as lmsysorg/sglang-rocm.
name: Release Docker Images ROCm 7.2.0 Nightly Preview (AMD)
on:
  workflow_dispatch:
  schedule:
    # Every day at 12:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 12 * * *'

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  # NOTE(review): this workflow has no pull_request trigger, so the PR-number
  # half of the expression looks unreachable and the group is effectively
  # keyed on the commit SHA - confirm and simplify if so.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  # Canonical lowercase boolean (was `True`).
  cancel-in-progress: true

jobs:
  # Builds docker/rocm.Dockerfile for each ROCm 7.2.0 GPU architecture, pushes
  # the image to rocm/sgl-dev as v{latest tag}-rocm720-{mi30x|mi35x}-{YYYYMMDD},
  # then re-tags and pushes the same image to lmsysorg/sglang-rocm.
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      fail-fast: false
      matrix:
        gpu_arch: ['gfx942-rocm720', 'gfx950-rocm720']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history and tags; the version step lists tags with `git tag -l`

      - name: "Set Date"
        # DATE is exported through GITHUB_ENV, so every later step of this job
        # sees it as a plain environment variable.
        run: |
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"

      - name: Get version from latest tag
        id: version
        run: |
          # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
          VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')

          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi

          # Get short commit hash of current HEAD
          COMMIT_HASH=$(git rev-parse --short HEAD)

          # Compose pretend version for setuptools_scm: e.g., 0.5.8.post1.dev20260211+g1a2b3c4
          PRETEND_VERSION="${VERSION}.dev${DATE}+g${COMMIT_HASH}"

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "pretend_version=${PRETEND_VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"
          echo "Pretend version for pip: ${PRETEND_VERSION}"

      - name: Login to Docker Hub (AMD)
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Build and Push to rocm/sgl-dev
        env:
          # Tag-derived values reach the script as environment variables and
          # are quoted on use, so unusual tag or branch names cannot be parsed
          # as shell syntax.
          SGL_VERSION: ${{ steps.version.outputs.version }}
          PRETEND_VERSION: ${{ steps.version.outputs.pretend_version }}
        run: |
          echo "Version: ${SGL_VERSION}"
          echo "Pretend version: ${PRETEND_VERSION}"

          if [ "${{ matrix.gpu_arch }}" = "gfx942-rocm720" ]; then
            rocm_tag="rocm720-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950-rocm720" ]; then
            rocm_tag="rocm720-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          image_tag="v${SGL_VERSION}-${rocm_tag}-${DATE}"
          # Shared with the "Push to lmsysorg/sglang-rocm" step below.
          echo "IMAGE_TAG=${image_tag}" >> "$GITHUB_ENV"

          # GITHUB_REF_NAME is the branch or tag this run was started on.
          docker build . \
            -f docker/rocm.Dockerfile \
            --build-arg "SGL_BRANCH=${GITHUB_REF_NAME}" \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg GPU_ARCH=${{ matrix.gpu_arch }} \
            --build-arg ENABLE_MORI=1 \
            --build-arg NIC_BACKEND=ainic \
            --build-arg "SETUPTOOLS_SCM_PRETEND_VERSION=${PRETEND_VERSION}" \
            -t "rocm/sgl-dev:${image_tag}" \
            --no-cache
          docker push "rocm/sgl-dev:${image_tag}"

      - name: Login to Docker Hub (lmsys)
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Push to lmsysorg/sglang-rocm
        run: |
          docker tag "rocm/sgl-dev:${IMAGE_TAG}" "lmsysorg/sglang-rocm:${IMAGE_TAG}"
          docker push "lmsysorg/sglang-rocm:${IMAGE_TAG}"

release-docker-cu13-framework .github/workflows/release-docker-cu13-framework.yml

Triggers

workflow_dispatch

Runs on

x64-docker-build-node, arm-docker-build-node, ubuntu-22.04

Jobs

publish-x86, publish-arm64, create-manifest

Actions

jlumbroso/free-disk-space, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action

Commands

rm -rf /opt/hostedtoolcache
VERSION="${{ github.event.inputs.version }}" if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target framework \ --platform linux/amd64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=13.0.1 \ --build-arg BUILD_TYPE=all \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg GRACE_BLACKWELL=0 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest-cu130-amd64-framework.txt
rm -rf /opt/hostedtoolcache
VERSION="${{ github.event.inputs.version }}" if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target framework \ --platform linux/arm64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=13.0.1 \ --build-arg BUILD_TYPE=all \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg GRACE_BLACKWELL=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest-cu130-arm64-framework.txt
version=${{ github.event.inputs.version }} AMD64_DIGEST=$(cat /tmp/digests/amd64/digest-cu130-amd64-framework.txt) ARM64_DIGEST=$(cat /tmp/digests/arm64/digest-cu130-arm64-framework.txt) # Create versioned CUDA 13 framework manifest docker buildx imagetools create \ -t lmsysorg/sglang:v${version}-cu130 \ lmsysorg/sglang@${AMD64_DIGEST} \ lmsysorg/sglang@${ARM64_DIGEST} # Create latest CUDA 13 framework manifest docker buildx imagetools create \ -t lmsysorg/sglang:latest-cu130 \ lmsysorg/sglang@${AMD64_DIGEST} \ lmsysorg/sglang@${ARM64_DIGEST}

View raw YAML

name: Release CUDA 13 Framework Docker Images (Temporary)

# Temporary workflow that builds only the CUDA 13 framework images and
# publishes them as lmsysorg/sglang:v{version}-cu130 and :latest-cu130.
# Can be deleted after use

on:
  workflow_dispatch:
    inputs:
      version:
        description: "Version to build (without v prefix, e.g., 0.5.8)"
        required: true

# Least privilege: the jobs only read the repository. Image pushes
# authenticate with Docker Hub secrets, not with the GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  publish-x86:
    # Builds the linux/amd64 CUDA 13 framework image, pushes it by digest and
    # hands the digest to the manifest job through a one-day artifact.
    if: github.repository == 'sgl-project/sglang'
    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free disk space
        # Pinned to a commit rather than the mutable `main` branch: this job
        # holds Docker Hub credentials, so third-party code must not be able
        # to change underneath it.
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Validate version
        id: version
        env:
          # Passed through the environment so the user-supplied value is never
          # spliced into the script text (avoids shell injection).
          INPUT_VERSION: ${{ github.event.inputs.version }}
        run: |
          VERSION="${INPUT_VERSION}"
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # Only the leading X.Y.Z is checked; any suffix after it is accepted.
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build and Push AMD64 Framework (CUDA 13)
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          # push-by-digest publishes the image without a tag; tags are attached
          # later by the create-manifest job.
          docker buildx build \
            --target framework \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=all \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=0 \
            --build-arg "SGL_VERSION=${VERSION}" \
            --metadata-file /tmp/metadata.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-amd64-framework.txt

      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digest-cu130-amd64
          path: /tmp/digest-cu130-amd64-framework.txt
          retention-days: 1

  publish-arm64:
    # Builds the linux/arm64 CUDA 13 framework image (GRACE_BLACKWELL=1),
    # pushes it by digest and uploads the digest for the manifest job.
    if: github.repository == 'sgl-project/sglang'
    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Validate version
        id: version
        env:
          # Passed through the environment so the user-supplied value is never
          # spliced into the script text (avoids shell injection).
          INPUT_VERSION: ${{ github.event.inputs.version }}
        run: |
          VERSION="${INPUT_VERSION}"
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # Only the leading X.Y.Z is checked; any suffix after it is accepted.
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build and Push ARM64 Framework (CUDA 13)
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          # push-by-digest publishes the image without a tag; tags are attached
          # later by the create-manifest job.
          docker buildx build \
            --target framework \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=all \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=1 \
            --build-arg "SGL_VERSION=${VERSION}" \
            --metadata-file /tmp/metadata.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-arm64-framework.txt

      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digest-cu130-arm64
          path: /tmp/digest-cu130-arm64-framework.txt
          retention-days: 1

  create-manifest:
    # Combines the per-architecture digests produced by publish-x86 and
    # publish-arm64 into the multi-arch tags v{version}-cu130 and latest-cu130.
    runs-on: ubuntu-22.04
    needs: [publish-x86, publish-arm64]
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Download amd64 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-cu130-amd64
          path: /tmp/digests/amd64

      - name: Download arm64 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-cu130-arm64
          path: /tmp/digests/arm64

      - name: Create multi-arch manifest
        env:
          # Read from the environment instead of interpolating the raw input
          # into the script, so it cannot be interpreted as shell syntax.
          VERSION: ${{ github.event.inputs.version }}
        run: |
          AMD64_DIGEST=$(cat /tmp/digests/amd64/digest-cu130-amd64-framework.txt)
          ARM64_DIGEST=$(cat /tmp/digests/arm64/digest-cu130-arm64-framework.txt)

          # Create versioned CUDA 13 framework manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${VERSION}-cu130" \
            "lmsysorg/sglang@${AMD64_DIGEST}" \
            "lmsysorg/sglang@${ARM64_DIGEST}"

          # Create latest CUDA 13 framework manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-cu130 \
            "lmsysorg/sglang@${AMD64_DIGEST}" \
            "lmsysorg/sglang@${ARM64_DIGEST}"

release-docker-dev matrix .github/workflows/release-docker-dev.yml

Triggers

workflow_dispatch, schedule

Runs on

${{ matrix.runner }}, ubuntu-22.04

Jobs

build-dev, create-manifests

Matrix

include, include.arch_tag, include.build_type, include.grace_blackwell, include.platform, include.runner, include.version, variant, variant.arm64, variant.base, variant.x86→ 0, 1, 12.9.1, 13.0.1, all, arm-docker-build-node, arm64, arm64-cu13, dev, dev-cu13, linux/amd64, linux/arm64, x64-docker-build-node, x86, x86-cu13

Actions

jlumbroso/free-disk-space, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action

Commands

rm -rf /opt/hostedtoolcache
docker buildx prune --filter "until=72h" -f docker system prune -af --filter "until=72h" docker volume prune -af
# Nightly (schedule) installs latest release; manual dispatch builds from checked-out source if [ "${{ github.event_name }}" = "schedule" ]; then SOURCE_ARG="--build-arg USE_LATEST_SGLANG=1" else SOURCE_ARG="--build-arg BRANCH_TYPE=local" fi docker buildx build \ --platform ${{ matrix.platform }} \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ --target framework \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=${{ matrix.version }} \ --build-arg BUILD_TYPE=${{ matrix.build_type }} \ --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) \ --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} \ ${SOURCE_ARG} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --metadata-file /tmp/metadata.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest.txt
X86_DIGEST=$(cat /tmp/digests/x86/digest.txt) ARM64_DIGEST=$(cat /tmp/digests/arm64/digest.txt) SUFFIX="" if [ -n "${{ inputs.tag }}" ]; then SUFFIX="-${{ inputs.tag }}" elif [ -n "${{ inputs.pr_number }}" ]; then SUFFIX="-pr-${{ inputs.pr_number }}" fi TAG="${{ matrix.variant.base }}${SUFFIX}" # For nightly (no suffix), also stamp a dated tag EXTRA_TAG="" if [ -z "${SUFFIX}" ]; then SHORT_SHA="${{ github.sha }}" EXTRA_TAG="-t lmsysorg/sglang:nightly-${TAG}-$(date +%Y%m%d)-${SHORT_SHA:0:8}" fi docker buildx imagetools create \ -t lmsysorg/sglang:${TAG} \ ${EXTRA_TAG} \ lmsysorg/sglang@${X86_DIGEST} \ lmsysorg/sglang@${ARM64_DIGEST} echo "✓ Published lmsysorg/sglang:${TAG}"
TOKEN=$(curl -s -H "Content-Type: application/json" \ -X POST -d '{"username": "${{ secrets.DOCKERHUB_USERNAME }}", "password": "${{ secrets.DOCKERHUB_TOKEN }}"}' \ https://hub.docker.com/v2/users/login/ | jq -r .token) TAGS_RESPONSE=$(curl -s -H "Authorization: JWT $TOKEN" \ "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/?page_size=100") TAGS=$(echo "$TAGS_RESPONSE" | jq -r \ '.results[] | select(.name | test("^nightly-${{ matrix.variant.base }}-[0-9]")) | "\(.last_updated)|\(.name)"' \ | sort -r | cut -d'|' -f2) TAG_COUNT=$(echo "$TAGS" | wc -l) if [ "$TAG_COUNT" -gt 14 ]; then echo "Found $TAG_COUNT nightly builds, keeping only the 14 most recent" TAGS_TO_DELETE=$(echo "$TAGS" | tail -n +15) for tag in $TAGS_TO_DELETE; do echo "Deleting tag: $tag" curl -X DELETE -H "Authorization: JWT $TOKEN" \ "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/$tag/" done else echo "Only $TAG_COUNT nightly builds found, no cleanup needed" fi

View raw YAML

name: Build and Push Development Docker Images

on:
  workflow_dispatch:
    inputs:
      pr_number:
        description: "PR number to build from (leave empty to use current branch)"
        required: false
        default: ""
      tag:
        description: "Custom tag suffix (overrides pr_number in tag). E.g. 'my-test' → dev-my-test, dev-cu13-my-test, etc."
        required: false
        default: ""
  schedule:
    # Every day at 00:00 UTC.
    - cron: "0 0 * * *"

# One run per target tag: a newer run for the same tag / PR / nightly slot
# cancels the one still in progress.
concurrency:
  group: release-docker-dev-${{ inputs.tag || inputs.pr_number || 'nightly' }}
  cancel-in-progress: true

# Least privilege: the jobs only read the repository. Image pushes
# authenticate with Docker Hub secrets, not with the GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  build-dev:
    # Builds one dev image per (architecture, CUDA version) matrix entry,
    # pushes it by digest and uploads the digest as artifact
    # digest-<arch_tag> for the create-manifests job.
    if: ${{ github.repository == 'sgl-project/sglang' }}
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        include:
          - runner: x64-docker-build-node
            platform: linux/amd64
            build_type: all
            grace_blackwell: 0
            arch_tag: x86
            version: "12.9.1"
          - runner: arm-docker-build-node
            platform: linux/arm64
            build_type: all
            grace_blackwell: 1
            arch_tag: arm64
            version: "12.9.1"
          - runner: x64-docker-build-node
            platform: linux/amd64
            build_type: all
            grace_blackwell: 0
            arch_tag: x86-cu13
            version: "13.0.1"
          - runner: arm-docker-build-node
            platform: linux/arm64
            build_type: all
            grace_blackwell: 1
            arch_tag: arm64-cu13
            version: "13.0.1"
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Build the PR head when a PR number was given, otherwise the ref
          # the workflow was started on.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || github.ref }}

      - name: Free disk space
        # Pinned to a commit rather than the mutable `main` branch: this job
        # holds Docker Hub credentials, so third-party code must not be able
        # to change underneath it.
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: true
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: true

      - name: Prune Docker to reclaim disk space
        run: |
          docker buildx prune --filter "until=72h" -f
          docker system prune -af --filter "until=72h"
          docker volume prune -af

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push Dev Image
        run: |
          # Nightly (schedule) installs latest release; manual dispatch builds from checked-out source
          if [ "${{ github.event_name }}" = "schedule" ]; then
            SOURCE_ARG="--build-arg USE_LATEST_SGLANG=1"
          else
            SOURCE_ARG="--build-arg BRANCH_TYPE=local"
          fi

          # SOURCE_ARG is left unquoted on purpose so that it expands into two
          # separate arguments ("--build-arg" and its value).
          docker buildx build \
            --platform ${{ matrix.platform }} \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            --target framework \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.version }} \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) \
            --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} \
            ${SOURCE_ARG} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --metadata-file /tmp/metadata.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest.txt

      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digest-${{ matrix.arch_tag }}
          path: /tmp/digest.txt
          retention-days: 1

  create-manifests:
    # For each image variant (dev, dev-cu13): joins the x86 and arm64 digests
    # into one multi-arch tag and, for nightly runs, prunes old dated tags.
    runs-on: ubuntu-22.04
    needs: [build-dev]
    if: ${{ github.repository == 'sgl-project/sglang' }}
    strategy:
      matrix:
        variant:
          - base: dev
            x86: x86
            arm64: arm64
          - base: dev-cu13
            x86: x86-cu13
            arm64: arm64-cu13
    steps:
      - uses: docker/setup-buildx-action@v3

      - uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Download x86 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-${{ matrix.variant.x86 }}
          path: /tmp/digests/x86

      - name: Download arm64 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-${{ matrix.variant.arm64 }}
          path: /tmp/digests/arm64

      - name: Create multi-arch manifest
        env:
          # User-controlled dispatch inputs are read from the environment so
          # they are never spliced into the script text (avoids shell injection).
          INPUT_TAG: ${{ inputs.tag }}
          INPUT_PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          X86_DIGEST=$(cat /tmp/digests/x86/digest.txt)
          ARM64_DIGEST=$(cat /tmp/digests/arm64/digest.txt)

          # A custom tag takes precedence over the PR number.
          SUFFIX=""
          if [ -n "${INPUT_TAG}" ]; then
            SUFFIX="-${INPUT_TAG}"
          elif [ -n "${INPUT_PR_NUMBER}" ]; then
            SUFFIX="-pr-${INPUT_PR_NUMBER}"
          fi

          TAG="${{ matrix.variant.base }}${SUFFIX}"
          TAG_ARGS=(-t "lmsysorg/sglang:${TAG}")

          # For nightly (no suffix), also stamp a dated tag
          if [ -z "${SUFFIX}" ]; then
            SHORT_SHA="${{ github.sha }}"
            TAG_ARGS+=(-t "lmsysorg/sglang:nightly-${TAG}-$(date +%Y%m%d)-${SHORT_SHA:0:8}")
          fi

          docker buildx imagetools create \
            "${TAG_ARGS[@]}" \
            "lmsysorg/sglang@${X86_DIGEST}" \
            "lmsysorg/sglang@${ARM64_DIGEST}"

          echo "✓ Published lmsysorg/sglang:${TAG}"

      - name: Cleanup Old Nightly Builds
        if: ${{ !inputs.tag && !inputs.pr_number }}
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        run: |
          # Build the login body with jq so credentials containing quotes or
          # backslashes are JSON-escaped rather than pasted into the script.
          LOGIN_PAYLOAD=$(jq -n \
            --arg username "$DOCKERHUB_USERNAME" \
            --arg password "$DOCKERHUB_TOKEN" \
            '{username: $username, password: $password}')

          TOKEN=$(curl -s -H "Content-Type: application/json" \
            -X POST -d "$LOGIN_PAYLOAD" \
            https://hub.docker.com/v2/users/login/ | jq -r .token)

          # Cleanup is best-effort: the manifest is already published, so a
          # failed login is reported but does not fail the job.
          if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then
            echo "::warning::Docker Hub login failed, skipping nightly cleanup"
            exit 0
          fi

          # NOTE(review): only the first page (100 tags) is inspected; confirm
          # the nightly tags of interest always fall within it.
          TAGS_RESPONSE=$(curl -s -H "Authorization: JWT $TOKEN" \
            "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/?page_size=100")

          # Newest first, names only.
          TAGS=$(echo "$TAGS_RESPONSE" | jq -r \
            '.results[] | select(.name | test("^nightly-${{ matrix.variant.base }}-[0-9]")) | "\(.last_updated)|\(.name)"' \
            | sort -r | cut -d'|' -f2)

          # Count non-empty lines so that "no tags" is reported as 0, not 1.
          # `|| true` keeps `bash -e` from aborting when grep finds nothing.
          TAG_COUNT=$(printf '%s\n' "$TAGS" | grep -c . || true)
          if [ "$TAG_COUNT" -gt 14 ]; then
            echo "Found $TAG_COUNT nightly builds, keeping only the 14 most recent"
            TAGS_TO_DELETE=$(echo "$TAGS" | tail -n +15)
            for tag in $TAGS_TO_DELETE; do
              echo "Deleting tag: $tag"
              curl -X DELETE -H "Authorization: JWT $TOKEN" \
                "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/$tag/"
            done
          else
            echo "Only $TAG_COUNT nightly builds found, no cleanup needed"
          fi

release-docker-gateway .github/workflows/release-docker-gateway.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-24.04

Jobs

publish

Actions

docker/setup-qemu-action, docker/setup-buildx-action, docker/login-action

Commands

version=$(cat sgl-model-gateway/bindings/python/src/sglang_router/version.py | cut -d'"' -f2) tag=v${version} docker buildx build . -f docker/gateway.Dockerfile \ --platform linux/amd64,linux/arm64 \ -t lmsysorg/sgl-model-gateway:${tag} \ -t lmsysorg/sgl-model-gateway:latest \ --push

View raw YAML

name: Release SGLang Model Gateway Docker Image
on:
  push:
    branches:
      - main
    # NOTE(review): the trigger watches pyproject.toml, while the publish job
    # reads the version from src/sglang_router/version.py; confirm both files
    # always change together on a release.
    paths:
      - sgl-model-gateway/bindings/python/pyproject.toml
  workflow_dispatch:

# Least privilege: the job only reads the repository. The image push
# authenticates with Docker Hub secrets, not with the GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  publish:
    # Builds the gateway image for amd64 and arm64 in one buildx invocation
    # (arm64 via QEMU emulation) and pushes it as v<version> and latest.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          # The version is the text between the first pair of double quotes
          # in version.py.
          version_file=sgl-model-gateway/bindings/python/src/sglang_router/version.py
          version=$(cut -d'"' -f2 < "${version_file}")

          # Refuse to publish a bare "v" tag when no version could be parsed.
          if [ -z "${version}" ]; then
            echo "::error::Could not read a version from ${version_file}"
            exit 1
          fi
          tag="v${version}"

          docker buildx build . -f docker/gateway.Dockerfile \
            --platform linux/amd64,linux/arm64 \
            -t "lmsysorg/sgl-model-gateway:${tag}" \
            -t lmsysorg/sgl-model-gateway:latest \
            --push

release-docker-npu matrix .github/workflows/release-docker-npu.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-22.04-arm

Jobs

build

Matrix

cann_version, device_type→ 8.5.0, 910b, a3

Actions

jlumbroso/free-disk-space, docker/metadata-action, docker/login-action, docker/setup-qemu-action, docker/setup-buildx-action, docker/build-push-action

Commands

if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=v${VERSION}" >> $GITHUB_OUTPUT echo "TAG=lmsysorg/sglang:v${VERSION}-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> $GITHUB_OUTPUT

View raw YAML

name: Release Docker Images (NPU)
on:
  # Runs for pushed version tags (e.g. v0.5.7); the version is taken from the tag name.
  push:
    tags:
      - 'v[0-9]+.*'
  # Manual runs supply the version explicitly.
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
        required: true

# Least privilege: the job only reads the repository. The image push
# authenticates with Docker Hub secrets, not with the GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  build:
    # Builds one multi-arch NPU image per (CANN version, device type) pair and
    # pushes it as lmsysorg/sglang:v<version>-cann<cann_version>-<device_type>.
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        cann_version: ["8.5.0"]
        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      # push with tag
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            lmsysorg/sglang
          tags: |
            type=ref,event=pr
          flavor: |
            latest=false

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # Passed through the environment so the user-supplied value is never
          # spliced into the script text (avoids shell injection).
          INPUT_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="${INPUT_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # Only the leading X.Y.Z is checked; any suffix after it is accepted.
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=v${VERSION}" >> "$GITHUB_OUTPUT"
          echo "TAG=lmsysorg/sglang:v${VERSION}-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> "$GITHUB_OUTPUT"
      # Enable Docker multi-architecture build environment
      # Emulate non-native architectures
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      # Required for building and pushing multi-arch Docker images
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/npu.Dockerfile
          platforms: linux/arm64,linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          # Falls back to the tag computed by the version step when the
          # metadata action produced no tags.
          tags: ${{ steps.meta.outputs.tags || steps.version.outputs.TAG }}
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=2026.03.10.rc1
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
            SGLANG_TAG=${{ steps.version.outputs.version }}

release-docker-npu-nightly matrix .github/workflows/release-docker-npu-nightly.yml

Triggers: pull_request, workflow_dispatch, schedule
Runs on: ubuntu-22.04-arm
Jobs: build
Matrix: cann_version, device_type→ 8.5.0, 910b, a3
Actions: jlumbroso/free-disk-space, docker/setup-buildx-action, docker/metadata-action, docker/login-action, docker/setup-qemu-action, docker/setup-buildx-action, docker/build-push-action

View raw YAML

name: Release Docker Images Nightly (NPU)
on:
  # PRs that touch this workflow or the NPU Dockerfile get a build; the job
  # skips Docker Hub login and push for pull_request events.
  pull_request:
    branches:
      - 'main'
    paths:
      - '.github/workflows/release-docker-npu-nightly.yml'
      - 'docker/npu.Dockerfile'
  workflow_dispatch:
  schedule:
    # Every day at 00:00 UTC.
    - cron: "0 0 * * *"

concurrency:
  group: ${{ github.workflow }}-${{ github.sha }}
  cancel-in-progress: true

# Least privilege: the job only reads the repository. Image pushes
# authenticate with Docker Hub secrets, not with the GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  build:
    # Builds one multi-arch NPU image per (CANN version, device type) pair.
    # The image is pushed on every event except pull_request.
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        cann_version: ["8.5.0"]
        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            lmsysorg/sglang
          # push with schedule event
          # push with workflow_dispatch event
          tags: |
            type=ref,event=pr
            type=ref,event=branch
            type=schedule,pattern=main
          flavor: |
            latest=false
            suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }},onlatest=true
      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into docker hub
        uses: docker/login-action@v3
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Enable Docker multi-architecture build environment
      # Emulate non-native architectures
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      # Required for building and pushing multi-arch Docker images.
      # Buildx is set up once, after QEMU; an earlier duplicate setup step
      # was dropped as redundant.
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/npu.Dockerfile
          platforms: linux/arm64,linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.meta.outputs.tags }}
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=2026.03.10.rc1
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}

release-docker-runtime matrix .github/workflows/release-docker-runtime.yml

Triggers

push, workflow_dispatch

Runs on

x64-docker-build-node, arm-docker-build-node, ubuntu-22.04

Jobs

publish-x86, publish-arm64, create-manifests

Matrix

variant, variant.build_type, variant.cuda_version, variant.grace_blackwell→ 0, 1, 12.9.1, all

Actions

jlumbroso/free-disk-space, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action, docker/setup-buildx-action, docker/login-action

Commands

rm -rf /opt/hostedtoolcache
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target runtime \ --platform linux/amd64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu129-runtime.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest-cu129-amd64-runtime.txt
version=${{ steps.version.outputs.version }} docker buildx build \ --target runtime \ --platform linux/amd64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=13.0.1 \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg GRACE_BLACKWELL=0 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu130-runtime.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest-cu130-amd64-runtime.txt
rm -rf /opt/hostedtoolcache
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} docker buildx build \ --target runtime \ --platform linux/arm64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \ --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu129-runtime.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest-cu129-arm64-runtime.txt
version=${{ steps.version.outputs.version }} docker buildx build \ --target runtime \ --platform linux/arm64 \ --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \ -f docker/Dockerfile \ --build-arg CUDA_VERSION=13.0.1 \ --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \ --build-arg GRACE_BLACKWELL=1 \ --build-arg SGL_VERSION=${version} \ --metadata-file /tmp/metadata-cu130-runtime.json \ --no-cache \ . DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])") echo "Pushed digest: ${DIGEST}" echo "${DIGEST}" > /tmp/digest-cu130-arm64-runtime.txt

View raw YAML

name: Release Docker Runtime Images
#
# This workflow builds and publishes runtime Docker images (production-optimized, ~50% smaller):
#   - lmsysorg/sglang:v{version}-runtime, lmsysorg/sglang:latest-runtime
#   - lmsysorg/sglang:v{version}-cu130-runtime, lmsysorg/sglang:latest-cu130-runtime
#
on:
  # Tag pushes: the version is derived from the tag name (e.g. v0.5.7 -> 0.5.7).
  push:
    tags:
      - "v[0-9]+.*"
  # Manual runs: the version comes from the required `version` input.
  workflow_dispatch:
    inputs:
      version:
        description: "Version to build (without v prefix, e.g., 0.5.7)"
        required: true

jobs:
  # Builds the linux/amd64 runtime images (CUDA 12.9 and CUDA 13) and pushes
  # them to Docker Hub by digest only. The digests are written to
  # /tmp/digest-*.txt and uploaded as the digests-amd64 artifact.
  publish-x86:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 0
    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      # NOTE(review): this action is referenced by a mutable branch (@main);
      # consider pinning it to a release tag or commit SHA.
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # The manually supplied version is passed through the environment
          # rather than expanded into the script text, so it is never parsed
          # as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$DISPATCH_VERSION"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build and Push AMD64 Runtime
        env:
          # Read from the environment and quoted below: the validation above
          # only anchors the start of the version string.
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu129-runtime.json \
            --no-cache \
            .

          # The image is pushed without a tag; record its digest for the manifest job.
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu129-amd64-runtime.txt

      - name: Build and Push AMD64 Runtime (CUDA 13)
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=0 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu130-runtime.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-amd64-runtime.txt

      - name: Upload digests
        uses: actions/upload-artifact@v4
        with:
          name: digests-amd64
          path: /tmp/digest-*.txt
          retention-days: 1

  # Builds the linux/arm64 runtime images (CUDA 12.9 and CUDA 13) and pushes
  # them to Docker Hub by digest only. The digests are written to
  # /tmp/digest-*.txt and uploaded as the digests-arm64 artifact.
  publish-arm64:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 1
    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # The manually supplied version is passed through the environment
          # rather than expanded into the script text, so it is never parsed
          # as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$DISPATCH_VERSION"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build and Push ARM64 Runtime
        env:
          # Read from the environment and quoted below: the validation above
          # only anchors the start of the version string.
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu129-runtime.json \
            --no-cache \
            .

          # The image is pushed without a tag; record its digest for the manifest job.
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu129-arm64-runtime.txt

      # NOTE(review): unlike the other runtime builds in this workflow, this
      # one does not pass INSTALL_FLASHINFER_JIT_CACHE=1 - confirm that the
      # omission is intentional.
      - name: Build and Push ARM64 Runtime (CUDA 13)
        env:
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=1 \
            --build-arg SGL_VERSION="${RELEASE_VERSION}" \
            --metadata-file /tmp/metadata-cu130-runtime.json \
            --no-cache \
            .

          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-arm64-runtime.txt

      - name: Upload digests
        uses: actions/upload-artifact@v4
        with:
          name: digests-arm64
          path: /tmp/digest-*.txt
          retention-days: 1

  # Combines the per-architecture image digests produced by publish-x86 and
  # publish-arm64 into the tagged multi-arch manifests
  # (v{version}-runtime, latest-runtime, v{version}-cu130-runtime,
  # latest-cu130-runtime).
  create-manifests:
    runs-on: ubuntu-22.04
    needs: [publish-x86, publish-arm64]
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # The manually supplied version is passed through the environment
          # rather than expanded into the script text, so it is never parsed
          # as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$DISPATCH_VERSION"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Download amd64 digests
        uses: actions/download-artifact@v4
        with:
          name: digests-amd64
          path: /tmp/digests/amd64

      - name: Download arm64 digests
        uses: actions/download-artifact@v4
        with:
          name: digests-arm64
          path: /tmp/digests/arm64

      - name: Create multi-arch manifests
        env:
          # Read from the environment and quoted below: the validation above
          # only anchors the start of the version string.
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          version="${RELEASE_VERSION}"

          # Each file holds the digest of one pushed single-architecture image.
          CU129_AMD64_RT=$(cat /tmp/digests/amd64/digest-cu129-amd64-runtime.txt)
          CU130_AMD64_RT=$(cat /tmp/digests/amd64/digest-cu130-amd64-runtime.txt)
          CU129_ARM64_RT=$(cat /tmp/digests/arm64/digest-cu129-arm64-runtime.txt)
          CU130_ARM64_RT=$(cat /tmp/digests/arm64/digest-cu130-arm64-runtime.txt)

          # Create versioned runtime manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${version}-runtime" \
            "lmsysorg/sglang@${CU129_AMD64_RT}" \
            "lmsysorg/sglang@${CU129_ARM64_RT}"

          # Create latest runtime manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-runtime \
            "lmsysorg/sglang@${CU129_AMD64_RT}" \
            "lmsysorg/sglang@${CU129_ARM64_RT}"

          # Create versioned CUDA 13 runtime manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${version}-cu130-runtime" \
            "lmsysorg/sglang@${CU130_AMD64_RT}" \
            "lmsysorg/sglang@${CU130_ARM64_RT}"

          # Create latest CUDA 13 runtime manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-cu130-runtime \
            "lmsysorg/sglang@${CU130_AMD64_RT}" \
            "lmsysorg/sglang@${CU130_ARM64_RT}"

release-docker-xeon matrix .github/workflows/release-docker-xeon.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-24.04

Jobs

publish

Matrix

build_type→ all

Actions

docker/login-action

Commands

if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then VERSION="${{ github.event.inputs.version }}" else # Extract version from tag (e.g., v0.5.7 -> 0.5.7) VERSION="${GITHUB_REF_NAME#v}" fi # Validate version format if [ -z "$VERSION" ]; then echo "::error::Version is empty" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)" exit 1 fi echo "version=${VERSION}" >> $GITHUB_OUTPUT
version=${{ steps.version.outputs.version }} tag=v${version}-xeon docker build . -f docker/xeon.Dockerfile \ --build-arg VER_SGLANG=v${version} \ -t lmsysorg/sglang:${tag} \ --no-cache docker push lmsysorg/sglang:${tag}

View raw YAML

name: Release Docker Xeon Images
# Builds docker/xeon.Dockerfile for a released version and pushes the result
# to Docker Hub as lmsysorg/sglang:v{version}-xeon.
on:
  push:
    tags:
      - 'v[0-9]+.*'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
        required: true

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version from tag
        id: version
        env:
          # The manually supplied version is passed through the environment
          # rather than expanded into the script text, so it is never parsed
          # as shell code.
          DISPATCH_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$DISPATCH_VERSION"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi

          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi

          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - name: Build and Push
        env:
          # Read from the environment and quoted below: the validation above
          # only anchors the start of the version string.
          RELEASE_VERSION: ${{ steps.version.outputs.version }}
        run: |
          version="${RELEASE_VERSION}"
          tag="v${version}-xeon"

          docker build . -f docker/xeon.Dockerfile \
            --build-arg VER_SGLANG="v${version}" \
            -t "lmsysorg/sglang:${tag}" \
            --no-cache
          docker push "lmsysorg/sglang:${tag}"

release-docs .github/workflows/release-docs.yml

Triggers

release, push, workflow_dispatch

Runs on

1-gpu-h100

Jobs

execute-and-deploy

Commands

git fetch --prune --unshallow || git fetch --prune --depth=0
bash scripts/ci/cuda/ci_install_dependency.sh pip install -r docs/requirements.txt apt-get update && apt-get install -y pandoc parallel retry ln -sf "$(which python3)" /usr/bin/python
python -m ipykernel install --user --name python3 --display-name "Python 3"
cd docs make clean make compile
cd docs make html make markdown python3 wrap_run_llm.py if [[ "${{ github.event_name }}" == "release" ]]; then python3 release_lookup/generate_index.py --output release_lookup/release_index.json # Copy release lookup tool for official docs on published releases. mkdir -p _build/html/release_lookup cp release_lookup/index.html _build/html/release_lookup/ cp release_lookup/release_index.json _build/html/release_lookup/ fi cd _build/html git clone https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git ../sgl-project.github.io --depth 1 if [[ "${{ github.event_name }}" == "release" ]]; then find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete else find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -path "../sgl-project.github.io/release_lookup*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete fi cp -r * ../sgl-project.github.io cp ../../README.md ../sgl-project.github.io/README.md cd ../sgl-project.github.io git config user.name "sglang-bot" git config user.email "sglangbot@gmail.com" git add . git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')" git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main cd .. rm -rf sgl-project.github.io

View raw YAML

name: Release Documentation

# Executes the documentation notebooks, builds the HTML/Markdown docs and
# publishes the result to the sgl-project/sgl-project.github.io repository.
on:
  release:
    types: [published]
  push:
    branches:
      - main
    paths:
      - "docs/**"
      - "python/sglang/version.py"
      - "python/sglang/**"
  workflow_dispatch:

# A newer run for the same ref cancels one that is still in progress.
concurrency:
  group: release-docs-${{ github.ref }}
  cancel-in-progress: true

env:
  SGLANG_IS_IN_CI: true

jobs:
  execute-and-deploy:
    runs-on: 1-gpu-h100
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Fetch full git history for release index
        if: github.event_name == 'release'
        run: |
          git fetch --prune --unshallow || git fetch --prune --depth=0

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh
          pip install -r docs/requirements.txt
          apt-get update && apt-get install -y pandoc parallel retry
          ln -sf "$(which python3)" /usr/bin/python

      - name: Setup Jupyter Kernel
        run: |
          python -m ipykernel install --user --name python3 --display-name "Python 3"

      - name: Execute notebooks
        timeout-minutes: 40
        run: |
          cd docs
          make clean
          make compile

      - name: Push HTML to sgl-project.github.io
        timeout-minutes: 30
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_DOCUMENTATION }}
        run: |
          cd docs
          make html
          make markdown
          python3 wrap_run_llm.py

          if [[ "${{ github.event_name }}" == "release" ]]; then
            python3 release_lookup/generate_index.py --output release_lookup/release_index.json

            # Copy release lookup tool for official docs on published releases.
            mkdir -p _build/html/release_lookup
            cp release_lookup/index.html _build/html/release_lookup/
            cp release_lookup/release_index.json _build/html/release_lookup/
          fi

          cd _build/html

          git clone https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git ../sgl-project.github.io --depth 1
          # Empty the published site before copying the new build, keeping the
          # git metadata and CNAME/.jekyll/.nojekyll. Outside of releases the
          # existing release_lookup directory is kept as well, because it is
          # only regenerated for release events.
          if [[ "${{ github.event_name }}" == "release" ]]; then
            find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete
          else
            find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -path "../sgl-project.github.io/release_lookup*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete
          fi
          cp -r * ../sgl-project.github.io
          cp ../../README.md ../sgl-project.github.io/README.md
          cd ../sgl-project.github.io
          git config user.name "sglang-bot"
          git config user.email "sglangbot@gmail.com"
          git add .
          # Commit only when the rebuilt site differs from what is published;
          # a bare "git commit" with nothing staged exits non-zero and would
          # fail the job.
          git diff --staged --quiet || git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')"
          git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main
          cd ..
          rm -rf sgl-project.github.io

release-pypi .github/workflows/release-pypi.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-latest

Jobs

publish

Commands

cd python cp ../README.md ../LICENSE . pip install build wheel setuptools setuptools-scm python3 -m build pip install twine python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}

View raw YAML

name: Release PyPI
# Builds the package in python/ and uploads the resulting distributions to PyPI.
on:
  push:
    tags:
      - 'v[0-9]+.*'
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: "prod"
    steps:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Required for setuptools-scm to determine version from tags

      - name: Upload to pypi
        env:
          # twine reads its credentials from these variables, which keeps the
          # token out of the command line (and therefore out of the process
          # list).
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          cd python
          cp ../README.md ../LICENSE .
          pip install build wheel setuptools setuptools-scm
          python3 -m build
          pip install twine
          # --non-interactive: fail instead of prompting if credentials are missing.
          python3 -m twine upload --non-interactive dist/*

release-pypi-gateway matrix .github/workflows/release-pypi-gateway.yml

Triggers

push, workflow_dispatch

Runs on

${{ matrix.os }}-latest, ubuntu-latest, ubuntu-latest

Jobs

build, build-sdist, upload

Matrix

Actions

PyO3/maturin-action, PyO3/maturin-action

Commands

mv sglang-repo/sgl-model-gateway/* . rm -rf sglang-repo ls -alt
pip install -U twine
brew install protobuf
choco install protoc -y
${{ matrix.ls || 'ls -lh' }} bindings/python/dist/
twine check --strict bindings/python/dist/*
mv sglang-repo/sgl-model-gateway/* . rm -rf sglang-repo ls -alt
pip install twine twine upload dist/* --verbose

View raw YAML

name: Release SGLang Model Gateway to PyPI

on:
  # Runs when the gateway's Python packaging metadata changes on main.
  push:
    branches:
      - main
    paths:
      - sgl-model-gateway/bindings/python/pyproject.toml
  # Manual run.
  workflow_dispatch:

jobs:
  # Builds the gateway's Python wheels with maturin, one job per
  # OS / target / manylinux combination, and uploads each job's
  # bindings/python/dist/ directory as a separate artifact.
  build:
    name: build on ${{ matrix.platform || matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }})
    runs-on: ${{ matrix.os }}-latest
    strategy:
      # Let the remaining combinations finish even if one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu, macos, windows]
        target: [x86_64, aarch64]
        manylinux: [auto]
        # Per-combination extras (platform label, directory-listing command,
        # Python architecture, interpreter list) plus the musllinux builds.
        include:
          - os: ubuntu
            platform: linux
          - os: windows
            ls: dir
            target: x86_64
            python-architecture: x64
            interpreter: 3.9 3.10 3.11 3.12 3.13
          - os: macos
            target: aarch64
            interpreter: 3.9 3.10 3.11 3.12 3.13
          - os: ubuntu
            platform: linux
            target: aarch64
          # musllinux
          - os: ubuntu
            platform: linux
            target: x86_64
            manylinux: musllinux_1_1
          - os: ubuntu
            platform: linux
            target: aarch64
            manylinux: musllinux_1_1
        # No Windows aarch64 wheels are built.
        exclude:
          - os: windows
            target: aarch64

    steps:
      # Check out into a subdirectory so that the next step can move the
      # gateway sources to the workspace root.
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # NOTE(review): the "*" glob does not match dotfiles, so hidden entries
      # directly under sgl-model-gateway/ are removed with sglang-repo rather
      # than moved - confirm none are needed for the build.
      - name: Move sgl-model-gateway folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo
          ls -alt
        shell: bash

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          architecture: ${{ matrix.python-architecture || 'x64' }}

      # twine is used by the "Check packages" step below.
      - name: Install twine
        run: pip install -U twine

      - name: Install protoc (macOS)
        if: matrix.os == 'macos'
        run: brew install protobuf

      - name: Install protoc (Windows)
        if: matrix.os == 'windows'
        run: choco install protoc -y

      # The interpreter list falls back to 3.9-3.14 when the matrix entry
      # does not set one. before-script-linux installs the build tools,
      # optionally the aarch64 cross toolchain, and protoc 32.0.
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          working-directory: bindings/python
          target: ${{ matrix.target }}
          manylinux: ${{ matrix.manylinux || 'auto' }}
          args: --release --out dist --features vendored-openssl --interpreter ${{ matrix.interpreter || '3.9 3.10 3.11 3.12 3.13 3.14' }}
          rust-toolchain: stable
          docker-options: -e CI -e CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc -e CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++
          before-script-linux: |
            # Install build dependencies (perl/make for vendored OpenSSL, protoc for gRPC)
            if command -v yum &> /dev/null; then
              yum update -y && yum install -y wget unzip gcc gcc-c++ perl-core make
              # Install cross-compilation toolchain for aarch64 if needed
              if [ "${{ matrix.target }}" = "aarch64" ]; then
                yum install -y gcc-aarch64-linux-gnu gcc-c++-aarch64-linux-gnu || true
              fi
            elif command -v apt-get &> /dev/null; then
              apt-get update && apt-get install -y wget unzip gcc g++ perl make
              # Install cross-compilation toolchain for aarch64 if needed
              if [ "${{ matrix.target }}" = "aarch64" ]; then
                apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu || true
              fi
            fi
            (cd /tmp && \
             wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip && \
             unzip protoc-32.0-linux-x86_64.zip -d /usr/local && \
             rm protoc-32.0-linux-x86_64.zip)
            protoc --version

      # Uses "dir" for the matrix entry that sets ls, "ls -lh" otherwise.
      - name: List built packages
        run: ${{ matrix.ls || 'ls -lh' }} bindings/python/dist/

      - name: Check packages
        run: twine check --strict bindings/python/dist/*

      # The artifact name encodes os/target/manylinux to keep it distinct per matrix job.
      - uses: actions/upload-artifact@v4
        with:
          name: packages-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux || 'auto' }}
          path: bindings/python/dist/

  # Builds the source distribution of the gateway's Python bindings with
  # maturin and uploads the resulting .tar.gz as the "sdist" artifact.
  build-sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    steps:
      # Check out into a subdirectory so that the next step can move the
      # gateway sources to the workspace root.
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      - name: Move sgl-model-gateway folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo
          ls -alt

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Build SDist
        uses: PyO3/maturin-action@v1
        with:
          working-directory: bindings/python
          command: sdist
          args: --out dist
          rust-toolchain: stable

      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: bindings/python/dist/*.tar.gz

  # Collects the artifacts produced by the build and build-sdist jobs and
  # uploads the files to PyPI with twine.
  upload:
    name: Upload to PyPI
    if: github.repository == 'sgl-project/sglang'  # Ensure this job only runs for the sgl-project/sglang repository
    needs: [build, build-sdist]
    runs-on: ubuntu-latest
    steps:
      # No artifact name is given, so every artifact of this run is
      # downloaded; merge-multiple puts their files together under dist/.
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      # twine picks up its credentials from TWINE_USERNAME / TWINE_PASSWORD.
      - name: Upload to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_ROUTER }}
        run: |
          pip install twine
          twine upload dist/* --verbose

release-pypi-nightly .github/workflows/release-pypi-nightly.yml

Triggers

schedule, repository_dispatch, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest

Jobs

build-nightly-wheel, release-nightly

Actions

softprops/action-gh-release

Commands

pip install build wheel setuptools setuptools-scm
cd python cp ../README.md ../LICENSE . # Parse git describe output to get latest tag # Use same command as pyproject.toml to ensure version consistency DESC=$(git tag --list --sort=-version:refname 'v*.*.*' | head -1 | xargs git describe --tags --long 2>/dev/null || echo 'v0.0.0-0-g0000000') TAG=$(echo "$DESC" | cut -d- -f1) HASH="g$(git rev-parse --short HEAD)" BUILD_DATE=$(date -u +%Y%m%d) # Increment patch version for nightlies (e.g., v0.5.8 -> 0.5.9) VERSION=${TAG#v} # Remove 'v' prefix MAJOR=$(echo "$VERSION" | cut -d. -f1) MINOR=$(echo "$VERSION" | cut -d. -f2) PATCH=$(echo "$VERSION" | cut -d. -f3) NEXT_PATCH=$((PATCH + 1)) NEXT_VERSION="${MAJOR}.${MINOR}.${NEXT_PATCH}" # Use date-based dev number for correct chronological sorting # e.g., 0.5.9.dev20260215+g4cf4f0859 > 0.5.9.dev20260214+g45a4697d4 FORCE_VERSION="${NEXT_VERSION}.dev${BUILD_DATE}+${HASH}" echo "Forcing nightly version to: $FORCE_VERSION" export SETUPTOOLS_SCM_PRETEND_VERSION="$FORCE_VERSION" # Build wheel python3 -m build --wheel # Extract version from built wheel filename WHEEL_FILE=$(ls dist/*.whl) NIGHTLY_VERSION=$(echo "$WHEEL_FILE" | sed 's/.*sglang-\(.*\)-py3.*/\1/') # Get commit info COMMIT_HASH=$(git rev-parse --short HEAD) BUILD_DATE=$(date -u +%Y-%m-%d) echo "Built wheel: $WHEEL_FILE" echo "Nightly version: ${NIGHTLY_VERSION}" echo "Commit: ${COMMIT_HASH}" echo "Build date: ${BUILD_DATE}" echo "nightly_version=${NIGHTLY_VERSION}" >> $GITHUB_OUTPUT echo "commit_hash=${COMMIT_HASH}" >> $GITHUB_OUTPUT echo "build_date=${BUILD_DATE}" >> $GITHUB_OUTPUT
echo "Downloaded wheel:" ls -lh dist/
git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl cd sgl-whl git config --local user.name "sglang-bot" git config --local user.email "sglangbot@gmail.com"
python3 scripts/update_nightly_whl_index.py \ --commit-hash ${{ needs.build-nightly-wheel.outputs.commit_hash }} \ --nightly-version ${{ needs.build-nightly-wheel.outputs.nightly_version }} \ --cuda-version ${{ inputs.cuda_version || '129' }} \ --build-date ${{ needs.build-nightly-wheel.outputs.build_date }}
cd sgl-whl git add -A git diff --staged --quiet || git commit -m "Update nightly wheel index for commit ${{ needs.build-nightly-wheel.outputs.commit_hash }}" git push

View raw YAML

name: Release PyPI Nightly Wheels

on:
  # Run daily at 2 AM UTC
  schedule:
    - cron: '0 2 * * *'
  # Triggered by nightly Docker workflow to use same commit
  repository_dispatch:
    types: [nightly-release]
  # Manual trigger for testing
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Specific commit SHA to build (leave empty for latest)'
        required: false
        type: string
      cuda_version:
        description: 'CUDA version (e.g., 129 or 130)'
        required: false
        default: '129'
        type: string

# A newer run for the same ref cancels one that is still in progress.
concurrency:
  group: release-pypi-nightly-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Builds the nightly wheel from the selected commit with a forced
  # "<next patch>.dev<date>+g<hash>" version, uploads it as the nightly-wheel
  # artifact and exposes the version, short commit hash and build date as
  # job outputs.
  build-nightly-wheel:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    outputs:
      nightly_version: ${{ steps.build.outputs.nightly_version }}
      commit_hash: ${{ steps.build.outputs.commit_hash }}
      build_date: ${{ steps.build.outputs.build_date }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Use commit from: 1) Docker workflow, 2) manual input, 3) latest main
          ref: ${{ github.event.client_payload.commit_sha || inputs.commit_sha || github.sha }}
          fetch-depth: 0  # Need full history for setuptools-scm

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install build dependencies
        run: |
          pip install build wheel setuptools setuptools-scm

      - name: Build wheel
        id: build
        run: |
          cd python
          cp ../README.md ../LICENSE .

          # Parse git describe output to get latest tag
          # Use same command as pyproject.toml to ensure version consistency
          DESC=$(git tag --list --sort=-version:refname 'v*.*.*' | head -1 | xargs git describe --tags --long 2>/dev/null || echo 'v0.0.0-0-g0000000')
          TAG=$(echo "$DESC" | cut -d- -f1)
          HASH="g$(git rev-parse --short HEAD)"
          BUILD_DATE=$(date -u +%Y%m%d)

          # Increment patch version for nightlies (e.g., v0.5.8 -> 0.5.9)
          VERSION=${TAG#v}  # Remove 'v' prefix
          MAJOR=$(echo "$VERSION" | cut -d. -f1)
          MINOR=$(echo "$VERSION" | cut -d. -f2)
          PATCH=$(echo "$VERSION" | cut -d. -f3)
          # Keep only the leading digits of the patch component so that a
          # pre-release tag (e.g. v0.5.9rc1 -> "9rc1") does not abort the
          # arithmetic below; fall back to 0 if no digits remain.
          PATCH=${PATCH%%[!0-9]*}
          PATCH=${PATCH:-0}
          # 10# forces base 10 so a zero-padded component is not read as octal.
          NEXT_PATCH=$((10#$PATCH + 1))
          NEXT_VERSION="${MAJOR}.${MINOR}.${NEXT_PATCH}"

          # Use date-based dev number for correct chronological sorting
          # e.g., 0.5.9.dev20260215+g4cf4f0859 > 0.5.9.dev20260214+g45a4697d4
          FORCE_VERSION="${NEXT_VERSION}.dev${BUILD_DATE}+${HASH}"
          echo "Forcing nightly version to: $FORCE_VERSION"
          export SETUPTOOLS_SCM_PRETEND_VERSION="$FORCE_VERSION"

          # Build wheel
          python3 -m build --wheel

          # Extract version from built wheel filename
          WHEEL_FILE=$(ls dist/*.whl)
          NIGHTLY_VERSION=$(echo "$WHEEL_FILE" | sed 's/.*sglang-\(.*\)-py3.*/\1/')

          # Get commit info
          COMMIT_HASH=$(git rev-parse --short HEAD)
          BUILD_DATE=$(date -u +%Y-%m-%d)

          echo "Built wheel: $WHEEL_FILE"
          echo "Nightly version: ${NIGHTLY_VERSION}"
          echo "Commit: ${COMMIT_HASH}"
          echo "Build date: ${BUILD_DATE}"

          echo "nightly_version=${NIGHTLY_VERSION}" >> "$GITHUB_OUTPUT"
          echo "commit_hash=${COMMIT_HASH}" >> "$GITHUB_OUTPUT"
          echo "build_date=${BUILD_DATE}" >> "$GITHUB_OUTPUT"

      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: nightly-wheel
          path: python/dist/*.whl
          retention-days: 7

  # Publishes the wheel built by build-nightly-wheel as a pre-release in the
  # sgl-project/whl repository and then updates and pushes that repository's
  # wheel index.
  release-nightly:
    needs: build-nightly-wheel
    runs-on: ubuntu-latest
    environment: 'prod'
    steps:
      - uses: actions/checkout@v4

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: nightly-wheel
          path: dist/

      - name: List downloaded wheels
        run: |
          echo "Downloaded wheel:"
          ls -lh dist/

      - name: Create GitHub Release for nightly wheel
        uses: softprops/action-gh-release@v2
        with:
          tag_name: nightly-${{ needs.build-nightly-wheel.outputs.build_date }}-${{ needs.build-nightly-wheel.outputs.commit_hash }}
          name: Nightly Build ${{ needs.build-nightly-wheel.outputs.build_date }} (${{ needs.build-nightly-wheel.outputs.commit_hash }})
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          prerelease: true
          # The commit shown is the ref the build job checked out, which is
          # not necessarily github.sha when a commit was passed in by the
          # Docker workflow or by manual input.
          body: |
            Nightly build from commit ${{ github.event.client_payload.commit_sha || inputs.commit_sha || github.sha }}
            Build date: ${{ needs.build-nightly-wheel.outputs.build_date }}
            Version: ${{ needs.build-nightly-wheel.outputs.nightly_version }}
          files: |
            dist/*.whl

      - name: Clone wheel index repository
        run: |
          git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Update wheel index
        env:
          # Values (including the manually supplied cuda_version input) are
          # passed through the environment and quoted below instead of being
          # expanded into the script text.
          NIGHTLY_COMMIT_HASH: ${{ needs.build-nightly-wheel.outputs.commit_hash }}
          NIGHTLY_WHEEL_VERSION: ${{ needs.build-nightly-wheel.outputs.nightly_version }}
          NIGHTLY_CUDA_VERSION: ${{ inputs.cuda_version || '129' }}
          NIGHTLY_BUILD_DATE: ${{ needs.build-nightly-wheel.outputs.build_date }}
        run: |
          python3 scripts/update_nightly_whl_index.py \
            --commit-hash "$NIGHTLY_COMMIT_HASH" \
            --nightly-version "$NIGHTLY_WHEEL_VERSION" \
            --cuda-version "$NIGHTLY_CUDA_VERSION" \
            --build-date "$NIGHTLY_BUILD_DATE"

      - name: Push wheel index
        env:
          NIGHTLY_COMMIT_HASH: ${{ needs.build-nightly-wheel.outputs.commit_hash }}
        run: |
          cd sgl-whl
          git add -A
          # Commit only if the index actually changed.
          git diff --staged --quiet || git commit -m "Update nightly wheel index for commit ${NIGHTLY_COMMIT_HASH}"
          git push

release-pypi-pr .github/workflows/release-pypi-pr.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest

Jobs

build-pr-wheel, release-pr-wheel

Actions

softprops/action-gh-release

Commands

# Get base version from the latest v*.*.* git tag directly # Note: We cannot use setuptools_scm here because the [tool.setuptools_scm] # config (with custom git_describe_command) lives in python/pyproject.toml, # not at the repo root. Without that config, setuptools_scm falls back to # default git describe which finds gateway-* tags instead of v*.*.* release tags. LATEST_TAG=$(git tag --list --sort=-version:refname 'v*.*.*' | head -1) BASE_VERSION=${LATEST_TAG#v} echo "Latest release tag: ${LATEST_TAG}" # Get commit info COMMIT_HASH=$(git rev-parse --short HEAD) COMMIT_COUNT=$(git rev-list --count HEAD) # Get current date in YYYY-MM-DD format BUILD_DATE=$(date -u +%Y-%m-%d) # Always use pr-{number} format for suffix SUFFIX="pr-${{ inputs.pr_number }}" # Generate PR wheel version following PEP 440 # Format: {base_version}.dev{commit_count}+pr-{number}.g{commit_hash} WHEEL_VERSION="${BASE_VERSION}.dev${COMMIT_COUNT}+${SUFFIX}.g${COMMIT_HASH}" echo "Base version: ${BASE_VERSION}" echo "PR wheel version: ${WHEEL_VERSION}" echo "Commit: ${COMMIT_HASH}" echo "Build date: ${BUILD_DATE}" echo "wheel_version=${WHEEL_VERSION}" >> $GITHUB_OUTPUT echo "commit_hash=${COMMIT_HASH}" >> $GITHUB_OUTPUT echo "base_version=${BASE_VERSION}" >> $GITHUB_OUTPUT echo "build_date=${BUILD_DATE}" >> $GITHUB_OUTPUT
cd python WHEEL_VERSION="${{ steps.gen_version.outputs.wheel_version }}" # Update pyproject.toml to use static version instead of dynamic # Remove 'version' from dynamic list and add static version sed -i 's/dynamic = \["version"\]/dynamic = []/' pyproject.toml sed -i "/^name = \"sglang\"/a version = \"${WHEEL_VERSION}\"" pyproject.toml # Verify update echo "Updated version in pyproject.toml:" grep "^version" pyproject.toml grep "^dynamic" pyproject.toml
cd python pip install build wheel setuptools
cd python cp ../README.md ../LICENSE . python3 -m build --wheel # List built wheels echo "Built wheel:" ls -lh dist/
echo "Downloaded wheel:" ls -lh dist/
git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl cd sgl-whl git config --local user.name "sglang-bot" git config --local user.email "sglangbot@gmail.com"
python3 scripts/update_pr_whl_index.py \ --pr-number ${{ inputs.pr_number }} \ --commit-hash ${{ needs.build-pr-wheel.outputs.commit_hash }} \ --wheel-version ${{ needs.build-pr-wheel.outputs.wheel_version }} \ --build-date ${{ needs.build-pr-wheel.outputs.build_date }}
cd sgl-whl git add -A git diff --staged --quiet || git commit -m "Update PR wheel index for PR #${{ inputs.pr_number }} (commit ${{ needs.build-pr-wheel.outputs.commit_hash }})" git push

View raw YAML

# Manually dispatched workflow that builds a wheel from a pull request head.
name: Release PyPI PR Wheels

on:
  workflow_dispatch:
    inputs:
      pr_number:
        type: string
        required: true
        description: "PR number to build wheel for (works with both internal and fork PRs)"

# A new dispatch for the same PR number cancels the run still in progress.
concurrency:
  cancel-in-progress: true
  group: build-pr-wheel-${{ github.event.inputs.pr_number }}

jobs:
  # Builds a wheel from the head of the requested pull request and exposes the
  # generated version, short commit hash and build date as job outputs.
  build-pr-wheel:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    outputs:
      wheel_version: ${{ steps.gen_version.outputs.wheel_version }}
      commit_hash: ${{ steps.gen_version.outputs.commit_hash }}
      build_date: ${{ steps.gen_version.outputs.build_date }}
    steps:
      - name: Validate PR number
        # pr_number is a free-form string input; reject anything that is not a
        # plain positive integer before it is used in a git ref or a version.
        env:
          PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          if ! [[ "${PR_NUMBER}" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::pr_number must be a positive integer, got: ${PR_NUMBER}"
            exit 1
          fi

      - uses: actions/checkout@v4
        with:
          ref: refs/pull/${{ inputs.pr_number }}/head
          fetch-depth: 0  # Need full history for version generation

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Generate PR wheel version
        id: gen_version
        env:
          # Read from the environment instead of being spliced into the script.
          PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          # Get base version from the latest v*.*.* git tag directly
          # Note: We cannot use setuptools_scm here because the [tool.setuptools_scm]
          # config (with custom git_describe_command) lives in python/pyproject.toml,
          # not at the repo root. Without that config, setuptools_scm falls back to
          # default git describe which finds gateway-* tags instead of v*.*.* release tags.
          LATEST_TAG=$(git tag --list --sort=-version:refname 'v*.*.*' | head -1)
          if [ -z "${LATEST_TAG}" ]; then
            # Without a tag the version below would start with ".dev".
            echo "::error::No v*.*.* release tag found; cannot derive base version"
            exit 1
          fi
          BASE_VERSION=${LATEST_TAG#v}
          echo "Latest release tag: ${LATEST_TAG}"

          # Get commit info
          COMMIT_HASH=$(git rev-parse --short HEAD)
          COMMIT_COUNT=$(git rev-list --count HEAD)

          # Get current date in YYYY-MM-DD format
          BUILD_DATE=$(date -u +%Y-%m-%d)

          # Always use pr-{number} format for suffix
          SUFFIX="pr-${PR_NUMBER}"

          # Generate PR wheel version following PEP 440
          # Format: {base_version}.dev{commit_count}+pr-{number}.g{commit_hash}
          WHEEL_VERSION="${BASE_VERSION}.dev${COMMIT_COUNT}+${SUFFIX}.g${COMMIT_HASH}"

          echo "Base version: ${BASE_VERSION}"
          echo "PR wheel version: ${WHEEL_VERSION}"
          echo "Commit: ${COMMIT_HASH}"
          echo "Build date: ${BUILD_DATE}"

          echo "wheel_version=${WHEEL_VERSION}" >> $GITHUB_OUTPUT
          echo "commit_hash=${COMMIT_HASH}" >> $GITHUB_OUTPUT
          echo "base_version=${BASE_VERSION}" >> $GITHUB_OUTPUT
          echo "build_date=${BUILD_DATE}" >> $GITHUB_OUTPUT

      - name: Update pyproject.toml with PR wheel version
        env:
          WHEEL_VERSION: ${{ steps.gen_version.outputs.wheel_version }}
        run: |
          cd python

          # Update pyproject.toml to use static version instead of dynamic
          # Remove 'version' from dynamic list and add static version
          sed -i 's/dynamic = \["version"\]/dynamic = []/' pyproject.toml
          sed -i "/^name = \"sglang\"/a version = \"${WHEEL_VERSION}\"" pyproject.toml

          # Verify update (grep exits non-zero, failing the step, if a line is missing)
          echo "Updated version in pyproject.toml:"
          grep "^version" pyproject.toml
          grep "^dynamic" pyproject.toml

      - name: Install build dependencies
        run: |
          cd python
          pip install build wheel setuptools

      - name: Build wheel
        run: |
          cd python
          cp ../README.md ../LICENSE .
          python3 -m build --wheel

          # List built wheels
          echo "Built wheel:"
          ls -lh dist/

      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: pr-wheel-${{ inputs.pr_number }}
          path: python/dist/*.whl
          retention-days: 30

  # Publishes the PR wheel built by build-pr-wheel as a prerelease in
  # sgl-project/whl and refreshes the PR wheel index in that repository.
  # The checkout below uses the workflow's own ref (no `ref:` override), so the
  # index script that runs with the release token is not taken from the PR.
  release-pr-wheel:
    needs: build-pr-wheel
    runs-on: ubuntu-latest
    environment: 'prod'
    steps:
      - uses: actions/checkout@v4

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: pr-wheel-${{ inputs.pr_number }}
          path: dist/

      - name: List downloaded wheels
        run: |
          echo "Downloaded wheel:"
          ls -lh dist/

      - name: Create GitHub Release for PR wheel
        uses: softprops/action-gh-release@v2
        with:
          tag_name: pr-${{ inputs.pr_number }}-${{ needs.build-pr-wheel.outputs.build_date }}-${{ needs.build-pr-wheel.outputs.commit_hash }}
          name: "PR #${{ inputs.pr_number }} Build (${{ needs.build-pr-wheel.outputs.commit_hash }})"
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          prerelease: true
          # "Commit" reports the PR head the wheel was built from. github.sha
          # would be the commit of the ref the workflow was dispatched on,
          # which is not the code inside the wheel.
          body: |
            PR wheel build from PR #${{ inputs.pr_number }}
            Commit: ${{ needs.build-pr-wheel.outputs.commit_hash }}
            Build date: ${{ needs.build-pr-wheel.outputs.build_date }}
            Version: ${{ needs.build-pr-wheel.outputs.wheel_version }}

            **Installation via index (pip):**
            ```bash
            pip install sglang==${{ needs.build-pr-wheel.outputs.wheel_version }} --index-url https://sgl-project.github.io/whl/pr/
            ```

            **Installation via index (uv):**
            ```bash
            uv pip install sglang==${{ needs.build-pr-wheel.outputs.wheel_version }} --index-url https://sgl-project.github.io/whl/pr/ --extra-index-url https://pypi.org/simple --index-strategy unsafe-best-match
            ```

            **Direct installation:**
            ```bash
            pip install https://github.com/sgl-project/whl/releases/download/pr-${{ inputs.pr_number }}-${{ needs.build-pr-wheel.outputs.build_date }}-${{ needs.build-pr-wheel.outputs.commit_hash }}/sglang-${{ needs.build-pr-wheel.outputs.wheel_version }}-py3-none-any.whl
            ```
          files: |
            dist/*.whl

      - name: Clone wheel index repository
        run: |
          git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Update wheel index
        # Inputs and job outputs are handed over via the environment and quoted,
        # so they are passed as data and never parsed as shell syntax.
        env:
          PR_NUMBER: ${{ inputs.pr_number }}
          COMMIT_HASH: ${{ needs.build-pr-wheel.outputs.commit_hash }}
          WHEEL_VERSION: ${{ needs.build-pr-wheel.outputs.wheel_version }}
          BUILD_DATE: ${{ needs.build-pr-wheel.outputs.build_date }}
        run: |
          python3 scripts/update_pr_whl_index.py \
            --pr-number "${PR_NUMBER}" \
            --commit-hash "${COMMIT_HASH}" \
            --wheel-version "${WHEEL_VERSION}" \
            --build-date "${BUILD_DATE}"

      - name: Push wheel index
        env:
          PR_NUMBER: ${{ inputs.pr_number }}
          COMMIT_HASH: ${{ needs.build-pr-wheel.outputs.commit_hash }}
        run: |
          cd sgl-whl
          git add -A
          # Commit only when the index actually changed.
          git diff --staged --quiet || git commit -m "Update PR wheel index for PR #${PR_NUMBER} (commit ${COMMIT_HASH})"
          git push

release-tag perms .github/workflows/release-tag.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

create-tag

Commands

VERSION="${{ github.event.inputs.version }}" if [ -z "$VERSION" ]; then echo "::error::Version is required" exit 1 fi if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then echo "::error::Invalid version format: $VERSION (expected: X.Y.Z or X.Y.Z.postN)" exit 1 fi echo "Version validated: v$VERSION"
TAG="v${{ github.event.inputs.version }}" if git rev-parse "$TAG" >/dev/null 2>&1; then echo "::error::Tag $TAG already exists" exit 1 fi echo "Tag $TAG does not exist, proceeding..."
TAG="v${{ github.event.inputs.version }}" REF="${{ github.event.inputs.ref }}" git config user.name "sglang-bot" git config user.email "sglang-bot@users.noreply.github.com" echo "Creating tag $TAG on ref $REF (commit: $(git rev-parse HEAD))" git tag -a "$TAG" -m "Release $TAG" git push origin "$TAG" echo "::notice::Successfully created and pushed tag $TAG" echo "This will trigger the release workflows (PyPI, Docker)"

View raw YAML

name: Release Tag
# Manually dispatched: creates the git tag that the release workflows
# (PyPI, Docker) are keyed on. Run it once testing on the release branch is done.
on:
  workflow_dispatch:
    inputs:
      version:
        type: string
        required: true
        description: "Version to tag (without v prefix, e.g., 0.5.7)"
      ref:
        type: string
        required: false
        default: "main"
        description: "Branch or commit to tag (e.g., release/v0.5.7, main, or commit SHA)"

# Write access to repository contents is needed to push the tag.
permissions:
  contents: write

jobs:
  # Validates the requested version, checks out the requested ref, and pushes an
  # annotated tag v<version> pointing at it.
  create-tag:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: 'prod'
    env:
      # Free-form dispatch inputs are exposed as environment variables so the
      # scripts below read them as data rather than having them spliced into
      # the shell source.
      VERSION: ${{ github.event.inputs.version }}
      REF: ${{ github.event.inputs.ref }}
    steps:
      - name: Validate version format
        run: |
          if [ -z "$VERSION" ]; then
            echo "::error::Version is required"
            exit 1
          fi
          # Anchored at both ends: X.Y.Z followed only by alphanumerics or dots
          # (e.g. .post1, rc0). Without the trailing anchor a value such as
          # "1.2.3; cmd" would pass validation.
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z.]*$'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z or X.Y.Z.postN)"
            exit 1
          fi
          echo "Version validated: v$VERSION"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.ref }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check if tag already exists
        run: |
          TAG="v$VERSION"
          # Look only in refs/tags so a branch with the same name is not
          # mistaken for an existing tag.
          if git rev-parse -q --verify "refs/tags/$TAG" >/dev/null 2>&1; then
            echo "::error::Tag $TAG already exists"
            exit 1
          fi
          echo "Tag $TAG does not exist, proceeding..."

      - name: Create and push tag
        # NOTE(review): GitHub documents that events created with GITHUB_TOKEN
        # do not start new workflow runs. Confirm the downstream release
        # workflows are actually triggered by this push.
        run: |
          TAG="v$VERSION"

          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"

          echo "Creating tag $TAG on ref $REF (commit: $(git rev-parse HEAD))"
          git tag -a "$TAG" -m "Release $TAG"
          git push origin "$TAG"

          echo "::notice::Successfully created and pushed tag $TAG"
          echo "This will trigger the release workflows (PyPI, Docker)"

release-whl-kernel matrix .github/workflows/release-whl-kernel.yml

Triggers

push, workflow_dispatch

Runs on

${{ matrix.runner }}, ubuntu-latest, ${{ matrix.runner }}, ubuntu-latest, amd-docker-scale, ubuntu-latest, ubuntu-latest, kernel-build-node-musa, ubuntu-latest

Jobs

build-cu129-matrix, release-cu129, build-cu130-matrix, release-cu130, build-rocm-matrix, release-rocm700, release-rocm720, build-musa43, release-musa43

Matrix

arch, cuda-version, include, include.arch, include.runner, musa-version, python-version, rocm-version→ 12.9, 13.0, 3.10, 43, 700, 720, aarch64, arm-kernel-build-node, x64-kernel-build-node, x86_64

Actions

softprops/action-gh-release, softprops/action-gh-release, softprops/action-gh-release, softprops/action-gh-release, softprops/action-gh-release

Commands

cd sgl-kernel chmod +x ./build.sh ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ${{ matrix.arch == 'aarch64' && 'aarch64' || '' }}
pip install twine python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN_SGLANG_KERNEL }}
if [ -z "${{ inputs.tag_name }}" ]; then TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)" echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT else echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT fi
git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
python3 scripts/update_kernel_whl_index.py --cuda 129
cd sgl-whl git config --local user.name "sglang-bot" git config --local user.email "sglangbot@gmail.com" git add -A git commit -m "update whl index" git push
cd sgl-kernel chmod +x ./build.sh ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ${{ matrix.arch == 'aarch64' && 'aarch64' || '' }}
if [ -z "${{ inputs.tag_name }}" ]; then TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)" echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT else echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT fi

View raw YAML

name: Release SGLang Kernels

# Runs when the kernel version file changes on main, or on manual dispatch.
on:
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py
  workflow_dispatch:
    inputs:
      target:
        description: "Build target"
        type: choice
        required: false
        default: "all"
        options:
          - "all"
          - "cu129"
          - "cu130"
          - "rocm700"
          - "rocm720"
          - "musa43"
      tag_name:
        type: string
        required: false
      pr_number:
        type: string
        required: false
        description: "PR number to build from (e.g. 12345)"

# One run per ref; a newer run cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: release-sglang-kernels-${{ github.ref }}

jobs:
  # Builds the CUDA 12.9 kernel wheels for x86_64 and aarch64, uploads them to
  # PyPI, and stores them as workflow artifacts for release-cu129.
  build-cu129-matrix:
    # The push trigger carries no dispatch inputs, so it is matched explicitly;
    # comparing only against inputs.target would skip every push-triggered run.
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event.inputs.target == 'all' ||
       github.event.inputs.target == 'cu129')
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.9"]
        arch: [x86_64, aarch64]
        include:
          - arch: x86_64
            runner: x64-kernel-build-node
          - arch: aarch64
            runner: arm-kernel-build-node
    runs-on: ${{ matrix.runner }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
          # Empty ref means the default checkout for the triggering event.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ${{ matrix.arch == 'aarch64' && 'aarch64' || '' }}
        env:
          BUILD_JOBS: 64
          NVCC_THREADS: 8

      - name: Upload to PyPI
        working-directory: sgl-kernel
        # Credentials are supplied through twine's environment variables so the
        # token is not spliced into the script text or visible in the argument
        # list of the twine process.
        # NOTE(review): this step also runs when pr_number is set, i.e. it can
        # publish a wheel built from PR code; confirm that is intended.
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_SGLANG_KERNEL }}
        run: |
          pip install twine
          python3 -m twine upload --skip-existing dist/*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}${{ matrix.arch == 'aarch64' && '-aarch64' || '' }}
          path: sgl-kernel/dist/*

  # Publishes the CUDA 12.9 wheels to a release in sgl-project/whl and updates
  # the wheel index stored in that repository.
  release-cu129:
    needs: build-cu129-matrix
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          # Restrict to this job's own wheels (x86_64 and -aarch64). A bare
          # wheel-* would also pull in whatever other targets in the same run
          # have uploaded by the time this job starts.
          pattern: wheel-*-cuda12.9*

      - name: Set tag name
        id: set_tag_name
        env:
          # Free-form input is read from the environment, not spliced into the script.
          TAG_NAME_INPUT: ${{ inputs.tag_name }}
        run: |
          if [ -z "${TAG_NAME_INPUT}" ]; then
            # Fall back to the version string found in version.py.
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${TAG_NAME_INPUT}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 129

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
          git add -A
          # Skip the commit when the index is unchanged (e.g. on a re-run);
          # a bare `git commit` would exit non-zero and fail the job.
          git diff --staged --quiet || git commit -m "update whl index"
          git push

  # Builds the CUDA 13.0 kernel wheels for x86_64 and aarch64 and stores them as
  # workflow artifacts for release-cu130.
  # for now we do not release CUDA 13.0 wheels to pypi
  build-cu130-matrix:
    # The push trigger carries no dispatch inputs, so it is matched explicitly;
    # comparing only against inputs.target would skip every push-triggered run.
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event.inputs.target == 'all' ||
       github.event.inputs.target == 'cu130')
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["13.0"]
        arch: [x86_64, aarch64]
        include:
          - arch: x86_64
            runner: x64-kernel-build-node
          - arch: aarch64
            runner: arm-kernel-build-node
    runs-on: ${{ matrix.runner }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
          # Empty ref means the default checkout for the triggering event.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ${{ matrix.arch == 'aarch64' && 'aarch64' || '' }}
        env:
          BUILD_JOBS: 64
          NVCC_THREADS: 8

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}${{ matrix.arch == 'aarch64' && '-aarch64' || '' }}
          path: sgl-kernel/dist/*

  # Publishes the CUDA 13.0 wheels to a release in sgl-project/whl and updates
  # the wheel index stored in that repository.
  release-cu130:
    needs: build-cu130-matrix
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          # Restrict to this job's own wheels (x86_64 and -aarch64). A bare
          # wheel-* would also pull in whatever other targets in the same run
          # have uploaded by the time this job starts.
          pattern: wheel-*-cuda13.0*

      - name: Set tag name
        id: set_tag_name
        env:
          # Free-form input is read from the environment, not spliced into the script.
          TAG_NAME_INPUT: ${{ inputs.tag_name }}
        run: |
          if [ -z "${TAG_NAME_INPUT}" ]; then
            # Fall back to the version string found in version.py.
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${TAG_NAME_INPUT}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 130

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
          git add -A
          # Skip the commit when the index is unchanged (e.g. on a re-run);
          # a bare `git commit` would exit non-zero and fail the job.
          git diff --staged --quiet || git commit -m "update whl index"
          git push

  # Builds the ROCm kernel wheels (one matrix entry per ROCm version) and stores
  # them as workflow artifacts for the release-rocm* jobs.
  build-rocm-matrix:
    # The push trigger carries no dispatch inputs, so it is matched explicitly;
    # comparing only against inputs.target would skip every push-triggered run.
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event.inputs.target == 'all' ||
       github.event.inputs.target == 'rocm700' ||
       github.event.inputs.target == 'rocm720')
    runs-on: amd-docker-scale
    strategy:
      matrix:
        python-version: ["3.10"]
        rocm-version: ["700", "720"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
          # Empty ref means the default checkout for the triggering event.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          # Overlay the AMD wheel build files onto sgl-kernel before building.
          cp 3rdparty/amd/wheel/sgl-kernel/* sgl-kernel/
          cd sgl-kernel
          chmod +x ./build_rocm.sh
          ./build_rocm.sh "${{ matrix.rocm-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-rocm${{ matrix.rocm-version }}
          path: sgl-kernel/dist/*

  # Publishes the ROCm 7.0.0 wheels to a release in sgl-project/whl and updates
  # the wheel index stored in that repository.
  release-rocm700:
    needs: build-rocm-matrix
    # build-rocm-matrix serves both ROCm targets; honour the selected target
    # here so that choosing rocm720 does not also publish the 700 wheels.
    if: |
      github.event_name == 'push' ||
      github.event.inputs.target == 'all' ||
      github.event.inputs.target == 'rocm700'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*-rocm700

      - name: Set tag name
        id: set_tag_name
        env:
          # Free-form input is read from the environment, not spliced into the script.
          TAG_NAME_INPUT: ${{ inputs.tag_name }}
        run: |
          if [ -z "${TAG_NAME_INPUT}" ]; then
            # Fall back to the version string found in version.py.
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${TAG_NAME_INPUT}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --rocm 700

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
          git add -A
          # Skip the commit when the index is unchanged (e.g. on a re-run);
          # a bare `git commit` would exit non-zero and fail the job.
          git diff --staged --quiet || git commit -m "update whl index"
          git push

  # Publishes the ROCm 7.2.0 wheels to a release in sgl-project/whl and updates
  # the wheel index stored in that repository.
  release-rocm720:
    needs: build-rocm-matrix
    # build-rocm-matrix serves both ROCm targets; honour the selected target
    # here so that choosing rocm700 does not also publish the 720 wheels.
    if: |
      github.event_name == 'push' ||
      github.event.inputs.target == 'all' ||
      github.event.inputs.target == 'rocm720'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*-rocm720

      - name: Set tag name
        id: set_tag_name
        env:
          # Free-form input is read from the environment, not spliced into the script.
          TAG_NAME_INPUT: ${{ inputs.tag_name }}
        run: |
          if [ -z "${TAG_NAME_INPUT}" ]; then
            # Fall back to the version string found in version.py.
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${TAG_NAME_INPUT}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --rocm 720

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
          git add -A
          # Skip the commit when the index is unchanged (e.g. on a re-run);
          # a bare `git commit` would exit non-zero and fail the job.
          git diff --staged --quiet || git commit -m "update whl index"
          git push

  # Builds the MUSA kernel wheel, renames it for the MUSA version, and stores it
  # as a workflow artifact for release-musa43.
  build-musa43:
    # The push trigger carries no dispatch inputs, so it is matched explicitly;
    # comparing only against inputs.target would skip every push-triggered run.
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event.inputs.target == 'all' ||
       github.event.inputs.target == 'musa43')
    runs-on: kernel-build-node-musa
    strategy:
      matrix:
        python-version: ["3.10"]
        musa-version: ["43"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
          # Same pr_number handling as the other build jobs, so a dispatch with
          # pr_number builds every target from the same source.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          # The MUSA build uses its own project file in place of pyproject.toml.
          mv pyproject_musa.toml pyproject.toml
          python setup_musa.py sdist bdist_wheel

      - name: Rename MUSA wheels
        run: |
          bash scripts/ci/musa/rename_wheels_musa.sh ${{ matrix.musa-version }} sgl-kernel/dist

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-musa${{ matrix.musa-version }}
          path: sgl-kernel/dist/*

  # Publishes the MUSA 4.3 wheels to a release in sgl-project/whl and updates
  # the wheel index stored in that repository.
  release-musa43:
    needs: build-musa43
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Same pr_number handling as the other release jobs, so the fallback
          # tag name is read from the same source for every target.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          # Restrict to this job's own wheels. A bare wheel-* would also pull in
          # whatever other targets in the same run have uploaded by the time
          # this job starts.
          pattern: wheel-*-musa43

      - name: Set tag name
        id: set_tag_name
        env:
          # Free-form input is read from the environment, not spliced into the script.
          TAG_NAME_INPUT: ${{ inputs.tag_name }}
        run: |
          if [ -z "${TAG_NAME_INPUT}" ]; then
            # Fall back to the version string found in version.py.
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${TAG_NAME_INPUT}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --musa 43

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "sglangbot@gmail.com"
          git add -A
          # Skip the commit when the index is unchanged (e.g. on a re-run);
          # a bare `git commit` would exit non-zero and fail the job.
          git diff --staged --quiet || git commit -m "update whl index"
          git push

rerun-ut perms .github/workflows/rerun-ut.yml

Triggers

workflow_dispatch

Runs on

${{ inputs.runner_label }}

Jobs

rerun-ut-cuda

Commands

if [[ "${{ inputs.runner_label }}" == "1-gpu-5090" ]]; then source /etc/profile.d/sglang-ci.sh fi if [[ "${{ inputs.use_deepep }}" == "true" ]]; then bash scripts/ci/cuda/ci_install_deepep.sh else bash scripts/ci/cuda/ci_install_dependency.sh fi
if [[ "${{ inputs.runner_label }}" == "1-gpu-5090" ]]; then source /etc/profile.d/sglang-ci.sh fi cd test/ python3 ${{ inputs.test_command }}

View raw YAML

name: Rerun UT
# Run title shows the test command, plus the PR head SHA when one was given.
run-name: ${{ inputs.pr_head_sha && format('[rerun-ut] {0} {1}', inputs.test_command, inputs.pr_head_sha) || format('[rerun-ut] {0}', inputs.test_command) }}

on:
  workflow_dispatch:
    inputs:
      test_command:
        type: string
        required: true
        description: "Test command to run (e.g. 'registered/core/test_srt_endpoint.py TestSRTEndpoint.test_simple_decode')"
      runner_label:
        type: choice
        required: true
        description: "Runner label"
        options:
          - 1-gpu-h100
          - 1-gpu-5090
          - 2-gpu-h100
          - 4-gpu-h100
          - 4-gpu-a10
          - 4-gpu-b200
          - 8-gpu-h200
          - 8-gpu-h20
          - 8-gpu-b200
      pr_head_sha:
        type: string
        required: false
        default: ""
        description: "PR head SHA to checkout (for /rerun-ut on fork PRs)"
      use_deepep:
        type: string
        required: false
        default: "false"
        description: "Use ci_install_deepep.sh instead of ci_install_dependency.sh"

# Token scopes granted to every job in this workflow.
permissions:
  actions: write
  contents: read
  issues: read

# Environment shared by every step of every job.
env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true

jobs:
  # Re-runs a single test command on the selected runner, optionally against a
  # specific PR head SHA, and uploads CUDA coredumps regardless of the outcome.
  rerun-ut-cuda:
    runs-on: ${{ inputs.runner_label }}
    timeout-minutes: 120
    env:
      RUNNER_LABELS: ${{ inputs.runner_label }}
      SGLANG_CI_RDMA_ALL_DEVICES: ${{ inputs.runner_label == '8-gpu-h20' && 'mlx5_1,mlx5_2,mlx5_3,mlx5_4' || '' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || github.sha }}

      - uses: ./.github/actions/check-maintenance

      - name: Install dependencies
        timeout-minutes: 20
        env:
          # Free-form input is read from the environment, not spliced into the script.
          RERUN_USE_DEEPEP: ${{ inputs.use_deepep }}
        run: |
          # RUNNER_LABELS is the job-level copy of inputs.runner_label.
          if [[ "${RUNNER_LABELS}" == "1-gpu-5090" ]]; then
            source /etc/profile.d/sglang-ci.sh
          fi
          if [[ "${RERUN_USE_DEEPEP}" == "true" ]]; then
            bash scripts/ci/cuda/ci_install_deepep.sh
          else
            bash scripts/ci/cuda/ci_install_dependency.sh
          fi

      - name: Run test
        timeout-minutes: 60
        env:
          RERUN_TEST_COMMAND: ${{ inputs.test_command }}
        run: |
          if [[ "${RUNNER_LABELS}" == "1-gpu-5090" ]]; then
            source /etc/profile.d/sglang-ci.sh
          fi
          cd test/
          # Split the command on whitespace into arguments without letting the
          # shell interpret it, so metacharacters (;, &&, $(...), globs) in the
          # input are passed to python3 literally. Shell quoting inside
          # test_command is therefore not honoured.
          read -r -a test_args <<< "${RERUN_TEST_COMMAND}"
          python3 "${test_args[@]}"

      - uses: ./.github/actions/upload-cuda-coredumps
        if: always()

retag-docker .github/workflows/retag-docker.yml

Triggers

workflow_dispatch

Runs on

ubuntu-22.04

Jobs

retag

Actions

docker/login-action

Commands

echo "Retagging lmsysorg/sglang:${{ inputs.source_tag }} -> lmsysorg/sglang:${{ inputs.target_tag }}" docker buildx imagetools create \ -t lmsysorg/sglang:${{ inputs.target_tag }} \ lmsysorg/sglang:${{ inputs.source_tag }}

View raw YAML

# Manually triggered workflow that gives an existing lmsysorg/sglang image an
# additional tag, without rebuilding it.
name: Retag Docker Image

on:
  workflow_dispatch:
    inputs:
      source_tag:
        description: "Existing image tag (e.g., v0.4.7-cu129-amd64)"
        required: true
        type: string
      target_tag:
        description: "New tag to apply (e.g., latest)"
        required: true
        type: string

jobs:
  retag:
    # Only runs in the upstream repository, never in forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-22.04
    environment: "prod"
    steps:
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Retag image
        env:
          # Inputs are passed as environment variables and expanded by the
          # shell inside quotes. Interpolating `${{ inputs.* }}` into the script
          # text would allow a crafted tag to run arbitrary commands in a job
          # that is logged in to Docker Hub.
          SOURCE_TAG: ${{ inputs.source_tag }}
          TARGET_TAG: ${{ inputs.target_tag }}
        run: |
          echo "Retagging lmsysorg/sglang:${SOURCE_TAG} -> lmsysorg/sglang:${TARGET_TAG}"
          docker buildx imagetools create \
            -t "lmsysorg/sglang:${TARGET_TAG}" \
            "lmsysorg/sglang:${SOURCE_TAG}"

runner-utilization .github/workflows/runner-utilization.yml

Triggers

schedule, pull_request, workflow_dispatch

Runs on

ubuntu-latest

Jobs

report

Commands

python scripts/ci/utils/runner_utilization_report.py \ --repo ${{ github.repository }} \ --hours ${{ inputs.hours || '24' }} \ ${{ inputs.filter && format('--filter {0}', inputs.filter) || '' }}

View raw YAML

# Produces a runner utilization report by running
# scripts/ci/utils/runner_utilization_report.py.
name: Runner Utilization Report

on:
  schedule:
    - cron: '0 8 * * *'  # Daily at 8 AM UTC
  # Also runs on PRs that touch this workflow or the report script itself.
  pull_request:
    paths:
      - '.github/workflows/runner-utilization.yml'
      - 'scripts/ci/utils/runner_utilization_report.py'
  workflow_dispatch:
    inputs:
      hours:
        description: 'Time window in hours'
        required: false
        default: '24'
        type: string
      filter:
        description: 'Filter runner labels (e.g., 5090, h200)'
        required: false
        type: string

jobs:
  report:
    name: Generate Report
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Generate Utilization Report
        timeout-minutes: 30
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Dispatch inputs are passed as environment variables rather than
          # interpolated into the script text, so their content cannot be
          # executed as shell code. On schedule/pull_request runs the inputs
          # are empty, hence the '24' fallback for the time window.
          REPORT_REPO: ${{ github.repository }}
          REPORT_HOURS: ${{ inputs.hours || '24' }}
          REPORT_FILTER: ${{ inputs.filter }}
        run: |
          report_args=(--repo "$REPORT_REPO" --hours "$REPORT_HOURS")
          # --filter is only added when a filter was supplied. The value is
          # split on whitespace, as the previously unquoted expansion did.
          if [[ -n "$REPORT_FILTER" ]]; then
            read -r -a filter_words <<< "$REPORT_FILTER"
            report_args+=(--filter "${filter_words[@]}")
          fi
          python scripts/ci/utils/runner_utilization_report.py "${report_args[@]}"

slash-command-handler perms .github/workflows/slash-command-handler.yml

Triggers

issue_comment

Runs on

ubuntu-latest

Jobs

slash_command

Commands

PR_DATA=$(gh pr view ${{ github.event.issue.number }} --repo ${{ github.repository }} --json headRefName,headRepositoryOwner) || { echo "::error::Failed to fetch PR data" exit 1 } # Use 'empty' filter to handle null/missing values (e.g., deleted forks) HEAD_OWNER=$(echo "$PR_DATA" | jq -r '.headRepositoryOwner.login // empty') REPO_OWNER="${{ github.repository_owner }}" # Treat missing/null owner as fork for security (fail-safe) if [[ -z "$HEAD_OWNER" || "$HEAD_OWNER" != "$REPO_OWNER" ]]; then IS_FORK="true" else IS_FORK="false" fi echo "is_fork=$IS_FORK" >> $GITHUB_OUTPUT echo "ref=$(echo "$PR_DATA" | jq -r '.headRefName')" >> $GITHUB_OUTPUT echo "PR owner: $HEAD_OWNER, Repo owner: $REPO_OWNER, Is fork: $IS_FORK"
pip install PyGithub
python scripts/ci/utils/slash_command_handler.py

View raw YAML

# Reacts to slash commands written in pull request comments by running
# scripts/ci/utils/slash_command_handler.py.
name: Slash Command Handler

on:
  issue_comment:
    types: [created, edited]

permissions:
  contents: read
  pull-requests: write  # Needed to add labels and reactions
  actions: write        # Needed to rerun workflows
  issues: write         # Needed for comment reactions in some contexts

jobs:
  slash_command:
    # Gate: the comment must belong to a PR and mention a supported command.
    # contains() is used because startsWith() cannot cope with leading
    # whitespace or newlines in the comment body.
    if: >
      github.event.issue.pull_request &&
      (contains(github.event.comment.body, '/tag-run-ci-label') ||
       contains(github.event.comment.body, '/rerun-failed-ci') ||
       contains(github.event.comment.body, '/tag-and-rerun-ci') ||
       contains(github.event.comment.body, '/rerun-stage') ||
       contains(github.event.comment.body, '/rerun-ut'))
    runs-on: ubuntu-latest

    steps:
      # SECURITY: issue_comment runs with elevated permissions
      # (pull-requests: write, actions: write). Same-owner PRs may check out
      # their own branch so that handler changes can be tested. Fork PRs MUST
      # stay on the default ref so untrusted code never runs with these
      # permissions.
      - name: Get PR details
        id: pr
        shell: bash
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.issue.number }}
          REPO_FULL_NAME: ${{ github.repository }}
          REPO_OWNER: ${{ github.repository_owner }}
        run: |
          if ! PR_DATA=$(gh pr view "$PR_NUMBER" --repo "$REPO_FULL_NAME" --json headRefName,headRepositoryOwner); then
            echo "::error::Failed to fetch PR data"
            exit 1
          fi
          # jq's 'empty' produces no output for a null/missing owner (e.g., deleted forks)
          HEAD_OWNER=$(jq -r '.headRepositoryOwner.login // empty' <<< "$PR_DATA")
          # Fail-safe: a PR counts as a fork unless its owner is present AND matches
          IS_FORK="true"
          if [[ -n "$HEAD_OWNER" && "$HEAD_OWNER" == "$REPO_OWNER" ]]; then
            IS_FORK="false"
          fi
          {
            echo "is_fork=$IS_FORK"
            echo "ref=$(jq -r '.headRefName' <<< "$PR_DATA")"
          } >> "$GITHUB_OUTPUT"
          echo "PR owner: $HEAD_OWNER, Repo owner: $REPO_OWNER, Is fork: $IS_FORK"

      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Same-owner PR: use the PR branch. Fork PR: empty ref, i.e. the
          # default checkout, so no untrusted code is fetched.
          ref: ${{ steps.pr.outputs.is_fork == 'false' && steps.pr.outputs.ref || '' }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install PyGithub

      - name: Handle Slash Command
        # Event data reaches the script through the environment only.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO_FULL_NAME: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.issue.number }}
          COMMENT_ID: ${{ github.event.comment.id }}
          COMMENT_BODY: ${{ github.event.comment.body }}
          USER_LOGIN: ${{ github.event.comment.user.login }}
        run: python scripts/ci/utils/slash_command_handler.py

stress-test .github/workflows/stress-test.yml

Triggers

workflow_dispatch

Runs on

8-gpu-h200

Jobs

stress-test

Commands

bash scripts/ci/cuda/ci_install_dependency.sh
cd test python3 run_suite.py --hw cuda --suite stress

View raw YAML

# Manually triggered stress suite on the 8-GPU H200 runner.
name: Stress Test

on:
  workflow_dispatch:
    inputs:
      num_prompts:
        description: "Number of prompts per model"
        required: true
        default: "50000"
        type: string
      duration_minutes:
        description: "Timeout per model in minutes"
        required: true
        default: "45"
        type: string

jobs:
  stress-test:
    # Upstream repository only.
    if: github.repository == 'sgl-project/sglang'
    runs-on: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run stress tests
        timeout-minutes: 210
        working-directory: test
        # The dispatch inputs are handed to the suite as environment variables.
        env:
          NUM_PROMPTS: ${{ inputs.num_prompts }}
          DURATION_MINUTES: ${{ inputs.duration_minutes }}
        run: python3 run_suite.py --hw cuda --suite stress

      # Results are uploaded even when the suite fails.
      # NOTE(review): the glob is resolved from the workspace root while the
      # suite runs inside test/ — confirm where stress_test_*.jsonl is written.
      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: stress-test-results
          path: stress_test_*.jsonl

weekly-test-nvidia .github/workflows/weekly-test-nvidia.yml

Triggers

schedule, workflow_dispatch

Runs on

8-gpu-h200

Jobs

weekly-test-8-gpu-h200

Commands

bash scripts/ci/cuda/ci_install_dependency.sh
cd test python3 run_suite.py --hw cuda --suite weekly-8-gpu-h200 --nightly --continue-on-error --timeout-per-file 7200

View raw YAML

# Weekly Nvidia test suite; can also be dispatched manually.
name: Weekly Test (Nvidia)

on:
  schedule:
    - cron: '0 0 * * 0'  # Every Sunday at midnight UTC
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        required: false
        type: choice
        default: all
        options:
          - all
          - weekly-test-8-gpu-h200

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: weekly-test-nvidia-${{ github.ref }}
  cancel-in-progress: true

env:
  SGLANG_IS_IN_CI: true
  HF_HUB_DOWNLOAD_TIMEOUT: 300
  HF_HUB_ETAG_TIMEOUT: 300

jobs:
  # 8-GPU H200 weekly suite. Runs only in the upstream repository, and only
  # when the filter is empty (scheduled runs), "all", or names this job.
  weekly-test-8-gpu-h200:
    if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'weekly-test-8-gpu-h200')
    runs-on: 8-gpu-h200
    timeout-minutes: 120
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh

      # NOTE(review): --timeout-per-file 7200 looks like seconds (120 min),
      # the same as the step and job timeouts, so one slow file could use up
      # the whole budget — confirm this is intended.
      - name: Run weekly 8-GPU H200 tests
        timeout-minutes: 120
        working-directory: test
        env:
          GPU_CONFIG: "8-gpu-h200"
          IS_H200: "1"
        run: python3 run_suite.py --hw cuda --suite weekly-8-gpu-h200 --nightly --continue-on-error --timeout-per-file 7200