sgl-project/sglang
65 workflows · maturity 67% · 14 patterns · GitHub ↗
Practices
✓ Matrix ✓ Permissions ○ Security scan ○ AI review ✓ Cache ✓ Concurrency ✓ Reusable workflows
Detected patterns
Security dimensions
Workflows (65)
amd-aiter-scout .github/workflows/amd-aiter-scout.yml
View raw YAML
# AMD AITER Scout: resolves an AITER ref to a commit, runs the selected AMD
# nightly / PR-test reusable workflows against it, and aggregates the results.
name: AMD AITER Scout

on:
  schedule:
    - cron: '0 20 * * 1'  # Monday 20:00 UTC
    - cron: '0 20 * * 4'  # Thursday 20:00 UTC
  workflow_dispatch:
    inputs:
      aiter_ref:
        description: 'AITER git ref (branch, tag, or SHA). Default: main (latest commit)'
        required: false
        type: string
        default: 'main'
      job_filter:
        description: 'Comma-separated workflows to run: nightly-amd, nightly-amd-rocm720, pr-test-amd, pr-test-amd-rocm720. Default: all'
        required: false
        type: string
        default: 'all'
      continue_on_error:
        description: 'Continue running other workflows even if one fails'
        required: false
        type: boolean
        default: true

# The group name embeds the run id, so every run is placed in its own group.
concurrency:
  group: amd-aiter-scout-${{ github.run_id }}
  cancel-in-progress: true

jobs:
  # Resolves the requested AITER ref and decides which downstream workflows run.
  resolve-aiter:
    runs-on: ubuntu-latest
    outputs:
      aiter_sha: ${{ steps.resolve.outputs.sha }}
      run_nightly_amd: ${{ steps.parse.outputs.run_nightly_amd }}
      run_nightly_amd_rocm720: ${{ steps.parse.outputs.run_nightly_amd_rocm720 }}
      run_pr_test_amd: ${{ steps.parse.outputs.run_pr_test_amd }}
      run_pr_test_amd_rocm720: ${{ steps.parse.outputs.run_pr_test_amd_rocm720 }}
    steps:
      - name: Resolve AITER commit
        id: resolve
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the dispatch input is never parsed as shell code.
          AITER_REF: ${{ inputs.aiter_ref || 'main' }}
        run: |
          REF="${AITER_REF}"
          echo "Resolving AITER ref: ${REF}"
          # Try the ref as a branch, then as a tag, then as a raw pattern.
          SHA=$(git ls-remote https://github.com/ROCm/aiter.git "refs/heads/${REF}" | head -1 | cut -f1)
          if [ -z "$SHA" ]; then
            SHA=$(git ls-remote https://github.com/ROCm/aiter.git "refs/tags/${REF}" | head -1 | cut -f1)
          fi
          if [ -z "$SHA" ]; then
            SHA=$(git ls-remote https://github.com/ROCm/aiter.git "${REF}" | head -1 | cut -f1)
          fi
          # Nothing matched on the remote: use the requested ref verbatim.
          if [ -z "$SHA" ]; then
            SHA="${REF}"
          fi
          echo "sha=${SHA}" >> "$GITHUB_OUTPUT"
          {
            echo "### AITER Ref Resolution"
            echo "- **Requested ref:** \`${REF}\`"
            echo "- **Resolved SHA:** \`${SHA}\`"
            echo "- **AITER commit:** https://github.com/ROCm/aiter/commit/${SHA}"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Parse job filter
        id: parse
        env:
          JOB_FILTER: ${{ inputs.job_filter || 'all' }}
        run: |
          FILTER="${JOB_FILTER}"
          echo "Job filter: ${FILTER}"
          if [[ "$FILTER" == "all" ]]; then
            {
              echo "run_nightly_amd=true"
              echo "run_nightly_amd_rocm720=true"
              echo "run_pr_test_amd=true"
              echo "run_pr_test_amd_rocm720=true"
            } >> "$GITHUB_OUTPUT"
          else
            # Wrap with commas for exact substring matching (avoids "nightly-amd" matching "nightly-amd-rocm720")
            PADDED=",${FILTER// /},"
            {
              echo "run_nightly_amd=$(echo "$PADDED" | grep -q ',nightly-amd,' && echo true || echo false)"
              echo "run_nightly_amd_rocm720=$(echo "$PADDED" | grep -q ',nightly-amd-rocm720,' && echo true || echo false)"
              echo "run_pr_test_amd=$(echo "$PADDED" | grep -q ',pr-test-amd,' && echo true || echo false)"
              echo "run_pr_test_amd_rocm720=$(echo "$PADDED" | grep -q ',pr-test-amd-rocm720,' && echo true || echo false)"
            } >> "$GITHUB_OUTPUT"
          fi
          {
            echo "### Job Filter"
            echo "- **Filter:** \`${FILTER}\`"
          } >> "$GITHUB_STEP_SUMMARY"

  call-nightly-amd:
    if: needs.resolve-aiter.outputs.run_nightly_amd == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/nightly-test-amd.yml
    secrets: inherit
    with:
      ref: ${{ github.sha }}
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      job_filter: 'all'
      # Scheduled runs carry no inputs and default to true; manual runs honour
      # the checkbox. Comparing the boolean input against '' coerces both
      # false and '' to 0, which made an explicit false evaluate to true.
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  call-nightly-amd-rocm720:
    if: needs.resolve-aiter.outputs.run_nightly_amd_rocm720 == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/nightly-test-amd-rocm720.yml
    secrets: inherit
    with:
      ref: ${{ github.sha }}
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      job_filter: 'all'
      # true on schedule, the dispatch input otherwise (see call-nightly-amd).
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  call-pr-test-amd:
    if: needs.resolve-aiter.outputs.run_pr_test_amd == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/pr-test-amd.yml
    secrets: inherit
    with:
      run_all_tests: true
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      # true on schedule, the dispatch input otherwise (see call-nightly-amd).
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  call-pr-test-amd-rocm720:
    if: needs.resolve-aiter.outputs.run_pr_test_amd_rocm720 == 'true'
    needs: resolve-aiter
    uses: ./.github/workflows/pr-test-amd-rocm720.yml
    secrets: inherit
    with:
      run_all_tests: true
      aiter_ref: ${{ needs.resolve-aiter.outputs.aiter_sha }}
      # true on schedule, the dispatch input otherwise (see call-nightly-amd).
      continue_on_error: ${{ github.event_name != 'workflow_dispatch' || inputs.continue_on_error }}

  # Always runs; writes a result table and fails if any dependency failed or
  # was cancelled.
  check-all-jobs:
    if: always()
    needs:
      - resolve-aiter
      - call-nightly-amd
      - call-nightly-amd-rocm720
      - call-pr-test-amd
      - call-pr-test-amd-rocm720
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        env:
          # aiter_sha may be the raw user-supplied ref (resolve-aiter falls back
          # to it), so it is handed over via the environment, not inlined.
          AITER_SHA: ${{ needs.resolve-aiter.outputs.aiter_sha }}
          NIGHTLY_AMD_RESULT: ${{ needs.call-nightly-amd.result }}
          NIGHTLY_AMD_ROCM720_RESULT: ${{ needs.call-nightly-amd-rocm720.result }}
          PR_TEST_AMD_RESULT: ${{ needs.call-pr-test-amd.result }}
          PR_TEST_AMD_ROCM720_RESULT: ${{ needs.call-pr-test-amd-rocm720.result }}
        run: |
          {
            echo "## AMD AITER Scout Results"
            echo ""
            echo "- **AITER SHA:** \`${AITER_SHA}\`"
            echo "- **AITER commit:** https://github.com/ROCm/aiter/commit/${AITER_SHA}"
            echo ""
            echo "| Workflow | Result |"
            echo "|----------|--------|"
            echo "| Nightly AMD (AITER Latest) | \`${NIGHTLY_AMD_RESULT}\` |"
            echo "| Nightly AMD ROCm 7.2 | \`${NIGHTLY_AMD_ROCM720_RESULT}\` |"
            echo "| PR Test AMD (AITER Latest) | \`${PR_TEST_AMD_RESULT}\` |"
            echo "| PR Test AMD ROCm 7.2 | \`${PR_TEST_AMD_ROCM720_RESULT}\` |"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more workflows failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more workflows were cancelled"
            exit 1
          fi
          echo "All workflows passed"
amd-ci-job-monitor matrix .github/workflows/amd-ci-job-monitor.yml
View raw YAML
# AMD CI Job Monitor: takes one snapshot of recent Actions data, then renders
# per-job reports (or a single custom report) from that snapshot.
name: AMD CI Job Monitor

on:
  schedule:
    - cron: '0 0 * * *'  # Daily at midnight UTC
  pull_request:
    paths:
      - '.github/workflows/amd-ci-job-monitor.yml'
      - 'scripts/ci/utils/query_job_status.py'
  workflow_dispatch:
    inputs:
      hours:
        description: 'Time window in hours'
        required: false
        default: '24'
        type: string
      job_filter:
        description: 'Job name filter (leave empty for all AMD jobs)'
        required: false
        type: string

# Note: dispatch inputs and matrix values are handed to scripts through `env:`
# and quoted shell variables rather than inlined `${{ }}` expressions, so their
# contents are never parsed as shell code.
jobs:
  fetch-actions-data:
    name: Fetch Actions Snapshot
    runs-on: ubuntu-latest
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Select workflows for snapshot
        id: select-workflows
        env:
          JOB_FILTER: ${{ inputs.job_filter }}
        run: |
          # A job filter narrows the snapshot to pr-test-amd.yml only.
          if [[ -n "$JOB_FILTER" ]]; then
            echo "workflows=pr-test-amd.yml" >> "$GITHUB_OUTPUT"
          else
            echo "workflows=pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" >> "$GITHUB_OUTPUT"
          fi

      - name: Fetch Actions data snapshot
        timeout-minutes: 30
        env:
          WORKFLOWS: ${{ steps.select-workflows.outputs.workflows }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --workflow "$WORKFLOWS" \
            --hours "$HOURS" \
            --dump-data-file actions-job-snapshot.json

      - name: Upload Actions data snapshot
        uses: actions/upload-artifact@v4
        with:
          name: actions-job-snapshot
          path: actions-job-snapshot.json
          if-no-files-found: error

  # Single job filter mode
  custom-report:
    name: Custom Job Report
    if: ${{ inputs.job_filter }}
    needs: fetch-actions-data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Custom Job Report
        timeout-minutes: 30
        env:
          JOB_FILTER: ${{ inputs.job_filter }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_FILTER" \
            --workflow "pr-test-amd.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Parse workflow files to get job names dynamically
  parse-workflows:
    name: Parse Workflow Jobs
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    outputs:
      pr_jobs: ${{ steps.parse.outputs.pr_jobs }}
      nightly_jobs: ${{ steps.parse.outputs.nightly_jobs }}
      pr_rocm720_jobs: ${{ steps.parse.outputs.pr_rocm720_jobs }}
      nightly_rocm720_jobs: ${{ steps.parse.outputs.nightly_rocm720_jobs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Parse workflow files
        id: parse
        run: |
          # Each output is a compact JSON array of job ids, consumed by
          # fromJson() in the report matrices below.

          # Parse pr-test-amd.yml and extract job names (exclude utility jobs)
          # Excluded: call-gate, check-changes, pr-test-amd-finish, cancel, check-all-jobs
          pr_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/pr-test-amd.yml | \
            grep -v -E '^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$' | \
            jq -R -s -c 'split("\n") | map(select(length > 0))')
          echo "pr_jobs=$pr_jobs" >> "$GITHUB_OUTPUT"
          echo "PR jobs: $pr_jobs"

          # Parse nightly-test-amd.yml and extract job names (exclude utility jobs)
          # Excluded: check-all-jobs
          nightly_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/nightly-test-amd.yml | \
            grep -v -E '^(check-all-jobs)$' | \
            jq -R -s -c 'split("\n") | map(select(length > 0))')
          echo "nightly_jobs=$nightly_jobs" >> "$GITHUB_OUTPUT"
          echo "Nightly jobs: $nightly_jobs"

          # Parse pr-test-amd-rocm720.yml (exclude utility jobs)
          # Excluded: call-gate, check-changes, pr-test-amd-finish, cancel, check-all-jobs
          pr_rocm720_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/pr-test-amd-rocm720.yml | \
            grep -v -E '^(call-gate|check-changes|pr-test-amd-finish|cancel|check-all-jobs)$' | \
            jq -R -s -c 'split("\n") | map(select(length > 0))')
          echo "pr_rocm720_jobs=$pr_rocm720_jobs" >> "$GITHUB_OUTPUT"
          echo "PR ROCm 7.2 jobs: $pr_rocm720_jobs"

          # Parse nightly-test-amd-rocm720.yml (exclude utility jobs)
          # Excluded: check-all-jobs
          nightly_rocm720_jobs=$(yq -r '.jobs | keys | .[]' .github/workflows/nightly-test-amd-rocm720.yml | \
            grep -v -E '^(check-all-jobs)$' | \
            jq -R -s -c 'split("\n") | map(select(length > 0))')
          echo "nightly_rocm720_jobs=$nightly_rocm720_jobs" >> "$GITHUB_OUTPUT"
          echo "Nightly ROCm 7.2 jobs: $nightly_rocm720_jobs"

  # PR CI reports using dynamic matrix
  pr-ci-reports:
    name: PR - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Report
        timeout-minutes: 15
        env:
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "pr-test-amd.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Nightly AMD test reports using dynamic matrix
  nightly-reports:
    name: Nightly - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Nightly Report
        timeout-minutes: 15
        env:
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "nightly-test-amd.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # PR ROCm 7.2 CI reports using dynamic matrix
  pr-rocm720-ci-reports:
    name: PR ROCm720 - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.pr_rocm720_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate PR ROCm 7.2 Report
        timeout-minutes: 15
        env:
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "pr-test-amd-rocm720.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Nightly ROCm 7.2 reports using dynamic matrix
  nightly-rocm720-reports:
    name: Nightly ROCm720 - ${{ matrix.job_name }}
    needs: [parse-workflows, fetch-actions-data]
    if: ${{ !inputs.job_filter }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        job_name: ${{ fromJson(needs.parse-workflows.outputs.nightly_rocm720_jobs) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Nightly ROCm 7.2 Report
        timeout-minutes: 15
        env:
          JOB_NAME: ${{ matrix.job_name }}
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --job "$JOB_NAME" \
            --workflow "nightly-test-amd-rocm720.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary

  # Runner fleet report - cross-workflow runner analytics in a single pass
  runner-fleet-report:
    name: Runner Fleet Report
    if: ${{ !inputs.job_filter }}
    needs: fetch-actions-data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install tabulate

      - name: Download Actions data snapshot
        uses: actions/download-artifact@v4
        with:
          name: actions-job-snapshot
          path: ci-data

      - name: Generate Runner Fleet Report
        timeout-minutes: 30
        env:
          HOURS: ${{ inputs.hours || '24' }}
        run: |
          python scripts/ci/utils/query_job_status.py \
            --repo ${{ github.repository }} \
            --runner-report \
            --workflow "pr-test-amd.yml,nightly-test-amd.yml,pr-test-amd-rocm720.yml,nightly-test-amd-rocm720.yml" \
            --hours "$HOURS" \
            --input-data-file ci-data/actions-job-snapshot.json \
            --summary
auto-tune .github/workflows/auto-tune.yml
View raw YAML
# Manually triggered workflow. Its single job currently only checks out the
# repository.
name: Auto tune

on:
  workflow_dispatch:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
bot-bump-flashinfer-version perms .github/workflows/bot-bump-flashinfer-version.yml
View raw YAML
# Manually triggered: creates a bot branch, runs the flashinfer version bump
# script, and opens a pull request with the result.
name: Bot Bump Flashinfer Version

on:
  workflow_dispatch:
    inputs:
      new_version:
        description: 'New flashinfer version (e.g., 0.6.4)'
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-flashinfer-version:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      - name: Configure Git and branch
        env:
          # The dispatch input reaches the script via the environment instead
          # of `${{ }}` interpolation, so it cannot be parsed as shell code.
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          # Short random suffix keeps repeated bumps to the same version on
          # distinct branch names.
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-flashinfer-version-${NEW_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for the later "Commit and create PR" step.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"

      - name: Run flashinfer version bump script
        env:
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          python scripts/release/bump_flashinfer_version.py "$NEW_VERSION"

      - name: Commit and create PR
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          bash scripts/release/commit_and_pr.sh "flashinfer" "$NEW_VERSION" "$BRANCH_NAME"
bot-bump-kernel-version perms .github/workflows/bot-bump-kernel-version.yml
View raw YAML
# Manually triggered: creates a bot branch, runs the sgl-kernel version bump
# script, and opens a pull request with the result.
name: Bot Bump Kernel Version

on:
  workflow_dispatch:
    inputs:
      new_version:
        description: 'New sgl-kernel version (e.g., 0.3.12)'
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-kernel-version:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      - name: Configure Git and branch
        env:
          # The dispatch input reaches the script via the environment instead
          # of `${{ }}` interpolation, so it cannot be parsed as shell code.
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          # Short random suffix keeps repeated bumps to the same version on
          # distinct branch names.
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-kernel-version-${NEW_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for the later "Commit and create PR" step.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"

      - name: Run kernel version bump script
        env:
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          python scripts/release/bump_kernel_version.py "$NEW_VERSION"

      - name: Commit and create PR
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          bash scripts/release/commit_and_pr.sh "sgl-kernel" "$NEW_VERSION" "$BRANCH_NAME"
bot-bump-kernel-version-to-sglang perms .github/workflows/bot-bump-kernel-version-to-sglang.yml
View raw YAML
# Manually triggered: when the check script reports that a sync is needed,
# bumps the kernel version used by SGLang on a bot branch, opens a PR, and
# runs the nightly / PR test workflows against that branch.
name: Bot Bump Kernel Version to SGLang

on:
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-kernel-version-to-sglang:
    runs-on: ubuntu-latest
    outputs:
      branch_name: ${{ steps.set_output.outputs.branch_name }}
      needs_sync: ${{ steps.check_sync.outputs.needs_sync }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      # Later steps read this step's needs_sync and kernel_version outputs.
      - name: Check if sync is needed
        id: check_sync
        run: |
          python scripts/release/check_kernel_version_to_sglang.py

      - name: Configure Git and branch
        if: steps.check_sync.outputs.needs_sync == 'true'
        id: set_output
        env:
          # Step output handed over through the environment rather than being
          # interpolated into the script text.
          KERNEL_VERSION: ${{ steps.check_sync.outputs.kernel_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-kernel-version-to-sglang-${KERNEL_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for the "Commit and create PR" step.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"
          echo "KERNEL_VERSION=$KERNEL_VERSION" >> "$GITHUB_ENV"
          # Exposed as a job output for the downstream test jobs.
          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"

      - name: Run kernel version bump script
        if: steps.check_sync.outputs.needs_sync == 'true'
        run: |
          python scripts/release/bump_kernel_version_to_sglang.py

      - name: Commit and create PR
        if: steps.check_sync.outputs.needs_sync == 'true'
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
        run: |
          bash scripts/release/commit_and_pr_kernel_to_sglang.sh "$KERNEL_VERSION" "$BRANCH_NAME"

  run-nightly-tests-nvidia:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/nightly-test-nvidia.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-amd:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/nightly-test-amd.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-npu:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/nightly-test-npu.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xeon:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/pr-test-xeon.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xpu:
    needs: bump-kernel-version-to-sglang
    if: needs.bump-kernel-version-to-sglang.outputs.needs_sync == 'true'
    uses: ./.github/workflows/pr-test-xpu.yml
    with:
      ref: ${{ needs.bump-kernel-version-to-sglang.outputs.branch_name }}
    secrets: inherit
bot-bump-sglang-version perms .github/workflows/bot-bump-sglang-version.yml
View raw YAML
# Manually triggered: bumps the SGLang version on a bot branch, opens a PR,
# and runs the nightly / PR test workflows against that branch.
name: Bot Bump SGLang Version

on:
  workflow_dispatch:
    inputs:
      new_version:
        description: 'New SGLang version (e.g., 0.5.3 or 0.5.3rc0)'
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write

jobs:
  bump-sglang-version:
    runs-on: ubuntu-latest
    outputs:
      branch_name: ${{ steps.set_output.outputs.branch_name }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install Python dependencies
        run: |
          pip install tomli

      - name: Configure Git and branch
        id: set_output
        env:
          # The dispatch input reaches the script via the environment instead
          # of `${{ }}` interpolation, so it cannot be parsed as shell code.
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          RANDOM_SUFFIX=$(echo $RANDOM | md5sum | head -c 4)
          BRANCH_NAME="bot/bump-sglang-version-${NEW_VERSION}-${RANDOM_SUFFIX}"
          git checkout -b "$BRANCH_NAME"
          # Exported for the "Commit and create PR" step.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"
          # Exposed as a job output for the downstream test jobs.
          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"

      - name: Run SGLang version bump script
        env:
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          python scripts/release/bump_sglang_version.py "$NEW_VERSION"

      - name: Commit and create PR
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          NEW_VERSION: ${{ github.event.inputs.new_version }}
        run: |
          bash scripts/release/commit_and_pr.sh "SGLang" "$NEW_VERSION" "$BRANCH_NAME"

  run-nightly-tests-nvidia:
    needs: bump-sglang-version
    uses: ./.github/workflows/nightly-test-nvidia.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-amd:
    needs: bump-sglang-version
    uses: ./.github/workflows/nightly-test-amd.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-nightly-tests-npu:
    needs: bump-sglang-version
    uses: ./.github/workflows/nightly-test-npu.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xeon:
    needs: bump-sglang-version
    uses: ./.github/workflows/pr-test-xeon.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit

  run-pr-tests-xpu:
    needs: bump-sglang-version
    uses: ./.github/workflows/pr-test-xpu.yml
    with:
      ref: ${{ needs.bump-sglang-version.outputs.branch_name }}
    secrets: inherit
bot-cherry-pick perms .github/workflows/bot-cherry-pick.yml
View raw YAML
# Manually triggered: cherry-picks one commit onto a release branch, either
# through a pull request (default) or by pushing to the branch directly.
name: Bot Cherry Pick to Release Branch

on:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Commit SHA to cherry-pick (full or short hash)'
        required: true
        type: string
      target_branch:
        description: 'Target release branch (e.g., release/v0.5.7)'
        required: true
        type: string
      create_pr:
        description: 'Create a PR instead of pushing directly'
        required: false
        type: boolean
        default: true

permissions:
  contents: write
  pull-requests: write

# One cherry-pick at a time per target branch; queued runs are not cancelled.
concurrency:
  group: cherry-pick-${{ github.event.inputs.target_branch }}
  cancel-in-progress: false

jobs:
  cherry-pick:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: 'prod'
    steps:
      - name: Validate inputs
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          if [[ ! "$TARGET_BRANCH" =~ ^release/v[0-9]+\.[0-9]+(\.[0-9]+)?$ ]]; then
            echo "::error::Target branch must match pattern 'release/vX.Y' or 'release/vX.Y.Z' (e.g., release/v0.5.7)"
            exit 1
          fi

      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}

      - name: Configure Git
        run: |
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"

      - name: Validate target branch exists
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          git fetch origin
          if ! git ls-remote --exit-code --heads origin "$TARGET_BRANCH" > /dev/null 2>&1; then
            echo "::error::Target branch '$TARGET_BRANCH' does not exist on remote"
            exit 1
          fi

      - name: Get commit info
        id: commit_info
        env:
          COMMIT_SHA_INPUT: ${{ github.event.inputs.commit_sha }}
        run: |
          # Resolve the input to a full commit SHA. The ^{commit} suffix makes
          # this fail for objects that are not (and do not peel to) a commit,
          # such as a tree or blob hash.
          if ! FULL_SHA=$(git rev-parse --verify --quiet "${COMMIT_SHA_INPUT}^{commit}"); then
            echo "::error::Commit SHA '$COMMIT_SHA_INPUT' does not exist or is not a commit"
            exit 1
          fi
          COMMIT_TITLE=$(git log -1 --format="%s" "$FULL_SHA")
          SHORT_SHA=$(git rev-parse --short "$FULL_SHA")
          echo "full_sha=$FULL_SHA" >> "$GITHUB_OUTPUT"
          echo "short_sha=$SHORT_SHA" >> "$GITHUB_OUTPUT"
          # Delimiter-style output with a random delimiter, so a commit title
          # can never be mistaken for the terminator.
          DELIMITER="EOF_$(head -c 8 /dev/urandom | xxd -p)"
          {
            echo "commit_title<<${DELIMITER}"
            echo "$COMMIT_TITLE"
            echo "${DELIMITER}"
          } >> "$GITHUB_OUTPUT"
          echo "Cherry-picking commit: $SHORT_SHA - $COMMIT_TITLE"

      - name: Cherry-pick commit
        id: cherry_pick
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          FULL_SHA: ${{ steps.commit_info.outputs.full_sha }}
          SHORT_SHA: ${{ steps.commit_info.outputs.short_sha }}
          CREATE_PR: ${{ github.event.inputs.create_pr }}
        run: |
          if [[ "$CREATE_PR" == "true" ]]; then
            # Create a new branch for the PR
            RANDOM_SUFFIX=$(head -c 4 /dev/urandom | xxd -p)
            NEW_BRANCH="cherry-pick/${SHORT_SHA}-to-${TARGET_BRANCH#release/}-${RANDOM_SUFFIX}"
            git checkout -b "$NEW_BRANCH" "origin/$TARGET_BRANCH"
            echo "new_branch=$NEW_BRANCH" >> "$GITHUB_OUTPUT"
          else
            # Checkout target branch directly
            git checkout "$TARGET_BRANCH"
          fi
          # Attempt cherry-pick
          if git cherry-pick "$FULL_SHA"; then
            echo "cherry_pick_success=true" >> "$GITHUB_OUTPUT"
          else
            echo "::error::Cherry-pick failed due to conflicts. Please resolve manually."
            git cherry-pick --abort || true
            echo "cherry_pick_success=false" >> "$GITHUB_OUTPUT"
            exit 1
          fi

      - name: Push changes
        if: steps.cherry_pick.outputs.cherry_pick_success == 'true'
        env:
          CREATE_PR: ${{ github.event.inputs.create_pr }}
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          NEW_BRANCH: ${{ steps.cherry_pick.outputs.new_branch }}
        run: |
          if [[ "$CREATE_PR" == "true" ]]; then
            git push origin "$NEW_BRANCH"
          else
            git push origin "$TARGET_BRANCH"
          fi

      - name: Create Pull Request
        if: steps.cherry_pick.outputs.cherry_pick_success == 'true' && github.event.inputs.create_pr == 'true'
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_PULL_REQUEST }}
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          SHORT_SHA: ${{ steps.commit_info.outputs.short_sha }}
          COMMIT_TITLE: ${{ steps.commit_info.outputs.commit_title }}
          FULL_SHA: ${{ steps.commit_info.outputs.full_sha }}
          NEW_BRANCH: ${{ steps.cherry_pick.outputs.new_branch }}
        run: |
          PR_TITLE="[Cherry-pick] ${COMMIT_TITLE} to ${TARGET_BRANCH}"
          # Blank lines separate the Markdown paragraphs; without one before
          # "---" the preceding line would render as a heading.
          gh pr create \
            --title "$PR_TITLE" \
            --base "$TARGET_BRANCH" \
            --head "$NEW_BRANCH" \
            --label "cherry-pick" \
            --body-file - <<EOF
          Cherry-pick of commit ${FULL_SHA} to \`${TARGET_BRANCH}\`

          **Original commit:** ${FULL_SHA}
          **Original title:** ${COMMIT_TITLE}

          ---

          *This PR was automatically created by the cherry-pick workflow.*
          EOF

      # Runs even after a failure so the outcome is always recorded.
      - name: Summary
        if: always()
        env:
          FULL_SHA: ${{ steps.commit_info.outputs.full_sha }}
          COMMIT_TITLE: ${{ steps.commit_info.outputs.commit_title }}
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
          CHERRY_PICK_SUCCESS: ${{ steps.cherry_pick.outputs.cherry_pick_success }}
          CREATE_PR: ${{ github.event.inputs.create_pr }}
          NEW_BRANCH: ${{ steps.cherry_pick.outputs.new_branch }}
          ACTOR: ${{ github.actor }}
        run: |
          {
            echo "## Cherry-Pick Summary"
            echo ""
            echo "- **Triggered by:** @${ACTOR}"
            echo "- **Commit:** ${FULL_SHA}"
            echo "- **Title:** ${COMMIT_TITLE}"
            echo "- **Target Branch:** ${TARGET_BRANCH}"
            if [[ "$CHERRY_PICK_SUCCESS" == "true" ]]; then
              echo "- **Status:** ✅ Success"
            else
              echo "- **Status:** ❌ Failed"
            fi
            if [[ "$CREATE_PR" == "true" && "$CHERRY_PICK_SUCCESS" == "true" ]]; then
              echo "- **PR Branch:** ${NEW_BRANCH}"
            fi
          } >> "$GITHUB_STEP_SUMMARY"
cancel-pr-workflow-on-merge perms .github/workflows/cancel-pr-workflow-on-merge.yml
View raw YAML
# When a pull request is closed as merged, cancels the workflow runs that are
# still associated with that pull request.
name: Cancel PR Workflows on Merge

on:
  pull_request_target:
    types:
      - closed

permissions:
  actions: write

jobs:
  cancel:
    # "closed" also fires for unmerged PRs; only act on real merges.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): third-party action referenced by tag in a job holding
      # actions: write - consider pinning to a full commit SHA.
      - name: Cancel Previous Runs
        uses: styfle/cancel-workflow-action@0.12.1
        with:
          workflow_id: all
          access_token: ${{ secrets.GITHUB_TOKEN }}
          ignore_sha: true
          pr_number: ${{ github.event.pull_request.number }}
cancel-unfinished-pr-tests perms .github/workflows/cancel-unfinished-pr-tests.yml
View raw YAML
# Manually triggered maintenance tool: lists unfinished runs of the given
# workflows and cancels those that belong to open pull requests, except PRs
# protected by the "bypass-maintenance" or (by default) "high priority" label.
name: Cancel Unfinished PR Runs

on:
  workflow_dispatch:
    inputs:
      workflows:
        description: 'Space-separated list of workflow filenames to cancel'
        required: true
        type: string
        default: 'pr-test.yml'
      include_high_priority:
        description: 'Also cancel runs from high-priority PRs'
        required: false
        type: boolean
        default: false

permissions:
  actions: write  # Needed to cancel runs
  contents: read  # Needed to read repo info
  pull-requests: read  # needed for gh pr view (labels)

jobs:
  cancel-unfinished-pr-runs:
    runs-on: ubuntu-latest
    steps:
      - name: Install GitHub CLI
        run: sudo apt-get install -y gh jq

      - name: Cancel unfinished PR-associated runs (skip high-priority PRs)
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          WORKFLOWS: ${{ github.event.inputs.workflows || 'pr-test.yml' }}
          INCLUDE_HIGH_PRIORITY: ${{ github.event.inputs.include_high_priority || 'false' }}
        shell: bash
        run: |
          set -euo pipefail
          # Read the space-separated string from the input into a bash array
          read -r -a WORKFLOW_FILES <<< "${WORKFLOWS}"
          echo "Targeting ${#WORKFLOW_FILES[@]} workflow(s): ${WORKFLOWS}"
          echo ""
          for workflow_file in "${WORKFLOW_FILES[@]}"; do
            echo "========================================="
            echo "Workflow: $workflow_file"
            echo "========================================="
            # Get all unfinished runs
            all_runs=$(gh run list \
              --repo "$REPO" \
              --workflow "$workflow_file" \
              --json databaseId,status,event,url,createdAt \
              --limit 1000 \
              | jq -c '.[] | select(.status=="queued" or .status=="waiting" or .status=="in_progress")')
            if [ -z "$all_runs" ]; then
              echo "✅ No unfinished runs found"
              echo ""
              continue
            fi
            # Count runs by event type
            total_runs=$(echo "$all_runs" | wc -l)
            pr_runs=$(echo "$all_runs" | jq -s '[.[] | select(.event=="pull_request")] | length')
            other_runs=$(echo "$all_runs" | jq -s '[.[] | select(.event!="pull_request")] | length')
            echo "📊 Summary: $total_runs unfinished runs ($pr_runs PR-related, $other_runs other)"
            echo ""
            # Process non-PR runs first (listed only, never cancelled)
            if [ "$other_runs" -gt 0 ]; then
              echo "--- Non-PR Runs ---"
              echo "$all_runs" | jq -c 'select(.event!="pull_request")' | while read -r run; do
                run_url=$(echo "$run" | jq -r '.url')
                run_event=$(echo "$run" | jq -r '.event')
                run_status=$(echo "$run" | jq -r '.status')
                echo " • $run_event ($run_status): $run_url"
              done
              echo ""
            fi
            # Process PR runs
            if [ "$pr_runs" -gt 0 ]; then
              echo "--- PR Runs (checking for cancellation) ---"
              echo "$all_runs" | jq -c 'select(.event=="pull_request")' | while read -r run; do
                run_id=$(echo "$run" | jq -r '.databaseId')
                run_url=$(echo "$run" | jq -r '.url')
                run_status=$(echo "$run" | jq -r '.status')
                echo ""
                echo "Run ($run_status): $run_url"
                # Fetch full run details to get head repository and branch info
                run_details=$(gh api -H "Accept: application/vnd.github+json" \
                  "repos/$REPO/actions/runs/$run_id" 2>/dev/null || true)
                if [ -z "$run_details" ]; then
                  echo " ⚠️ Could not fetch run details, skipping"
                  continue
                fi
                # Get head owner and branch (works for both fork and non-fork PRs)
                head_owner=$(echo "$run_details" | jq -r '.head_repository.owner.login // empty')
                head_branch=$(echo "$run_details" | jq -r '.head_branch // empty')
                if [ -z "$head_owner" ] || [ -z "$head_branch" ]; then
                  echo " ⚠️ Missing head info, skipping"
                  continue
                fi
                echo " Branch: ${head_owner}:${head_branch}"
                # Find PR by searching with head=owner:branch
                pr_number=$(gh api -H "Accept: application/vnd.github+json" \
                  "repos/$REPO/pulls?state=open&head=${head_owner}:${head_branch}" \
                  --jq '.[0].number // empty' 2>/dev/null || true)
                if [ -z "$pr_number" ]; then
                  echo " ⚠️ No open PR found, skipping"
                  continue
                fi
                pr_url="https://github.com/$REPO/pull/$pr_number"
                echo " PR: $pr_url"
                # Check for protective labels. If the lookup itself fails we
                # cannot tell whether the PR is protected, so the run is left
                # alone rather than cancelled on an empty label list.
                if ! labels=$(gh pr view "$pr_number" --repo "$REPO" --json labels \
                  | jq -r '.labels[].name'); then
                  echo " ⚠️ Could not fetch labels, skipping"
                  continue
                fi
                # Here-strings instead of `echo | grep -q`: under pipefail an
                # early-exiting grep can make the pipeline report failure.
                if grep -Fxq "bypass-maintenance" <<< "$labels"; then
                  echo " 🛑 Skipping (bypass-maintenance label, never cancelled)"
                  continue
                fi
                if grep -Fxq "high priority" <<< "$labels"; then
                  if [ "$INCLUDE_HIGH_PRIORITY" != "true" ]; then
                    echo " 🛑 Skipping (high priority label)"
                    continue
                  fi
                  echo " ⚠️ High priority PR, but include_high_priority is enabled"
                fi
                echo " 🚫 Cancelling..."
                gh run cancel "$run_id" --repo "$REPO" || echo " ⚠️ Cancellation failed"
              done
            fi
            echo ""
          done
          echo "========================================="
          echo "✅ Processing complete"
          echo "========================================="
ci-coverage-overview .github/workflows/ci-coverage-overview.yml
View raw YAML
# Publishes CI coverage reports: three registry-based report sections on every
# trigger, a unit-test code-coverage run on non-PR triggers, and an optional
# JSON export when dispatched with output_format=json.
name: CI Coverage Overview

on:
  schedule:
    - cron: '0 6 * * *'  # Daily at 6 AM UTC
  pull_request:
    paths:
      - '.github/workflows/ci-coverage-overview.yml'
      - 'scripts/ci/utils/ci_coverage_report.py'
      - 'test/registered/**'
  workflow_dispatch:
    inputs:
      output_format:
        description: 'Output format'
        required: false
        default: 'markdown'
        type: choice
        options:
          - markdown
          - json

jobs:
  # The three report jobs below differ only in the --section they request.
  summary:
    name: Summary
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Generate Summary Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --section summary

  by-folder:
    name: Tests by Folder
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Generate Tests by Folder Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --section by-folder

  by-suite:
    name: Tests by Suite
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Generate Tests by Suite Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --section by-suite

  unit-test-coverage:
    name: Unit Test Code Coverage
    # Skipped for pull requests; runs on schedule and manual dispatch.
    if: github.event_name != 'pull_request'
    runs-on: 1-gpu-h100
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install dependencies
        timeout-minutes: 10
        run: |
          pip install -e "python/[test]"
      - name: Run unit tests with coverage
        timeout-minutes: 10
        # An explicit `shell: bash` makes the runner invoke bash with
        # `-eo pipefail`. Without it the pipeline's exit status is tee's, so a
        # failing pytest run would still leave this step green.
        shell: bash
        run: |
          pytest test/registered/unit/ \
            --cov --cov-config=.coveragerc \
            --cov-report=term-missing:skip-covered \
            --continue-on-collection-errors \
            -v | tee coverage_output.txt
      - name: Write coverage to summary
        # Always runs so the summary is written even when the tests fail.
        if: always()
        run: |
          echo "## Unit Test Code Coverage" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Commit:** \`${GITHUB_SHA::8}\` | **Branch:** \`${GITHUB_REF_NAME}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          # Test result line (e.g., "== 42 passed, 1 failed in 23.5s ==")
          echo '```' >> $GITHUB_STEP_SUMMARY
          grep -E '^=+.*passed' coverage_output.txt >> $GITHUB_STEP_SUMMARY || true
          echo "" >> $GITHUB_STEP_SUMMARY
          # Coverage total
          grep -E '^TOTAL ' coverage_output.txt >> $GITHUB_STEP_SUMMARY || true
          echo '```' >> $GITHUB_STEP_SUMMARY
          # Partially covered core modules (1-49%) — most actionable for contributors
          # Only show modules with testable logic; skip configs, models, layers, etc.
          # The awk program finds the first "NN%" column on each python/... row
          # and prints the path, the column two to its left, and the percentage.
          LOW_COV=$(awk '/^python\/.*%/ {
            for (i=1; i<=NF; i++) {
              if ($i ~ /^[0-9]+%$/) {
                pct = $i + 0
                if (pct >= 1 && pct < 50) printf "%-80s %5s %s\n", $1, $(i-2), $i
                break
              }
            }
          }' coverage_output.txt \
            | grep -E '/(mem_cache|managers|sampling|parser|observability|function_call|entrypoints|speculative|multimodal|utils)/' \
            | head -40 || true)
          if [ -n "$LOW_COV" ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "<details><summary>Core modules with coverage below 50% — good candidates for more unit tests</summary>" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
            echo "$LOW_COV" >> $GITHUB_STEP_SUMMARY
            echo '```' >> $GITHUB_STEP_SUMMARY
            echo "</details>" >> $GITHUB_STEP_SUMMARY
          fi

  json-export:
    name: JSON Export
    runs-on: ubuntu-latest
    # Only meaningful for workflow_dispatch; `inputs` is empty on other events.
    if: inputs.output_format == 'json'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Generate JSON Report
        run: |
          python scripts/ci/utils/ci_coverage_report.py --output-format json > ci_coverage.json
      - name: Upload JSON artifact
        uses: actions/upload-artifact@v4
        with:
          name: ci-coverage-report
          path: ci_coverage.json
ci-failure-monitor perms .github/workflows/ci-failure-monitor.yml
View raw YAML
# Runs the CI failure analysis script twice a day, uploads the JSON report as
# an artifact, and posts it to Slack when a Slack token is configured.
name: CI Failure Monitor

on:
  schedule:
    - cron: '0 */12 * * *'  # Every 12 hours
  workflow_dispatch:

concurrency:
  group: ci-failure-monitor-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  actions: read

jobs:
  failure-analysis:
    # NOTE(review): this workflow has no pull_request trigger, so the second
    # clause can never match; effectively this restricts the job to the
    # upstream repository.
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.14'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests slack_sdk
      - name: Run Failure Analysis
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
          GH_PAT_FOR_RUNNER_ADMIN: ${{ secrets.GH_PAT_FOR_RUNNER_ADMIN }}
          PYTHONUNBUFFERED: 1
          PYTHONIOENCODING: utf-8
        run: |
          cd scripts/ci_monitor
          # Quote the token so an empty or unusual secret value cannot be
          # word-split or swallow the following option as its argument.
          python ci_failures_analysis.py \
            --token "$GITHUB_TOKEN" \
            --limit 100 \
            --output "ci_failure_analysis_$(date +%Y%m%d_%H%M%S).json"
      - name: Upload Analysis Results
        uses: actions/upload-artifact@v4
        with:
          name: ci-failure-analysis-${{ github.run_number }}
          path: |
            scripts/ci_monitor/ci_failure_analysis_*.json
          retention-days: 7
      - name: Send Slack Notification
        # Runs even when the analysis step failed; the script below exits
        # cleanly if no report file was produced.
        if: always()
        env:
          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        run: |
          cd scripts/ci_monitor
          # Newest report first; empty when no report exists.
          LATEST_REPORT=$(ls -t ci_failure_analysis_*.json | head -1)
          if [ ! -f "$LATEST_REPORT" ]; then
            echo "No report found, so skipping Slack notification"
            exit 0
          fi
          if [ -n "$SGLANG_DIFFUSION_SLACK_TOKEN" ]; then
            python3 post_ci_failures_to_slack.py --report-file "$LATEST_REPORT"
          else
            echo "SGLANG_DIFFUSION_SLACK_TOKEN not configured, skipping notification"
          fi
close-inactive-issues perms .github/workflows/close-inactive-issues.yml
View raw YAML
# Daily job that closes open issues with no activity for 60 days, labels them
# 'inactive', and leaves an explanatory comment. Issues labelled
# 'good first issue' are never closed.
name: Close Inactive Issues

on:
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

permissions:
  issues: write
  contents: read

jobs:
  close-inactive-issues:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    steps:
      - name: Check and close inactive issues
        uses: actions/github-script@v6
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const sixtyDaysAgo = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000);
            const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/');
            console.log(`Owner: ${owner}, Repo: ${repo}`);

            // Fetch one page of open issues, least recently updated first.
            // NOTE(review): the issues list endpoint also returns pull
            // requests; confirm whether inactive PRs are meant to be closed.
            async function fetchIssues(page = 1) {
              console.log(`Fetching issues for ${owner}/${repo}, page ${page}`);
              return await github.rest.issues.listForRepo({
                owner,
                repo,
                state: 'open',
                sort: 'updated',
                direction: 'asc',
                per_page: 100,
                page: page
              });
            }

            // Phase 1: walk the pages WITHOUT modifying anything and collect
            // the stale issues. Closing while paging removes entries from the
            // 'open' listing, which shifts later pages and makes the loop skip
            // as many issues as were just closed.
            async function collectStaleIssues() {
              const stale = [];
              let page = 1;
              while (true) {
                let issues;
                try {
                  issues = await fetchIssues(page);
                } catch (error) {
                  console.error(`Error fetching issues on page ${page}: ${error.message}`);
                  return stale;
                }
                console.log(`Fetched ${issues.data.length} issues on page ${page}`);
                if (issues.data.length === 0) {
                  return stale;
                }
                for (const issue of issues.data) {
                  // Skip if the issue has 'good first issue' label
                  if (issue.labels.some(label => label.name === 'good first issue')) {
                    console.log(`Skipping issue #${issue.number} as it's marked as 'good first issue'`);
                    continue;
                  }
                  // Results are sorted by update time ascending, so the first
                  // recent issue means everything after it is recent too.
                  if (new Date(issue.updated_at) >= sixtyDaysAgo) {
                    console.log(`Issue #${issue.number} is still active. Stopping processing.`);
                    return stale;
                  }
                  stale.push(issue);
                }
                page += 1;
              }
            }

            // Phase 2: close, label and comment on one issue. Failures are
            // logged and do not abort the remaining issues.
            async function closeIssue(issue) {
              try {
                await github.rest.issues.update({
                  owner,
                  repo,
                  issue_number: issue.number,
                  state: 'closed',
                  labels: [...issue.labels.map(l => l.name), 'inactive']
                });
                await github.rest.issues.createComment({
                  owner,
                  repo,
                  issue_number: issue.number,
                  body: 'This issue has been automatically closed due to inactivity. Please feel free to reopen it if needed.'
                });
                console.log(`Closed issue #${issue.number} due to inactivity.`);
              } catch (error) {
                console.error(`Failed to close issue #${issue.number}: ${error.message}`);
              }
            }

            async function processIssues() {
              console.log('Starting to process issues');
              console.log(`Repository: ${owner}/${repo}`);
              const staleIssues = await collectStaleIssues();
              for (const issue of staleIssues) {
                await closeIssue(issue);
              }
              console.log('Finished processing issues');
            }

            await processIssues();
diffusion-ci-gt-gen matrix perms .github/workflows/diffusion-ci-gt-gen.yml
View raw YAML
# Manually triggered workflow that regenerates diffusion CI ground-truth
# outputs on 1-GPU and 2-GPU runners (two partitions each), then publishes the
# collected image files.
name: Diffusion CI Ground Truth Generation

on:
  workflow_dispatch:
    inputs:
      ref:
        description: 'Git ref to checkout'
        required: false
        default: ''
        type: string
      case_ids:
        description: 'Specific case IDs to run (space-separated, optional)'
        required: false
        default: ''
        type: string

concurrency:
  group: diffusion-ci-gt-gen-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write
  actions: read

jobs:
  multimodal-diffusion-gen-1gpu:
    if: github.repository == 'sgl-project/sglang'
    runs-on: 1-gpu-h100
    strategy:
      matrix:
        part: [0, 1]
    timeout-minutes: 150
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion
      - name: Generate outputs
        shell: bash
        env:
          # Free-form user input is passed through the environment instead of
          # being expanded into the script text, so shell metacharacters in it
          # are never interpreted as code.
          CASE_IDS: ${{ inputs.case_ids }}
        run: |
          cd python
          case_args=()
          if [ -n "$CASE_IDS" ]; then
            # Split on whitespace into separate arguments, without globbing.
            read -r -a case_ids <<< "$CASE_IDS"
            case_args=(--case-ids "${case_ids[@]}")
          fi
          python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            --out-dir ./diffusion-ci-outputs \
            "${case_args[@]}"
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-gen-1gpu-part${{ matrix.part }}
          path: python/diffusion-ci-outputs
          retention-days: 7

  multimodal-diffusion-gen-2gpu:
    if: github.repository == 'sgl-project/sglang'
    runs-on: 2-gpu-h100
    strategy:
      matrix:
        part: [0, 1]
    timeout-minutes: 150
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref || github.ref }}
      - name: Install dependencies
        run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion
      - name: Generate outputs
        shell: bash
        env:
          # See the 1-GPU job: user input goes through env, not the script text.
          CASE_IDS: ${{ inputs.case_ids }}
        run: |
          cd python
          case_args=()
          if [ -n "$CASE_IDS" ]; then
            # Split on whitespace into separate arguments, without globbing.
            read -r -a case_ids <<< "$CASE_IDS"
            case_args=(--case-ids "${case_ids[@]}")
          fi
          python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            --out-dir ./diffusion-ci-outputs \
            "${case_args[@]}"
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: diffusion-gen-2gpu-part${{ matrix.part }}
          path: python/diffusion-ci-outputs
          retention-days: 7

  diffusion-ci-push:
    needs: [multimodal-diffusion-gen-1gpu, multimodal-diffusion-gen-2gpu]
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): unlike the generation jobs, this checkout does not use
      # inputs.ref, so the publish script comes from the dispatching ref.
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: diffusion-gen-*
          path: combined
          merge-multiple: true
      - name: Collect image files
        run: |
          mkdir -p gt_images
          # Flatten every image into one directory; same-named files overwrite.
          find combined \( -name "*.png" -o -name "*.jpg" -o -name "*.jpeg" -o -name "*.webp" \) -type f -exec cp -f {} gt_images/ \;
      - name: Publish GT images to sglang-bot/sglang-ci-data
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir gt_images
execute-notebook .github/workflows/execute-notebook.yml
View raw YAML
# Executes the documentation notebooks on a GPU runner for PRs against main
# (gated by the shared PR gate workflow) and on manual dispatch.
name: Execute Notebooks

on:
  pull_request:
    branches:
      - main
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    paths:
      - 'python/sglang/**'
      - 'docs/**'
      - '!python/sglang/**/*.md'
      - '!docs/**/*.md'
  workflow_dispatch:

concurrency:
  group: execute-notebook-${{ github.ref }}
  cancel-in-progress: true

env:
  SGLANG_IS_IN_CI: true

jobs:
  call-gate:
    # Align with PR Test: fail fast if PR doesn't have run-ci label.
    # This makes /tag-and-rerun-ci work by rerunning this failed workflow.
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit

  run-all-notebooks:
    needs:
      - call-gate
    runs-on: 1-gpu-h100
    # Non-PR triggers always run; PR triggers require the gate to have passed.
    if: github.event_name != 'pull_request' || needs.call-gate.result == 'success'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh
          pip install -r docs/requirements.txt
          apt-get update && apt-get install -y pandoc parallel retry
          ln -sf "$(which python3)" /usr/bin/python
      - name: Setup Jupyter Kernel
        run: |
          python -m ipykernel install --user --name python3 --display-name "Python 3"
      - name: Execute notebooks
        timeout-minutes: 40
        run: |
          cd docs
          make clean
          make compile

  notebook-finish:
    needs:
      - call-gate
      - run-all-notebooks
    runs-on: ubuntu-latest
    # Runs regardless of upstream outcome, unless the notebook job was skipped.
    if: always() && needs.run-all-notebooks.result != 'skipped'
    steps:
      - name: Check all dependent job statuses
        run: |
          # Fail this job if any job listed in `needs` failed or was cancelled.
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
labeler perms .github/workflows/labeler.yml
View raw YAML
# Applies labels to pull requests based on the changed files, using the rules
# in .github/labeler.yml.
name: Auto Label PRs

on:
  pull_request_target:
    types:
      - opened
      - synchronize
      - reopened

permissions:
  contents: read
  pull-requests: write

jobs:
  label:
    runs-on: ubuntu-latest
    steps:
      - name: Auto-label by file changes
        uses: actions/labeler@v5
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          configuration-path: .github/labeler.yml
          # Labels are only added here; sync-labels is disabled.
          sync-labels: false
lint .github/workflows/lint.yml
View raw YAML
# Lint checks for pushes to main and PRs against main: pre-commit hooks,
# offline documentation link checks, and clang-format on sgl-kernel.
name: Lint

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        # v5 to match the setup-python version used by the other workflows.
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install
      - name: Run pre-commit checks
        # The no-commit-to-branch hook is skipped via SKIP for this run.
        run: SKIP=no-commit-to-branch pre-commit run --all-files --show-diff-on-failure
      - name: Run lychee docs checks (offline references)
        uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411  # v2
        with:
          args: --config .github/linters/lychee.toml README.md "docs/**/*.md" "docs/**/*.rst" "docs/**/*.ipynb"
      - name: Run sgl-kernel clang-format checks
        uses: DoozyX/clang-format-lint-action@v0.20
        with:
          source: sgl-kernel
          extensions: h,c,cpp,hpp,cu,cuh,cc
          clangFormatVersion: 20
          style: file
list-active-pr-runs perms .github/workflows/list-active-pr-runs.yml
View raw YAML
# Manually triggered report of queued / waiting / in-progress runs for the
# given workflows, grouped by pull request (high-priority PRs first), followed
# by runs that have no open PR and an overall summary.
name: List Active Runs

on:
  workflow_dispatch:
    inputs:
      workflows:
        description: 'Space-separated list of workflow filenames to check'
        required: false
        type: string
        default: 'pr-test.yml'

permissions:
  actions: read
  contents: read
  pull-requests: read

jobs:
  list-active-runs:
    runs-on: ubuntu-latest
    steps:
      - name: Install GitHub CLI
        run: sudo apt-get install -y gh jq
      - name: List active runs grouped by PR
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          WORKFLOWS: ${{ github.event.inputs.workflows || 'pr-test.yml' }}
        shell: bash
        run: |
          set -euo pipefail

          # Print one record from the data file. Records are '|'-separated:
          #   pr|workflow|run_id|status|running|queued|runners|queue_minutes|
          #   created_at|head_sha|attempt|event|branch
          # Pass "with-ref" as the second argument to also print the
          # triggering event and branch (used for runs without a PR).
          print_run() {
            local record="$1" show_ref="${2:-}"
            local workflow run_id status running queued runners queue_min attempt event branch
            IFS='|' read -r _ workflow run_id status running queued runners queue_min _ _ attempt event branch <<< "$record"
            local run_url="https://github.com/$REPO/actions/runs/$run_id"
            # Attempt 1 is the original run; anything above it is a retry.
            local retry_count=$((attempt - 1))
            local retry_indicator=""
            if [ "$retry_count" -gt 0 ]; then
              retry_indicator=" 🔄 Retry #$retry_count"
            fi
            echo " 📦 Workflow: $workflow (Run #$run_id)$retry_indicator"
            if [ "$show_ref" = "with-ref" ]; then
              echo " Event: $event"
              echo " Branch: $branch"
            fi
            echo " Status: $status"
            echo " 🟢 Running jobs: $running"
            echo " 🟡 Queued jobs: $queued"
            if [ "$running" -gt 0 ] && [ "$runners" != "" ]; then
              echo " 🖥️ Runners: $runners"
            fi
            if [ "$queue_min" -gt 0 ]; then
              echo " ⏱️ Queue time: ${queue_min} minutes"
            fi
            echo " 🔗 Run URL: $run_url"
            echo ""
          }

          echo "========================================="
          echo "🔍 Active Workflow Runs Report"
          echo "========================================="
          echo ""
          # Get all workflows or specific ones
          read -r -a workflow_files <<< "${WORKFLOWS}"
          echo "📋 Checking specified workflows: ${WORKFLOWS}"
          echo ""
          # Create a temporary file to store PR data
          pr_data_file=$(mktemp)

          # Process each workflow (quoted so filenames are never re-split or globbed)
          for workflow_file in "${workflow_files[@]}"; do
            echo "Scanning workflow: $workflow_file"
            # Get all active runs (queued, waiting, in_progress)
            active_runs=$(gh run list \
              --repo "$REPO" \
              --workflow "$workflow_file" \
              --json databaseId,status,event,headBranch,createdAt,updatedAt,headSha,number,attempt \
              --limit 500 \
              | jq -c '.[] | select(.status=="queued" or .status=="waiting" or .status=="in_progress")')
            if [ -z "$active_runs" ]; then
              continue
            fi
            # Process each run. The loop runs in a pipeline subshell, so
            # results are handed back through the data file, not variables.
            echo "$active_runs" | while read -r run; do
              run_id=$(echo "$run" | jq -r '.databaseId')
              run_status=$(echo "$run" | jq -r '.status')
              run_event=$(echo "$run" | jq -r '.event')
              created_at=$(echo "$run" | jq -r '.createdAt')
              head_sha=$(echo "$run" | jq -r '.headSha')
              run_attempt=$(echo "$run" | jq -r '.attempt // 1')
              # Get detailed run information including jobs
              run_details=$(gh api "repos/$REPO/actions/runs/$run_id" 2>/dev/null || true)
              if [ -z "$run_details" ]; then
                continue
              fi
              head_owner=$(echo "$run_details" | jq -r '.head_repository.owner.login // empty')
              head_branch=$(echo "$run_details" | jq -r '.head_branch // empty')
              if [ -z "$head_owner" ] || [ -z "$head_branch" ]; then
                continue
              fi
              # Find PR number (may be empty for non-PR runs)
              pr_number=$(gh api "repos/$REPO/pulls?state=open&head=${head_owner}:${head_branch}" \
                --jq '.[0].number // empty' 2>/dev/null || true)
              if [ -z "$pr_number" ]; then
                pr_number="NO_PR"
              fi
              # Get jobs for this run (with pagination to avoid missing jobs)
              jobs=$(gh api "repos/$REPO/actions/runs/$run_id/jobs" --paginate --jq '.jobs[]' | jq -s '.')
              running_jobs=$(echo "$jobs" | jq '[.[] | select(.status=="in_progress")] | length')
              queued_jobs=$(echo "$jobs" | jq '[.[] | select(.status=="queued" or .status=="waiting")] | length')
              # Get runner info for running jobs
              runners=$(echo "$jobs" | jq -r '.[] | select(.status=="in_progress") | .runner_name // "N/A"' | paste -sd "," -)
              # Calculate queue time; falls back to 0 minutes if the
              # timestamp cannot be parsed.
              current_time=$(date -u +%s)
              created_time=$(date -u -d "$created_at" +%s 2>/dev/null || echo "$current_time")
              queue_time=$((current_time - created_time))
              queue_minutes=$((queue_time / 60))
              # Store data in temporary file (unified format with event and branch)
              echo "$pr_number|$workflow_file|$run_id|$run_status|$running_jobs|$queued_jobs|$runners|$queue_minutes|$created_at|$head_sha|$run_attempt|$run_event|$head_branch" >> "$pr_data_file"
            done
          done

          echo ""
          echo "========================================="
          echo "📊 Active Runs Summary"
          echo "========================================="
          echo ""
          if [ ! -s "$pr_data_file" ]; then
            echo "✅ No active runs found"
            rm -f "$pr_data_file"
            exit 0
          fi

          # Get unique PR numbers (exclude NO_PR entries)
          pr_numbers=$(cut -d'|' -f1 < "$pr_data_file" | grep -v '^NO_PR$' | sort -u || true)
          # Separate high priority and normal PRs
          high_priority_prs=()
          normal_prs=()
          for pr_num in $pr_numbers; do
            labels=$(gh pr view "$pr_num" --repo "$REPO" --json labels \
              | jq -r '.labels[].name' 2>/dev/null || true)
            if echo "$labels" | grep -Fxq "high priority"; then
              high_priority_prs+=($pr_num)
            else
              normal_prs+=($pr_num)
            fi
          done
          # Combine: high priority first, then normal
          sorted_pr_numbers=("${high_priority_prs[@]}" "${normal_prs[@]}")

          pr_count=0
          total_running=0
          total_queued=0
          for pr_num in "${sorted_pr_numbers[@]}"; do
            pr_count=$((pr_count + 1))
            # Get PR details
            pr_info=$(gh pr view "$pr_num" --repo "$REPO" --json title,author,labels,url 2>/dev/null || true)
            if [ -z "$pr_info" ]; then
              continue
            fi
            pr_title=$(echo "$pr_info" | jq -r '.title')
            pr_author=$(echo "$pr_info" | jq -r '.author.login')
            pr_url=$(echo "$pr_info" | jq -r '.url')
            # Join inside jq: `paste -d ", "` would cycle through "," and " "
            # as alternating delimiters rather than emitting ", " each time.
            pr_labels=$(echo "$pr_info" | jq -r '[.labels[].name] | join(", ")')
            if [ -z "$pr_labels" ]; then
              pr_labels="(no labels)"
            fi
            # Add priority indicator
            priority_indicator=""
            if echo "$pr_labels" | grep -q "high priority"; then
              priority_indicator="🔴 [HIGH PRIORITY] "
            fi
            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
            echo "🔗 ${priority_indicator}PR #$pr_num: $pr_title"
            echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
            echo "👤 Author: $pr_author"
            echo "🏷️ Labels: $pr_labels"
            echo "🔗 URL: $pr_url"
            echo ""
            # Get all runs for this PR
            pr_runs=$(grep "^$pr_num|" "$pr_data_file")
            echo "$pr_runs" | while read -r line; do
              print_run "$line"
            done
            # Summary for this PR. Totals are computed from the data file
            # because variables set inside the piped loop above do not survive.
            pr_running_total=$(echo "$pr_runs" | cut -d'|' -f5 | awk '{sum+=$1} END {print sum+0}')
            pr_queued_total=$(echo "$pr_runs" | cut -d'|' -f6 | awk '{sum+=$1} END {print sum+0}')
            total_running=$((total_running + pr_running_total))
            total_queued=$((total_queued + pr_queued_total))
            echo " 📊 PR Total: $pr_running_total running, $pr_queued_total queued"
            echo ""
          done

          # --- Non-PR Runs Section ---
          non_pr_runs=$(grep '^NO_PR|' "$pr_data_file" 2>/dev/null || true)
          non_pr_running=0
          non_pr_queued=0
          non_pr_count=0
          if [ -n "$non_pr_runs" ]; then
            echo "========================================="
            echo "📦 Non-PR Runs (manual / scheduled / other)"
            echo "========================================="
            echo ""
            echo "$non_pr_runs" | while read -r line; do
              print_run "$line" with-ref
            done
            non_pr_running=$(echo "$non_pr_runs" | cut -d'|' -f5 | awk '{sum+=$1} END {print sum+0}')
            non_pr_queued=$(echo "$non_pr_runs" | cut -d'|' -f6 | awk '{sum+=$1} END {print sum+0}')
            non_pr_count=$(echo "$non_pr_runs" | wc -l | tr -d ' ')
            total_running=$((total_running + non_pr_running))
            total_queued=$((total_queued + non_pr_queued))
            echo " 📊 Non-PR Total: $non_pr_running running, $non_pr_queued queued"
            echo ""
          fi

          # Overall summary
          echo "========================================="
          echo "📈 Overall Summary"
          echo "========================================="
          echo "Total PRs with active runs: $pr_count"
          echo "Total non-PR active runs: ${non_pr_count:-0}"
          echo "Total running jobs: $total_running"
          echo "Total queued jobs: $total_queued"
          echo "========================================="
          # Cleanup
          rm -f "$pr_data_file"
nightly-link-check .github/workflows/nightly-link-check.yml
View raw YAML
# Nightly online link check over the README and documentation sources.
name: Nightly Link Check

on:
  schedule:
    - cron: '0 2 * * *'
  workflow_dispatch:

concurrency:
  group: nightly-link-check-${{ github.ref }}
  cancel-in-progress: true

jobs:
  lychee-online:
    # Restricted to the upstream repository.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Run lychee online link checks
        uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411  # v2
        with:
          fail: true
          # Folded scalar: the lines below are joined into one
          # space-separated argument string.
          args: >-
            --config .github/linters/lychee-ci.toml
            README.md
            docs/**/*.md
            docs/**/*.rst
            docs/**/*.ipynb
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
nightly-release-gateway matrix .github/workflows/nightly-release-gateway.yml
View raw YAML
# Nightly release workflow for SGLang Model Gateway
#
# Builds wheels for every platform in the matrix plus an sdist, stamps them
# with a ".devYYYYMMDD" version suffix, and uploads everything to TestPyPI.
name: Nightly Release SGLang Model Gateway to PyPI

on:
  schedule:
    # Run at 2 AM UTC every day
    - cron: '0 2 * * *'
  workflow_dispatch:  # Allow manual trigger

jobs:
  build:
    name: build on ${{ matrix.platform || matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }})
    runs-on: ${{ matrix.os }}-latest
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu, macos, windows]
        target: [x86_64, aarch64]
        manylinux: [auto]
        include:
          - os: ubuntu
            platform: linux
          - os: windows
            ls: dir
            target: x86_64
            python-architecture: x64
            interpreter: '3.9 3.10 3.11 3.12 3.13'
          - os: macos
            target: aarch64
            interpreter: '3.9 3.10 3.11 3.12 3.13'
          - os: ubuntu
            platform: linux
            target: aarch64
          # musllinux
          - os: ubuntu
            platform: linux
            target: x86_64
            manylinux: musllinux_1_1
          - os: ubuntu
            platform: linux
            target: aarch64
            manylinux: musllinux_1_1
        exclude:
          - os: windows
            target: aarch64
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo
      - name: Move sgl-model-gateway folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo
          ls -alt
        shell: bash
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          architecture: ${{ matrix.python-architecture || 'x64' }}
      - name: Modify version for nightly release
        run: |
          # Get current version from pyproject.toml
          CURRENT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])" 2>/dev/null || python -c "import tomli; print(tomli.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])")
          # Create nightly version with date: e.g., 0.2.1.dev20250128
          # The date is taken in UTC so every matrix leg and the sdist job
          # compute the same suffix regardless of the runner's timezone.
          NIGHTLY_VERSION="${CURRENT_VERSION}.dev$(date -u +%Y%m%d)"
          echo "Nightly version: $NIGHTLY_VERSION"
          # Update pyproject.toml with nightly version (temporary, not committed)
          # `-i.bak` keeps the in-place edit portable between GNU and BSD sed.
          sed -i.bak "s/version = \"${CURRENT_VERSION}\"/version = \"${NIGHTLY_VERSION}\"/" bindings/python/pyproject.toml
          # Verify the change
          grep "^version" bindings/python/pyproject.toml
        shell: bash
      - name: Install twine and tomli
        run: pip install -U twine tomli
      - name: Install protoc (macOS)
        if: matrix.os == 'macos'
        run: brew install protobuf
      - name: Install protoc (Windows)
        if: matrix.os == 'windows'
        run: choco install protoc -y
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          working-directory: bindings/python
          target: ${{ matrix.target }}
          manylinux: ${{ matrix.manylinux || 'auto' }}
          args: --release --out dist --features vendored-openssl --interpreter ${{ matrix.interpreter || '3.9 3.10 3.11 3.12 3.13 3.14' }}
          rust-toolchain: stable
          docker-options: -e CI -e CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc -e CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++
          before-script-linux: |
            # Install build dependencies (perl/make for vendored OpenSSL, protoc for gRPC)
            if command -v yum &> /dev/null; then
              yum update -y && yum install -y wget unzip gcc gcc-c++ perl-core make
              # Install cross-compilation toolchain for aarch64 if needed
              if [ "${{ matrix.target }}" = "aarch64" ]; then
                yum install -y gcc-aarch64-linux-gnu gcc-c++-aarch64-linux-gnu || true
              fi
            elif command -v apt-get &> /dev/null; then
              apt-get update && apt-get install -y wget unzip gcc g++ perl make
              # Install cross-compilation toolchain for aarch64 if needed
              if [ "${{ matrix.target }}" = "aarch64" ]; then
                apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu || true
              fi
            fi
            (cd /tmp && \
              wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip && \
              unzip protoc-32.0-linux-x86_64.zip -d /usr/local && \
              rm protoc-32.0-linux-x86_64.zip)
            protoc --version
      - name: List built packages
        # Windows matrix entries override the listing command via `ls: dir`.
        run: ${{ matrix.ls || 'ls -lh' }} bindings/python/dist/
      - name: Check packages
        run: twine check --strict bindings/python/dist/*
      - uses: actions/upload-artifact@v4
        with:
          name: packages-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux || 'auto' }}
          path: bindings/python/dist/

  build-sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo
      - name: Move sgl-model-gateway folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo
          ls -alt
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
      - name: Modify version for nightly release
        run: |
          # Get current version from pyproject.toml
          CURRENT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])" 2>/dev/null || python -c "import tomli; print(tomli.load(open('bindings/python/pyproject.toml', 'rb'))['project']['version'])")
          # Create nightly version with date: e.g., 0.2.1.dev20250128
          # UTC date, matching the suffix computed by the wheel build job.
          NIGHTLY_VERSION="${CURRENT_VERSION}.dev$(date -u +%Y%m%d)"
          echo "Nightly version: $NIGHTLY_VERSION"
          # Update pyproject.toml with nightly version (temporary, not committed)
          sed -i "s/version = \"${CURRENT_VERSION}\"/version = \"${NIGHTLY_VERSION}\"/" bindings/python/pyproject.toml
          # Verify the change
          grep "^version" bindings/python/pyproject.toml
      - name: Build SDist
        uses: PyO3/maturin-action@v1
        with:
          working-directory: bindings/python
          command: sdist
          args: --out dist
          rust-toolchain: stable
      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: bindings/python/dist/*.tar.gz

  upload:
    name: Upload to TestPyPI
    if: github.repository == 'sgl-project/sglang'  # Ensure this job only runs for the sgl-project/sglang repository
    needs: [build, build-sdist]
    runs-on: ubuntu-latest
    steps:
      # No name/pattern given: every artifact of this run (wheels and sdist)
      # is downloaded and merged into dist/.
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true
      - name: Upload to TestPyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN_ROUTER }}
        run: |
          pip install twine
          # --skip-existing lets a second run on the same day (e.g. a manual
          # dispatch after the scheduled run) succeed instead of failing on
          # files whose dated version is already published.
          twine upload --repository testpypi --skip-existing dist/* --verbose
nightly-test-amd .github/workflows/nightly-test-amd.yml
View raw YAML
name: Nightly Test (AMD)
on:
  # Scheduled run every day at 17:30 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '30 17 * * *'
  # Also run when the version file changes on main.
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  # Manual trigger: pick one job from the dropdown or type a comma-separated list.
  workflow_dispatch:
    inputs:
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
      job_select:
        description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
        type: choice
        required: false
        default: 'all'
        # Each option other than 'all' must match a job id, because the job-level
        # `if` conditions compare against these exact names.
        options:
          - 'all'
          - nightly-test-1-gpu-unit
          - nightly-accuracy-2-gpu
          - nightly-accuracy-2-gpu-vlm
          - nightly-perf-2-gpu-text
          - nightly-perf-2-gpu-vlm
          - nightly-4-gpu
          - nightly-accuracy-8-gpu
          - nightly-8-gpu-grok1-int4
          - nightly-8-gpu-grok2
          - nightly-8-gpu-deepseek-v31
          - nightly-8-gpu-deepseek-v32
          - nightly-8-gpu-deepseek-v32-mtp
          - nightly-8-gpu-deepseek-v3-kv-fp8
          - nightly-8-gpu-kimi-k25
          - nightly-8-gpu-qwen3-235b
          - nightly-8-gpu-qwen35
          - nightly-8-gpu-glm5
          - nightly-8-gpu-minimax-m25
          - nightly-1-gpu-zimage-turbo
          - nightly-test-1-gpu-mi35x
          - nightly-accuracy-8-gpu-mi35x
          - nightly-8-gpu-mi35x-grok1-int4
          - nightly-8-gpu-mi35x-grok2
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
          - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
          - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
          - nightly-8-gpu-mi35x-kimi-k25
          - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
          - nightly-8-gpu-mi35x-qwen35
          - nightly-8-gpu-mi35x-glm5
          - nightly-8-gpu-mi35x-minimax-m25
      job_filter:
        description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
        type: string
        required: false
        default: ''
  # Reusable-workflow entry point: callers may pin the git ref under test and filter jobs.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: string
        required: false
        default: 'all'
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
env:
  # Set from the aiter_ref input; evaluates to an empty string when no override is supplied.
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}
concurrency:
  # A non-empty inputs.ref marks a workflow_call invocation. Those runs get a group
  # that is unique per caller run, so they never collide with the direct
  # schedule/push runs, which are grouped by github.ref instead. inputs.ref is the
  # discriminator (rather than github.event_name) because github.event_name is
  # inherited from the caller under workflow_call.
  group: nightly-test-amd-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}
  # Only runs without inputs.ref cancel an in-progress run of the same group.
  cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}
jobs:
# ============================================== MI30x Unit Tests ==============================================
# 1-GPU Unit Tests - LoRA, debug utils, scheduler, etc. (MI30x only)
nightly-test-1-gpu-unit:
  runs-on: linux-mi325-1gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-1-gpu suite; the exit code is saved so the summary is
    # still published before the step reports the suite's result.
    - name: Nightly Unit Test (1-GPU)
      timeout-minutes: 90
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Accuracy Tests ==============================================
# 2-GPU Accuracy Tests - GSM8K eval (MI30x only)
nightly-accuracy-2-gpu:
  runs-on: linux-mi325-2gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd suite, publish its summary, then exit with the suite's code.
    # NOTE(review): unlike the sibling jobs, this step sets no timeout-minutes;
    # confirm whether that is intentional.
    - name: Nightly Test (2-GPU)
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Accuracy Tests - Vision-Language Models MMMU evaluation
nightly-accuracy-2-gpu-vlm:
  runs-on: linux-mi325-2gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-accuracy-2-gpu-vlm suite, publish its summary, then
    # exit with the suite's code.
    - name: Nightly Accuracy Test (2-GPU VLM MMMU)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 2-GPU Text Models Performance Tests
nightly-perf-2-gpu-text:
  runs-on: linux-mi325-2gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-perf-text-2-gpu suite with SGLANG_USE_AITER=1, publish
    # its summary, then exit with the suite's code.
    - name: Performance Test (2-GPU Text Models)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e SGLANG_USE_AITER=1 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 2-GPU VLM Performance Tests
nightly-perf-2-gpu-vlm:
  runs-on: linux-mi325-2gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-perf-vlm-2-gpu suite with SGLANG_USE_AITER=1, publish
    # its summary, then exit with the suite's code.
    - name: Performance Test (2-GPU VLM Models)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e SGLANG_USE_AITER=1 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x 4-GPU Tests ==============================================
# 4-GPU Nightly Tests - Dumper/Comparator E2E, VLM Encoder DP
nightly-4-gpu:
  runs-on: linux-mi325-4gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-4-gpu suite, publish its summary, then exit with the suite's code.
    # NOTE(review): --continue-on-error is always passed here and does not follow
    # inputs.continue_on_error as the sibling jobs do; confirm this is intentional.
    - name: Nightly Test (4-GPU)
      timeout-minutes: 120
      run: |
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU Accuracy Tests - GPT-OSS, Grok1-FP8 (accuracy only)
nightly-accuracy-8-gpu:
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test steps publish, then start the CI container.
    - name: Setup docker
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Both test steps share github_summary.md and publish the whole file to the
    # job summary. Each step therefore truncates the file first, as the other
    # multi-step jobs in this workflow do; otherwise the second step would
    # publish the first step's content again.
    - name: Accuracy Test (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Accuracy Test (8-GPU Grok1-FP8)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e RCCL_MSCCL_ENABLE=0 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Combined Accuracy + Performance Tests ==============================================
# 8-GPU Grok1-INT4 (Accuracy + Performance combined)
nightly-8-gpu-grok1-int4:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test steps publish, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Accuracy gate: a non-zero suite exit code fails the step (and thus the job).
    - name: Accuracy Test (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e RCCL_MSCCL_ENABLE=0 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    # Perf test failure doesn't fail the job if accuracy passed.
    - name: Performance Test (8-GPU Grok1-INT4)
      continue-on-error: true
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e RCCL_MSCCL_ENABLE=0 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU Grok2 (Accuracy + Performance combined)
nightly-8-gpu-grok2:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test steps publish, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Accuracy gate: a non-zero suite exit code fails the step (and thus the job).
    - name: Accuracy Test (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e RCCL_MSCCL_ENABLE=0 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    # Perf test failure doesn't fail the job if accuracy passed.
    - name: Performance Test (8-GPU Grok2)
      continue-on-error: true
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e RCCL_MSCCL_ENABLE=0 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.1 (Accuracy + Performance combined)
nightly-8-gpu-deepseek-v31:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test steps publish, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Accuracy gate (SGLANG_USE_AITER=1): a non-zero suite exit code fails the step.
    - name: Accuracy Test (8-GPU DeepSeek-V3.1)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e SGLANG_USE_AITER=1 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    # Perf test failure doesn't fail the job if accuracy passed.
    # The perf suite runs with SGLANG_USE_ROCM700A=1 instead of SGLANG_USE_AITER=1.
    - name: Performance Test (8-GPU DeepSeek-V3.1)
      continue-on-error: true
      timeout-minutes: 300
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e SGLANG_USE_ROCM700A=1 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.2 (Basic Accuracy + Perf)
nightly-8-gpu-deepseek-v32:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test steps publish, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Accuracy gate: a non-zero suite exit code fails the step (and thus the job).
    - name: Accuracy Test (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    # Perf test failure doesn't fail the job if accuracy passed.
    - name: Performance Test (8-GPU DeepSeek-V3.2 Basic)
      continue-on-error: true
      timeout-minutes: 150
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3.2 MTP (MTP Accuracy + Perf)
nightly-8-gpu-deepseek-v32-mtp:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test steps publish, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Accuracy gate: a non-zero suite exit code fails the step (and thus the job).
    - name: Accuracy Test (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    # Perf test failure doesn't fail the job if accuracy passed.
    - name: Performance Test (8-GPU DeepSeek-V3.2 MTP)
      continue-on-error: true
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU DeepSeek-V3 KV FP8 (Basic + MTP with --kv-cache-dtype fp8_e4m3)
nightly-8-gpu-deepseek-v3-kv-fp8:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-8-gpu-deepseek-v3-kv-fp8 suite, publish its summary,
    # then exit with the suite's code.
    - name: DeepSeek-V3 KV FP8 Test (8-GPU Basic + MTP)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU Kimi-K2.5 (Accuracy)
nightly-8-gpu-kimi-k25:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-accuracy-8-gpu-kimi-k25 suite, publish its summary,
    # then exit with the suite's code.
    - name: Accuracy Test (8-GPU Kimi-K2.5)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-8-gpu-qwen3-235b:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # A single suite (nightly-8-gpu-qwen3-235b) covers both accuracy and
    # performance here, so there is no separate non-blocking perf step.
    - name: Accuracy Test + Performance Test (8-GPU Qwen3)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU Qwen 3.5 (Accuracy)
nightly-8-gpu-qwen35:
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    # Besides the common dependencies, this job installs mistral-common and
    # lm-eval[api] inside the container.
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
    # Forward inputs.continue_on_error to run_suite.py like the sibling jobs do;
    # previously this job silently ignored the input.
    - name: Accuracy Test (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
nightly-8-gpu-glm5:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    # After the common dependencies, install transformers from git at a pinned commit.
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
    # Run the nightly-amd-accuracy-8-gpu-glm5 suite, publish its summary, then
    # exit with the suite's code.
    - name: Accuracy Test (8-GPU GLM-5 NSA)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# 8-GPU MiniMax-M2.5 (Accuracy)
nightly-8-gpu-minimax-m25:
  runs-on: linux-mi325-8gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Run the nightly-amd-accuracy-8-gpu-minimax-m25 suite with SGLANG_USE_AITER=1,
    # publish its summary, then exit with the suite's code.
    - name: Accuracy Test (8-GPU MiniMax-M2.5)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e SGLANG_USE_AITER=1 \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x Diffusion Tests ==============================================
# 1-GPU Z-Image-Turbo (Diffusion T2I)
nightly-1-gpu-zimage-turbo:
  runs-on: linux-mi325-1gpu-sglang
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    # Runs pytest directly (not run_suite.py) from the repository root.
    # NOTE(review): when inputs.continue_on_error is true, `|| true` is spliced
    # into the command, so the pytest exit status is discarded and this step
    # always exits 0; confirm that fully masking failures is intended.
    - name: Z-Image-Turbo Diffusion Test (1-GPU)
      timeout-minutes: 45
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
        pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    # Upload whatever was written to diffusion-artifacts/ even if the test step
    # failed; a missing directory is not an error.
    - name: Upload generated images
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: zimage-turbo-outputs
        path: diffusion-artifacts/
        retention-days: 30
        if-no-files-found: ignore
# ============================================== MI35x Tests ==============================================
# MI35x 1-GPU tests - platform-agnostic tests that may work on CDNA4 (gfx950)
nightly-test-1-gpu-mi35x:
  runs-on: linux-mi35x-gpu-1
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    # Run the nightly-amd-1-gpu-mi35x suite; the exit code is saved so the summary
    # is still published before the step reports the suite's result.
    - name: Nightly Test MI35x (1-GPU)
      timeout-minutes: 90
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Accuracy Tests - GPT-OSS (accuracy only)
nightly-accuracy-8-gpu-mi35x:
  runs-on: linux-mi35x-gpu-8
  # Run in the upstream repo (or for pull_request events) when no job filter is set,
  # the filter is 'all', or the comma-separated filter names this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x,'))
  steps:
    # Use the caller-supplied ref when present, otherwise the triggering ref.
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    # Create the summary file that the test step publishes, then start the CI container.
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    # Run the nightly-amd-8-gpu-mi35x suite; the exit code is saved so the summary
    # is still published before the step reports the suite's result.
    - name: Accuracy Test MI35x (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
        -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
        python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok1-INT4: accuracy suite followed by a non-blocking perf suite.
nightly-8-gpu-mi35x-grok1-int4:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 90
      # The summary file is truncated first so this step publishes only its
      # own output; the suite's exit code is re-raised after publishing.
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x (8-GPU Grok1-INT4)
      timeout-minutes: 60
      continue-on-error: true  # Perf test failure doesn't fail the job if accuracy passed
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Grok2: accuracy suite followed by a non-blocking perf suite.
nightly-8-gpu-mi35x-grok2:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x (8-GPU Grok2)
      timeout-minutes: 60
      continue-on-error: true  # Perf test failure doesn't fail the job if accuracy passed
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4: accuracy suite, then a non-blocking perf test.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 300
      continue-on-error: true  # Perf test failure doesn't fail the job if accuracy passed
      # Unlike the accuracy step, this invokes a single test file directly
      # rather than going through run_suite.py.
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 with KV FP8: accuracy suite, then a
# non-blocking perf test.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 300
      continue-on-error: true  # Perf test failure doesn't fail the job if accuracy passed
      # Invokes a single test file directly rather than run_suite.py.
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-R1-MXFP4 with AllReduce Fusion: accuracy suite, then a
# non-blocking perf test.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 300
      continue-on-error: true  # Perf test failure doesn't fail the job if accuracy passed
      # Invokes a single test file directly rather than run_suite.py.
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2: accuracy suite.
nightly-accuracy-8-gpu-mi35x-deepseek-v32:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2)
      timeout-minutes: 120
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 (TP+MTP): accuracy suite.
nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU DeepSeek-V3.2 TP+MTP)
      timeout-minutes: 120
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 (basic configuration): performance suite.
nightly-perf-8-gpu-mi35x-deepseek-v32-basic:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 150
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Kimi-K2.5: accuracy suite.
nightly-8-gpu-mi35x-kimi-k25:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU Kimi-K2.5)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen3-235B-MXFP4: one suite covering accuracy and performance.
nightly-8-gpu-mi35x-qwen3-235b-mxfp4:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test + Performance Test MI35x (8-GPU Qwen3-235B-MXFP4)
      timeout-minutes: 120
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU Qwen 3.5: accuracy suite.
nightly-8-gpu-mi35x-qwen35:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      # Besides tabulate, this job installs mistral-common and lm-eval[api].
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
    - name: Accuracy Test MI35x (8-GPU Qwen 3.5)
      timeout-minutes: 120
      # Forward the workflow's continue_on_error input to run_suite.py like
      # every other suite invocation in this workflow; previously the input
      # was silently ignored for this job.
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU GLM-5 NSA: accuracy suite.
nightly-8-gpu-mi35x-glm5:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      # This job additionally installs transformers from git, pinned to a
      # specific commit.
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
    - name: Accuracy Test MI35x (8-GPU GLM-5 NSA)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU MiniMax-M2.5: accuracy suite (run with SGLANG_USE_AITER=1).
nightly-8-gpu-mi35x-minimax-m25:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x (8-GPU MiniMax-M2.5)
      timeout-minutes: 120
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x 8-GPU DeepSeek-V3.2 (MTP): performance suite.
nightly-perf-8-gpu-mi35x-deepseek-v32-mtp:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Performance Test MI35x (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# Aggregation gate: fails when any job listed under `needs` ended in
# 'failure' or 'cancelled'. Other results do not trip the gate.
check-all-jobs:
  # always() makes this job run even when a needed job did not succeed.
  if: >-
    always()
    && (github.repository == 'sgl-project/sglang'
    || github.event_name == 'pull_request'
    || github.event_name == 'workflow_dispatch')
  needs:
    # MI30x Unit Tests
    - nightly-test-1-gpu-unit
    # MI30x Accuracy Tests
    - nightly-accuracy-2-gpu
    - nightly-accuracy-2-gpu-vlm
    # MI30x 4-GPU Tests
    - nightly-4-gpu
    - nightly-accuracy-8-gpu
    # MI30x Performance Tests - excluded from check (perf failures don't block CI)
    # - nightly-perf-2-gpu-text
    # - nightly-perf-2-gpu-vlm
    # MI30x Combined Accuracy + Performance Tests
    - nightly-8-gpu-grok1-int4
    - nightly-8-gpu-grok2
    - nightly-8-gpu-deepseek-v31
    - nightly-8-gpu-deepseek-v32
    - nightly-8-gpu-deepseek-v32-mtp
    - nightly-8-gpu-deepseek-v3-kv-fp8
    - nightly-8-gpu-kimi-k25
    - nightly-8-gpu-qwen3-235b
    - nightly-8-gpu-qwen35
    - nightly-8-gpu-glm5
    - nightly-8-gpu-minimax-m25
    # MI30x Diffusion Tests
    - nightly-1-gpu-zimage-turbo
    # MI35x jobs
    - nightly-test-1-gpu-mi35x
    - nightly-accuracy-8-gpu-mi35x
    - nightly-8-gpu-mi35x-grok1-int4
    - nightly-8-gpu-mi35x-grok2
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp
    - nightly-8-gpu-mi35x-kimi-k25
    - nightly-8-gpu-mi35x-qwen3-235b-mxfp4
    - nightly-8-gpu-mi35x-qwen35
    - nightly-8-gpu-mi35x-glm5
    - nightly-8-gpu-mi35x-minimax-m25
    # MI35x perf jobs excluded from check - perf failures don't block CI
    # - nightly-perf-8-gpu-mi35x-deepseek-v32-basic
    # - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp
  runs-on: ubuntu-latest
  steps:
    - name: Check if any job failed
      # The two expressions are rendered to the strings "true"/"false" before
      # the shell runs; failures are reported ahead of cancellations.
      run: |
        any_failed="${{ contains(needs.*.result, 'failure') }}"
        any_cancelled="${{ contains(needs.*.result, 'cancelled') }}"
        if [[ "$any_failed" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        elif [[ "$any_cancelled" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi
        echo "All nightly test jobs passed"
nightly-test-amd-rocm720 .github/workflows/nightly-test-amd-rocm720.yml
View raw YAML
name: Nightly Test (AMD ROCm 7.2)
# Triggers: a daily cron, pushes to main that touch the version file, manual
# dispatch, and invocation from another workflow (workflow_call).
on:
  schedule:
    - cron: '30 17 * * *'
  push:
    branches:
      - main
    paths:
      - 'python/sglang/version.py'
  # Manual runs: job selection comes from either the dropdown (job_select) or
  # the free-text list (job_filter); the jobs read job_filter first.
  workflow_dispatch:
    inputs:
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
      job_select:
        description: 'Select a job to run from dropdown (choose "all" to run all jobs)'
        type: choice
        required: false
        default: 'all'
        options:
          - 'all'
          # MI30x jobs
          - nightly-test-1-gpu-unit-rocm720
          - nightly-accuracy-2-gpu-rocm720
          - nightly-accuracy-2-gpu-vlm-rocm720
          - nightly-perf-2-gpu-text-rocm720
          - nightly-perf-2-gpu-vlm-rocm720
          - nightly-4-gpu-rocm720
          - nightly-accuracy-8-gpu-rocm720
          - nightly-8-gpu-grok1-int4-rocm720
          - nightly-8-gpu-grok2-rocm720
          - nightly-8-gpu-deepseek-v31-rocm720
          - nightly-8-gpu-deepseek-v32-rocm720
          - nightly-8-gpu-deepseek-v32-mtp-rocm720
          - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
          - nightly-8-gpu-kimi-k25-rocm720
          - nightly-8-gpu-qwen3-235b-rocm720
          - nightly-8-gpu-qwen35-rocm720
          - nightly-8-gpu-glm5-rocm720
          - nightly-8-gpu-minimax-m25-rocm720
          - nightly-1-gpu-zimage-turbo-rocm720
          # MI35x jobs
          - nightly-test-1-gpu-mi35x-rocm720
          - nightly-accuracy-8-gpu-mi35x-rocm720
          - nightly-8-gpu-mi35x-grok1-int4-rocm720
          - nightly-8-gpu-mi35x-grok2-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
          - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
          - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
          - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
          - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
          - nightly-8-gpu-mi35x-kimi-k25-rocm720
          - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
          - nightly-8-gpu-mi35x-qwen35-rocm720
          - nightly-8-gpu-mi35x-glm5-rocm720
          - nightly-8-gpu-mi35x-glm47-fp8-rocm720
          - nightly-8-gpu-mi35x-minimax-m25-rocm720
      job_filter:
        description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
        type: string
        required: false
        default: ''
  # Reusable-workflow entry point: adds `ref` and has no dropdown input.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        type: string
        required: false
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: string
        required: false
        default: 'all'
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        type: boolean
        required: false
        default: true
# Workflow-wide environment: exposes the optional AITER override (empty when
# the aiter_ref input is not supplied).
env:
  AITER_COMMIT_OVERRIDE: "${{ inputs.aiter_ref }}"
concurrency:
  # When called via workflow_call with ref set, use a unique group per caller run to avoid
  # collisions with direct schedule/push triggers. We use inputs.ref (not github.event_name)
  # to detect this, because github.event_name inherits from the caller in workflow_call.
  group: "nightly-test-amd-rocm720-${{ inputs.ref && format('caller-{0}', github.run_id) || github.ref }}"
  # Only runs without inputs.ref cancel an in-progress run in the same group.
  # NOTE(review): per the comment above, github.event_name is inherited from
  # the caller, so the `!= 'workflow_call'` term looks redundant — confirm.
  cancel-in-progress: ${{ !inputs.ref && github.event_name != 'workflow_call' }}
jobs:
# ============================================== MI30x ROCm 7.2 Unit Tests ==============================================
# MI30x ROCm 7.2, 1 GPU: unit tests (LoRA, debug utils, scheduler, etc.).
nightly-test-1-gpu-unit-rocm720:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-unit-rocm720,'))
  runs-on: linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      # The container start script is passed --rocm-version rocm720.
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Nightly Unit Test ROCm 7.2 (1-GPU)
      timeout-minutes: 90
      # A suite failure is recorded in TEST_EXIT_CODE so the summary is still
      # appended to the step summary before the step exits with that code.
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 Accuracy Tests ==============================================
# MI30x ROCm 7.2, 2 GPUs: accuracy suite (GSM8K eval).
nightly-accuracy-2-gpu-rocm720:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      # The container start script is passed --rocm-version rocm720.
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Nightly Test ROCm 7.2 (2-GPU)
      # NOTE(review): this step sets no timeout-minutes, unlike the other test
      # steps in this workflow — confirm whether that is intentional.
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 2 GPUs: vision-language model accuracy suite (MMMU evaluation).
nightly-accuracy-2-gpu-vlm-rocm720:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-2-gpu-vlm-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      # The container start script is passed --rocm-version rocm720.
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Nightly Accuracy Test ROCm 7.2 (2-GPU VLM MMMU)
      timeout-minutes: 180
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-2-gpu-vlm --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 2 GPUs: text-model performance suite (run with SGLANG_USE_AITER=1).
nightly-perf-2-gpu-text-rocm720:
  # Gate: upstream repo or a pull_request event, AND the job filter is empty,
  # equals 'all', or contains this job's name in its comma-separated list.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-text-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Use the caller-provided ref when present, else the triggering ref.
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      # The container start script is passed --rocm-version rocm720.
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Performance Test ROCm 7.2 (2-GPU Text Models)
      timeout-minutes: 120
      # Truncate the summary, run the suite, publish the summary, then exit
      # with the suite's status (0 when TEST_EXIT_CODE was never set).
      run: |
        > github_summary.md  # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-perf-text-2-gpu --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 2 GPUs: vision-language model performance suite.
nightly-perf-2-gpu-vlm-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-2-gpu-vlm-rocm720,'))
  runs-on: linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # Runs with SGLANG_USE_AITER=1. github_summary.md is appended to the real
    # step summary afterwards and the suite's exit status is propagated.
    - name: Performance Test ROCm 7.2 (2-GPU VLM Models)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-perf-vlm-2-gpu --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 4-GPU Tests ==============================================
# ROCm 7.2, 4 GPUs: nightly suite (Dumper/Comparator E2E, VLM Encoder DP).
nightly-4-gpu-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-4-gpu-rocm720,'))
  runs-on: linux-mi325-4gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # Truncates github_summary.md, runs the suite, appends the file to the real
    # step summary, and propagates the suite's exit status.
    # Note: --continue-on-error is always passed here; it does not depend on
    # inputs.continue_on_error.
    - name: Nightly Test ROCm 7.2 (4-GPU)
      timeout-minutes: 120
      run: |
        > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-4-gpu --nightly --continue-on-error --timeout-per-file 3600 || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: accuracy suites for GPT-OSS and Grok1-FP8.
nightly-accuracy-8-gpu-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Each test step truncates github_summary.md before running. Both steps
    # append the whole file to their own step summary, so without the
    # truncation the second step would publish the first step's content again.
    - name: Accuracy Test ROCm 7.2 (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-gpt-oss --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Runs with RCCL_MSCCL_ENABLE=0; the summary file is cleared first so only
    # this step's output is published.
    - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-FP8)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 Combined Accuracy + Performance Tests ==============================================
# ROCm 7.2, 8 GPUs: Grok1-INT4 accuracy followed by performance.
nightly-8-gpu-grok1-int4-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok1-int4-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Accuracy suite, run with RCCL_MSCCL_ENABLE=0; its exit status decides the
    # step result.
    - name: Accuracy Test ROCm 7.2 (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job.
    - name: Performance Test ROCm 7.2 (8-GPU Grok1-INT4)
      timeout-minutes: 60
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: Grok2 accuracy followed by performance.
nightly-8-gpu-grok2-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-grok2-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Accuracy suite, run with RCCL_MSCCL_ENABLE=0; its exit status decides the
    # step result.
    - name: Accuracy Test ROCm 7.2 (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job.
    - name: Performance Test ROCm 7.2 (8-GPU Grok2)
      timeout-minutes: 60
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: DeepSeek-V3.1 accuracy followed by performance.
nightly-8-gpu-deepseek-v31-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v31-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Accuracy suite, run with SGLANG_USE_AITER=1; its exit status decides the
    # step result.
    - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.1)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v31 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job. The per-file timeout (18000 s) equals the step timeout (300 min).
    # NOTE(review): SGLANG_USE_ROCM700A=1 is set in this ROCm 7.2 job - confirm
    # it is intended and not carried over from a ROCm 7.0 workflow.
    - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.1)
      timeout-minutes: 300
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_ROCM700A=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v31 --nightly --timeout-per-file 18000 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: DeepSeek-V3.2 basic accuracy followed by performance.
nightly-8-gpu-deepseek-v32-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Accuracy suite; its exit status decides the step result.
    - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job.
    - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 150
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: DeepSeek-V3.2 MTP accuracy followed by performance.
nightly-8-gpu-deepseek-v32-mtp-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v32-mtp-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Accuracy suite; its exit status decides the step result.
    - name: Accuracy Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job.
    - name: Performance Test ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 180
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: DeepSeek-V3 KV FP8 (Basic + MTP with --kv-cache-dtype fp8_e4m3).
nightly-8-gpu-deepseek-v3-kv-fp8-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-deepseek-v3-kv-fp8-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    - name: DeepSeek-V3 KV FP8 Test ROCm 7.2 (8-GPU Basic + MTP)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-deepseek-v3-kv-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: Kimi-K2.5 accuracy.
nightly-8-gpu-kimi-k25-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-kimi-k25-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    - name: Accuracy Test ROCm 7.2 (8-GPU Kimi-K2.5)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-kimi-k25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: Qwen3-235B accuracy and performance in a single suite.
nightly-8-gpu-qwen3-235b-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen3-235b-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    - name: Accuracy Test + Performance Test ROCm 7.2 (8-GPU Qwen3)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-qwen3-235b --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: Qwen 3.5 accuracy.
nightly-8-gpu-qwen35-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-qwen35-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    # Base install (skipping the AITER build and test-time deps), then pip
    # installs mistral-common and lm-eval[api] through amd_ci_exec.sh.
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    # Note: --continue-on-error is always passed here; it does not depend on
    # inputs.continue_on_error.
    - name: Accuracy Test ROCm 7.2 (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-qwen35 --nightly --timeout-per-file 3600 --continue-on-error || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: GLM-5 accuracy.
nightly-8-gpu-glm5-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    # Base install, then transformers pinned to commit 96f807a33b75 from the
    # huggingface/transformers git repository.
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    - name: Accuracy Test ROCm 7.2 (8-GPU GLM-5 NSA)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-glm5 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ROCm 7.2, 8 GPUs: MiniMax-M2.5 accuracy.
nightly-8-gpu-minimax-m25-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25-rocm720,'))
  runs-on: linux-mi325-8gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps

    # Runs with SGLANG_USE_AITER=1. github_summary.md is appended to the real
    # step summary afterwards and the suite's exit status is propagated.
    - name: Accuracy Test ROCm 7.2 (8-GPU MiniMax-M2.5)
      timeout-minutes: 120
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# ============================================== MI30x ROCm 7.2 Diffusion Tests ==============================================
# ROCm 7.2, 1 GPU: Z-Image-Turbo diffusion (text-to-image) test.
nightly-1-gpu-zimage-turbo-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-1-gpu-zimage-turbo-rocm720,'))
  runs-on: linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # Runs pytest from /sglang-checkout with SGLANG_DIFFUSION_ARTIFACT_DIR set
    # to /sglang-checkout/diffusion-artifacts, then appends github_summary.md to
    # the real step summary.
    # NOTE(review): when inputs.continue_on_error is true the expression below
    # expands to '|| true', so a pytest failure never reaches TEST_EXIT_CODE and
    # this step succeeds regardless - confirm that masking is intended.
    - name: Z-Image-Turbo Diffusion Test ROCm 7.2 (1-GPU)
      timeout-minutes: 45
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          -e SGLANG_DIFFUSION_ARTIFACT_DIR="/sglang-checkout/diffusion-artifacts" \
          pytest test/registered/amd/test_zimage_turbo.py -v -s ${{ inputs.continue_on_error && '|| true' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Always upload diffusion-artifacts/ (even after a failed test); a missing
    # directory is ignored. Artifacts are retained for 30 days.
    - name: Upload generated images
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: zimage-turbo-outputs-rocm720
        path: diffusion-artifacts/
        if-no-files-found: ignore
        retention-days: 30
# ============================================== MI35x ROCm 7.2 Tests ==============================================
# MI35x, ROCm 7.2, 1 GPU: nightly suite.
nightly-test-1-gpu-mi35x-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-test-1-gpu-mi35x-rocm720,'))
  runs-on: linux-mi35x-gpu-1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    - name: Nightly Test MI35x ROCm 7.2 (1-GPU)
      timeout-minutes: 90
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-1-gpu-mi35x --nightly --timeout-per-file 900 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2, 8 GPUs: GPT-OSS accuracy.
nightly-accuracy-8-gpu-mi35x-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Runs the suite, appends github_summary.md to the real step summary, and
    # propagates the suite's exit status.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GPT-OSS)
      timeout-minutes: 180
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2, 8 GPUs: Grok1-INT4 accuracy followed by performance.
nightly-8-gpu-mi35x-grok1-int4-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok1-int4-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Accuracy suite, run with RCCL_MSCCL_ENABLE=0; its exit status decides the
    # step result.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job.
    - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok1-INT4)
      timeout-minutes: 60
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2, 8 GPUs: Grok2 accuracy followed by performance.
nightly-8-gpu-mi35x-grok2-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-grok2-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Accuracy suite, run with RCCL_MSCCL_ENABLE=0; its exit status decides the
    # step result.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Grok2)
      timeout-minutes: 60
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance suite; continue-on-error keeps a failure here from failing
    # the job.
    - name: Performance Test MI35x ROCm 7.2 (8-GPU Grok2)
      timeout-minutes: 60
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e RCCL_MSCCL_ENABLE=0 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-grok2 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2, 8 GPUs: DeepSeek-R1-MXFP4 accuracy followed by performance.
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720:
  # Enabled in sgl-project/sglang (or on any pull_request event) when no job
  # filter is given, the filter is 'all', or the comma-separated filter names
  # this job.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    && (!(inputs.job_filter || inputs.job_select)
    || (inputs.job_filter || inputs.job_select) == 'all'
    || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720,'))
  runs-on: linux-mi35x-gpu-8
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Create the summary file, then start the ROCm 7.2 CI container.
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720

    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # Install tabulate for run_suite.py (missing in MI35x container)
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate

    # Accuracy suite; its exit status decides the step result.
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 180
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}

    # Performance test: invokes the test file directly instead of going through
    # run_suite.py. continue-on-error keeps a failure here from failing the job.
    - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4)
      timeout-minutes: 300
      continue-on-error: true
      run: |
        > github_summary.md # Clear summary file
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: DeepSeek-R1-MXFP4 with KV FP8 on 8 GPUs (accuracy suite, then a perf script)
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      timeout-minutes: 180
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 KV FP8)
      # A failing perf run is reported but does not fail the job.
      continue-on-error: true
      timeout-minutes: 300
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_kv_fp8_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: DeepSeek-R1-MXFP4 with AllReduce fusion on 8 GPUs (accuracy suite, then a perf script)
nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      timeout-minutes: 180
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
    - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-R1-MXFP4 AllReduce Fusion)
      # A failing perf run is reported but does not fail the job.
      continue-on-error: true
      timeout-minutes: 300
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_ar_fusion_perf_mi35x.py || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: DeepSeek-V3.2 accuracy suite on 8 GPUs
nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2)
      timeout-minutes: 120
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-deepseek-v32 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: DeepSeek-V3.2 TP+MTP accuracy suite on 8 GPUs
nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 TP+MTP)
      timeout-minutes: 120
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: DeepSeek-V3.2 basic performance suite on 8 GPUs
nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 Basic)
      timeout-minutes: 150
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-basic --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: Kimi-K2.5 accuracy suite on 8 GPUs
nightly-8-gpu-mi35x-kimi-k25-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-kimi-k25-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Kimi-K2.5)
      timeout-minutes: 180
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k25 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: Qwen3-235B-MXFP4 on 8 GPUs (accuracy and performance in one suite)
nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test + Performance Test MI35x ROCm 7.2 (8-GPU Qwen3-235B-MXFP4)
      timeout-minutes: 120
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-8-gpu-mi35x-qwen3-235b-mxfp4 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: Qwen 3.5 accuracy suite on 8 GPUs
nightly-8-gpu-mi35x-qwen35-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-qwen35-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        # This job additionally passes --skip-aiter-build to the install script.
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        bash scripts/ci/amd/amd_ci_exec.sh pip install mistral-common "lm-eval[api]"
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU Qwen 3.5)
      timeout-minutes: 120
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # NOTE(review): --continue-on-error is always passed here rather than
        # being driven by inputs.continue_on_error - confirm this is intended.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-qwen35 --nightly --timeout-per-file 3600 --continue-on-error || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: GLM-5 NSA accuracy suite on 8 GPUs
nightly-8-gpu-mi35x-glm5-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
        # transformers is installed from git, pinned to a specific commit
        bash scripts/ci/amd/amd_ci_exec.sh pip install git+https://github.com/huggingface/transformers.git@96f807a33b75
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-5 NSA)
      timeout-minutes: 180
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm5 --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: GLM-4.7-FP8 accuracy suite on 8 GPUs
nightly-8-gpu-mi35x-glm47-fp8-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm47-fp8-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        # NOTE(review): the install script is called without
        # --skip-test-time-deps here - confirm this is intended.
        bash scripts/ci/amd/amd_ci_install_dependency.sh
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-4.7-FP8)
      timeout-minutes: 120
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm47-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: MiniMax-M2.5 accuracy suite on 8 GPUs
nightly-8-gpu-mi35x-minimax-m25-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Accuracy Test MI35x ROCm 7.2 (8-GPU MiniMax-M2.5)
      timeout-minutes: 120
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # SGLANG_USE_AITER=1 is passed to this suite in addition to the summary path.
        # The suite's status is captured so the summary is still published.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e SGLANG_USE_AITER=1 \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# MI35x, ROCm 7.2: DeepSeek-V3.2 MTP performance suite on 8 GPUs
nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
  runs-on: linux-mi35x-gpu-8
  # Runs in the upstream repo or on pull requests, and only when the job
  # filter is unset, 'all', or lists this job's name.
  if: >-
    (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
    (!(inputs.job_filter || inputs.job_select) ||
    (inputs.job_filter || inputs.job_select) == 'all' ||
    contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Setup docker (ROCm 7.2)
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: |
        # Create the summary file up front so later steps can always read it.
        touch github_summary.md
        bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-test-time-deps
        # run_suite.py needs tabulate, which the MI35x container does not ship
        bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
    - name: Performance Test MI35x ROCm 7.2 (8-GPU DeepSeek-V3.2 MTP)
      timeout-minutes: 180
      run: |
        # Start from an empty summary file.
        : > github_summary.md
        # Capture the suite's status instead of aborting, so the summary is
        # still published before the step reports the result.
        bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
          -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
          python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-deepseek-v32-mtp --nightly --timeout-per-file 7200 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
        echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
        exit ${TEST_EXIT_CODE:-0}
# Aggregation gate: runs even when upstream jobs fail and turns any failed or
# cancelled ROCm 7.2 nightly job into a failure of this job.
check-all-jobs:
  runs-on: ubuntu-latest
  if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch')
  needs:
    # MI30x ROCm 7.2: unit tests
    - nightly-test-1-gpu-unit-rocm720
    # MI30x ROCm 7.2: accuracy tests
    - nightly-accuracy-2-gpu-rocm720
    - nightly-accuracy-2-gpu-vlm-rocm720
    # MI30x ROCm 7.2: performance tests
    - nightly-perf-2-gpu-text-rocm720
    - nightly-perf-2-gpu-vlm-rocm720
    # MI30x ROCm 7.2: 4-GPU tests and 8-GPU accuracy
    - nightly-4-gpu-rocm720
    - nightly-accuracy-8-gpu-rocm720
    # MI30x ROCm 7.2: combined accuracy + performance tests
    - nightly-8-gpu-grok1-int4-rocm720
    - nightly-8-gpu-grok2-rocm720
    - nightly-8-gpu-deepseek-v31-rocm720
    - nightly-8-gpu-deepseek-v32-rocm720
    - nightly-8-gpu-deepseek-v32-mtp-rocm720
    - nightly-8-gpu-deepseek-v3-kv-fp8-rocm720
    - nightly-8-gpu-kimi-k25-rocm720
    - nightly-8-gpu-qwen3-235b-rocm720
    - nightly-8-gpu-qwen35-rocm720
    - nightly-8-gpu-glm5-rocm720
    - nightly-8-gpu-minimax-m25-rocm720
    # MI30x ROCm 7.2: diffusion tests
    - nightly-1-gpu-zimage-turbo-rocm720
    # MI35x ROCm 7.2 jobs
    - nightly-test-1-gpu-mi35x-rocm720
    - nightly-accuracy-8-gpu-mi35x-rocm720
    - nightly-8-gpu-mi35x-grok1-int4-rocm720
    - nightly-8-gpu-mi35x-grok2-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-kv-fp8-rocm720
    - nightly-8-gpu-mi35x-deepseek-r1-mxfp4-ar-fusion-rocm720
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-rocm720
    - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp-rocm720
    - nightly-perf-8-gpu-mi35x-deepseek-v32-basic-rocm720
    - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720
    - nightly-8-gpu-mi35x-kimi-k25-rocm720
    - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
    - nightly-8-gpu-mi35x-qwen35-rocm720
    - nightly-8-gpu-mi35x-glm5-rocm720
    - nightly-8-gpu-mi35x-glm47-fp8-rocm720
    - nightly-8-gpu-mi35x-minimax-m25-rocm720
  steps:
    - name: Check if any job failed
      env:
        # Each expression renders as the string "true" or "false".
        ANY_FAILED: ${{ contains(needs.*.result, 'failure') }}
        ANY_CANCELLED: ${{ contains(needs.*.result, 'cancelled') }}
      run: |
        # Failures are reported first, then cancellations; skipped jobs do not
        # trip either check.
        if [[ "$ANY_FAILED" == "true" ]]; then
          echo "One or more ROCm 7.2 nightly test jobs failed"
          exit 1
        fi
        if [[ "$ANY_CANCELLED" == "true" ]]; then
          echo "One or more ROCm 7.2 nightly test jobs were cancelled"
          exit 1
        fi
        echo "All ROCm 7.2 nightly test jobs passed"
nightly-test-intel .github/workflows/nightly-test-intel.yml
View raw YAML
# Nightly workflow for Intel hardware. It currently contains only a
# placeholder job that prints a message.
name: Nightly Test (Intel)

on:
  # Daily at 00:00 UTC.
  schedule:
    - cron: "0 0 * * *"
  # Also on pushes to main that touch the version file.
  push:
    branches: [main]
    paths:
      - 'python/sglang/version.py'
  workflow_dispatch:
  # Callable from other workflows, optionally with an explicit ref.
  workflow_call:
    inputs:
      ref:
        description: 'Branch, tag or SHA to checkout'
        type: string
        required: false
        default: ''

concurrency:
  # One group per tested ref.
  group: nightly-test-intel-${{ inputs.ref || github.ref }}
  # NOTE(review): inside a reusable-workflow call the github context is the
  # caller's, so event_name may never equal 'workflow_call' - confirm that
  # this expression behaves as intended.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
  # Placeholder for Intel GPU tests.
  # Add Intel-specific nightly test jobs here when available.
  placeholder:
    runs-on: ubuntu-latest
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Placeholder
        run: echo "Intel nightly tests will be added here"
nightly-test-npu matrix .github/workflows/nightly-test-npu.yml
View raw YAML
# Nightly workflow for NPU runners: triggers, inputs and concurrency settings.
name: Nightly Test (NPU)

on:
  schedule:
    # 17:00 UTC, i.e. 1:00 a.m. Beijing Time, every day
    - cron: "0 17 * * *"
  # Pull requests against main only trigger it when this workflow file changes.
  pull_request:
    branches: [main]
    paths:
      - '.github/workflows/nightly-test-npu.yml'
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      # NOTE(review): the job `if:` conditions visible in this file do not
      # read job_filter - confirm whether filtering is meant to be wired up.
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: string
        required: false
        default: 'all'

concurrency:
  # One group per tested ref.
  group: nightly-test-npu-${{ inputs.ref || github.ref }}
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

jobs:
# Nightly 1-NPU suite on A3 runners, split across two partitions (matrix.part 0 and 1).
nightly-1-npu-a3:
  if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
  runs-on: linux-aarch64-a3-2
  strategy:
    # Let the other partition finish even if one fails.
    fail-fast: false
    matrix:
      part: [0, 1]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Install dependencies
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
        pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # download the grade-school-math test split
        curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
    - name: Print Log Information
      run: |
        bash scripts/ci/npu/npu_log_print.sh
    - name: Run test
      timeout-minutes: 240
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        pip install sglang_router
        hf download lmms-lab/MMMU --repo-type dataset
        pip install sentence_transformers torchaudio==2.8.0
        # Requirement specifiers containing '>' must be quoted: unquoted, the
        # shell treats '>=X' as a redirection of stdout to a file named '=X',
        # which drops the version floor and hides pip's output.
        pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
        pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
        pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
        git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
        cd ./lmms-eval
        # NOTE(review): the lmms-eval install runs in the background and is
        # only given a fixed 120 s head start; nothing waits for it to finish.
        nohup pip install . > lmmslog.txt 2>&1 &
        sleep 120
        # Make the cloned lmms-eval checkout importable.
        export PYTHONPATH=$PYTHONPATH:$(pwd)
        cd ../
        cd test
        python3 run_suite.py --hw npu --suite nightly-1-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Nightly 2-NPU suite on A3 runners (single partition).
nightly-2-npu-a3:
  if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
  runs-on: linux-aarch64-a3-2
  strategy:
    fail-fast: false
    matrix:
      part: [0]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Install dependencies
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
        pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # download the grade-school-math test split
        curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
    - name: Print Log Information
      run: |
        bash scripts/ci/npu/npu_log_print.sh
    - name: Run test
      timeout-minutes: 240
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        pip install sglang_router
        hf download lmms-lab/MMMU --repo-type dataset
        pip install sentence_transformers torchaudio==2.8.0
        # Requirement specifiers containing '>' must be quoted: unquoted, the
        # shell treats '>=X' as a redirection of stdout to a file named '=X',
        # which drops the version floor and hides pip's output.
        pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
        pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
        pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
        git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
        cd ./lmms-eval
        # NOTE(review): the lmms-eval install runs in the background and is
        # only given a fixed 120 s head start; nothing waits for it to finish.
        nohup pip install . > lmmslog.txt 2>&1 &
        sleep 120
        # Make the cloned lmms-eval checkout importable.
        export PYTHONPATH=$PYTHONPATH:$(pwd)
        cd ../
        cd test
        python3 run_suite.py --hw npu --suite nightly-2-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Nightly 4-NPU suite on A3 runners (single partition).
nightly-4-npu-a3:
  if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
  runs-on: linux-aarch64-a3-4
  strategy:
    fail-fast: false
    matrix:
      part: [0]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}
    - name: Install dependencies
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        # Point pip at the cache as its primary index, as the other NPU jobs
        # do; this job already lists the cache host as trusted below.
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
        pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # download the grade-school-math test split
        curl -o /tmp/test.jsonl -L https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
    - name: Print Log Information
      run: |
        bash scripts/ci/npu/npu_log_print.sh
    - name: Run test
      timeout-minutes: 240
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        pip install sglang_router
        hf download lmms-lab/MMMU --repo-type dataset
        pip install sentence_transformers torchaudio==2.8.0
        # Requirement specifiers containing '>' must be quoted: unquoted, the
        # shell treats '>=X' as a redirection of stdout to a file named '=X',
        # which drops the version floor and hides pip's output.
        pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
        pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
        pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
        git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
        cd ./lmms-eval
        # NOTE(review): the lmms-eval install runs in the background and is
        # only given a fixed 120 s head start; nothing waits for it to finish.
        nohup pip install . > lmmslog.txt 2>&1 &
        sleep 120
        # Make the cloned lmms-eval checkout importable.
        export PYTHONPATH=$PYTHONPATH:$(pwd)
        cd ../
        cd test
        python3 run_suite.py --hw npu --suite nightly-4-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Nightly suite for 8-NPU A3 runners: one matrix partition, run inside the CANN 8.5.0 container.
nightly-8-npu-a3:
  if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
  runs-on: linux-aarch64-a3-8
  strategy:
    fail-fast: false
    matrix:
      part: [0]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Point apt and pip at the in-cluster cache, install NPU dependencies,
    # and stage the dataset files under /tmp.
    - name: Install dependencies
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
        pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy
        curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

    - name: Print Log Information
      run: |
        bash scripts/ci/npu/npu_log_print.sh

    - name: Run test
      timeout-minutes: 240
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        pip install sglang_router
        hf download lmms-lab/MMMU --repo-type dataset
        pip install sentence_transformers torchaudio==2.8.0
        # Specifiers containing '>' are quoted: unquoted, the shell treats '>=x.y' as a
        # redirection to a file named '=x.y' and pip never sees the version bound.
        pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
        pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
        pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
        git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
        cd ./lmms-eval
        # lmms-eval installs in the background (log in lmmslog.txt); the script only
        # waits a fixed 120 s before moving on.
        nohup pip install . > lmmslog.txt 2>&1 &
        sleep 120
        export PYTHONPATH=$PYTHONPATH:$(pwd)
        cd ../
        cd test
        python3 run_suite.py --hw npu --suite nightly-8-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Nightly suite for 16-NPU A3 runners: split into two matrix partitions,
# run inside the CANN 8.5.0 container.
nightly-16-npu-a3:
  if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
  runs-on: linux-aarch64-a3-16
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Point apt and pip at the in-cluster cache, install NPU dependencies,
    # and stage the dataset files under /tmp.
    - name: Install dependencies
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
        pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"
        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy
        curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

    - name: Print Log Information
      run: |
        bash scripts/ci/npu/npu_log_print.sh

    - name: Run test
      timeout-minutes: 240
      env:
        SGLANG_USE_MODELSCOPE: true
        SGLANG_IS_IN_CI: true
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: 32
      run: |
        pip install sglang_router
        hf download lmms-lab/MMMU --repo-type dataset
        pip install sentence_transformers torchaudio==2.8.0
        # Specifiers containing '>' are quoted: unquoted, the shell treats '>=x.y' as a
        # redirection to a file named '=x.y' and pip never sees the version bound.
        pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
        pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter black==24.1.0 isort==5.13.2 "peft>=0.2.0" "accelerate>=0.29.1"
        pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.32 numpy==1.26.4 dotenv
        git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
        cd ./lmms-eval
        # lmms-eval installs in the background (log in lmmslog.txt); the script only
        # waits a fixed 120 s before moving on.
        nohup pip install . > lmmslog.txt 2>&1 &
        sleep 120
        export PYTHONPATH=$PYTHONPATH:$(pwd)
        cd ../
        cd test
        python3 run_suite.py --hw npu --suite nightly-16-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Final gate: runs even when upstream jobs fail (always()) and exits non-zero
# if any listed NPU job reported 'failure' or 'cancelled'.
check-all-jobs:
  needs:
    - nightly-1-npu-a3
    - nightly-2-npu-a3
    - nightly-4-npu-a3
    - nightly-8-npu-a3
    - nightly-16-npu-a3
  if: github.repository == 'sgl-project/sglang' && always()
  runs-on: ubuntu-latest
  container:
    image: docker.m.daocloud.io/ubuntu:22.04
  steps:
    - name: Check if any job failed
      run: |
        if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        fi
        if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi
        echo "All nightly test jobs passed"
nightly-test-nvidia matrix .github/workflows/nightly-test-nvidia.yml
View raw YAML
name: Nightly Test (Nvidia)

on:
  # Scheduled run at minute 0, hour 0 of every day.
  schedule:
    - cron: '0 0 * * *'

  # Manual run: pick one job from the list, or 'all'.
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: choice
        required: false
        default: 'all'
        options:
          - 'all'
          - 'nightly-test-general-1-gpu-h100'
          - 'nightly-test-general-4-gpu-h100'
          - 'nightly-test-general-8-gpu-h200'
          - 'nightly-test-general-8-gpu-h20'
          - 'nightly-test-general-8-gpu-b200'
          - 'nightly-test-text-accuracy-2-gpu-h100'
          - 'nightly-test-text-perf-2-gpu-h100'
          - 'nightly-test-vlm-accuracy-2-gpu-h100'
          - 'nightly-test-vlm-perf-2-gpu-h100'
          - 'nightly-test-multimodal-server-1-gpu'
          - 'nightly-test-multimodal-server-2-gpu'
          - 'nightly-test-perf-4-gpu-b200'
          - 'nightly-test-perf-8-gpu-b200'
          - 'nightly-test-kernel-1-gpu-h100'
          - 'nightly-test-diffusion-comparison'
          - 'nightly-test-kernel-8-gpu-h200'

  # Reusable entry point: callers may pin a ref and pass a free-form job filter.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: string
        required: false
        default: 'all'

# One run per tested ref.
concurrency:
  group: nightly-test-nvidia-${{ inputs.ref || github.ref }}
  # NOTE(review): in a called workflow the github context is presumably the caller's,
  # so event_name may never equal 'workflow_call' here - confirm this guard works.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

# Environment shared by every job in this workflow.
env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  HF_HUB_DOWNLOAD_TIMEOUT: 300
  HF_HUB_ETAG_TIMEOUT: 300
jobs:
# General nightly suite on a single H100.
# Runs in the upstream repo when the filter is empty, 'all', or this job's name.
nightly-test-general-1-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-general-1-gpu-h100')
  runs-on: 1-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 60
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Full JIT kernel unit tests on a single H100
# (SGLANG_JIT_KERNEL_RUN_FULL_TESTS expands the parameter ranges).
nightly-test-kernel-1-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-kernel-1-gpu-h100')
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  env:
    # Enables the full jit_kernel test grids (see sglang.jit_kernel.utils.should_run_full_tests).
    SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
    # Same JIT warmup setting as the pr-test-jit-kernel workflow.
    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
    # 'true' only on refs/heads/main: maintenance bypass on the default branch
    # (same semantics as the PR JIT workflow).
    SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      timeout-minutes: 20
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run jit kernel nightly suite
      timeout-minutes: 60
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Multi-GPU JIT kernel nightly suite on an 8x H200 runner, with the full test grids enabled.
nightly-test-kernel-8-gpu-h200:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-kernel-8-gpu-h200')
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  env:
    SGLANG_JIT_KERNEL_RUN_FULL_TESTS: "1"
    SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
    # 'true' only when the run is on refs/heads/main.
    SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      timeout-minutes: 20
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run multi-GPU jit kernel nightly suite
      timeout-minutes: 90
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# General nightly suite on a 4x H100 runner.
nightly-test-general-4-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-general-4-gpu-h100')
  runs-on: 4-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# General nightly suites on 8x H200, fanned out over four matrix partitions.
# Each partition runs its share of the common 8-GPU suite, publishes traces,
# runs the H200-specific suite, then uploads its metrics and coredumps.
nightly-test-general-8-gpu-h200:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-general-8-gpu-h200')
  runs-on: 8-gpu-h200
  strategy:
    fail-fast: false
    matrix:
      partition: [0, 1, 2, 3]
  env:
    RUNNER_LABELS: 8-gpu-h200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run common 8-GPU model tests
      if: always()
      timeout-minutes: 300
      env:
        TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
        PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
        GPU_CONFIG: "8-gpu-h200"
        IS_H200: "1"
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=18000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

    # Best effort (continue-on-error): collect every test/performance_profiles_*/
    # directory, publish them, then delete the published *.json.gz files.
    - name: Publish traces to storage repo
      if: always()
      continue-on-error: true
      env:
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_RUN_NUMBER: ${{ github.run_number }}
      run: |
        TRACE_ARGS=""
        for dir in test/performance_profiles_*/; do
          [ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
        done
        if [ -n "$TRACE_ARGS" ]; then
          python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
          find test/performance_profiles_*/ -name '*.json.gz' -delete
        else
          echo "No trace directories found, skipping publish"
        fi

    - name: Run test
      timeout-minutes: 30
      env:
        GPU_CONFIG: "8-gpu-h200"
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-8-gpu-h200 --nightly --continue-on-error

    - name: Collect performance metrics
      if: always()
      run: |
        python3 scripts/ci/utils/save_metrics.py \
          --gpu-config 8-gpu-h200 \
          --partition ${{ matrix.partition }} \
          --run-id ${{ github.run_id }} \
          --output test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json \
          --search-dir test/performance_profiles_8_gpu \
          --search-dir test

    # A missing metrics file is ignored rather than failing the step.
    - name: Upload partition metrics
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: metrics-8gpu-h200-partition-${{ matrix.partition }}
        path: test/metrics-8gpu-h200-partition-${{ matrix.partition }}.json
        retention-days: 5
        if-no-files-found: ignore

    # The partition index is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}
# General nightly suite on an 8x H20 runner.
nightly-test-general-8-gpu-h20:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-general-8-gpu-h20')
  runs-on: 8-gpu-h20
  env:
    SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      env:
        GPU_CONFIG: "8-gpu-h20"
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Common 8-GPU nightly suite on 8x B200, fanned out over four matrix partitions.
# Each partition runs its share of the suite, publishes traces, then uploads
# its metrics and coredumps.
nightly-test-general-8-gpu-b200:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-general-8-gpu-b200')
  runs-on: 8-gpu-b200
  strategy:
    fail-fast: false
    matrix:
      partition: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run common 8-GPU model tests
      if: always()
      timeout-minutes: 300
      env:
        TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
        PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
        GPU_CONFIG: "8-gpu-b200"
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-8-gpu-common --nightly --timeout-per-file=12000 --continue-on-error --auto-partition-id=${{ matrix.partition }} --auto-partition-size=4

    # Best effort (continue-on-error): collect every test/performance_profiles_*/
    # directory, publish them, then delete the published *.json.gz files.
    - name: Publish traces to storage repo
      if: always()
      continue-on-error: true
      env:
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_RUN_NUMBER: ${{ github.run_number }}
      run: |
        TRACE_ARGS=""
        for dir in test/performance_profiles_*/; do
          [ -d "$dir" ] && TRACE_ARGS="$TRACE_ARGS --traces-dir $dir"
        done
        if [ -n "$TRACE_ARGS" ]; then
          python3 scripts/ci/utils/publish_traces.py $TRACE_ARGS
          find test/performance_profiles_*/ -name '*.json.gz' -delete
        else
          echo "No trace directories found, skipping publish"
        fi

    - name: Collect performance metrics
      if: always()
      run: |
        python3 scripts/ci/utils/save_metrics.py \
          --gpu-config 8-gpu-b200 \
          --partition ${{ matrix.partition }} \
          --run-id ${{ github.run_id }} \
          --output test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json \
          --search-dir test/performance_profiles_8_gpu \
          --search-dir test

    # A missing metrics file is ignored rather than failing the step.
    - name: Upload partition metrics
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: metrics-8gpu-b200-partition-${{ matrix.partition }}
        path: test/metrics-8gpu-b200-partition-${{ matrix.partition }}.json
        retention-days: 5
        if-no-files-found: ignore

    # The partition index is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}
# Accuracy evaluation for text models on a 2x H100 runner.
nightly-test-text-accuracy-2-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-text-accuracy-2-gpu-h100')
  runs-on: 2-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run eval test for text models
      timeout-minutes: 120
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Performance benchmarks for text models on a 2x H100 runner; traces are published afterwards.
nightly-test-text-perf-2-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-text-perf-2-gpu-h100')
  runs-on: 2-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    # Clears any existing performance_profiles_text_models/ directory before the suite runs.
    - name: Run performance test for text models
      timeout-minutes: 180
      env:
        TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
        PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
        GPU_CONFIG: "2-gpu-h100"
      run: |
        cd test
        rm -rf performance_profiles_text_models/
        python3 run_suite.py --hw cuda --suite nightly-perf-text-2-gpu --nightly --continue-on-error --timeout-per-file 3600

    # No `if:` here, so this is skipped when the step above fails.
    - name: Publish traces to storage repo
      env:
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_RUN_NUMBER: ${{ github.run_number }}
      run: python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_text_models

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Accuracy evaluation for VLM models on a 2x H100 runner.
nightly-test-vlm-accuracy-2-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-vlm-accuracy-2-gpu-h100')
  runs-on: 2-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run eval test for VLM models (fixed MMMU-100)
      timeout-minutes: 240
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Performance benchmarks for VLM models on a 2x H100 runner; traces are published afterwards.
nightly-test-vlm-perf-2-gpu-h100:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-vlm-perf-2-gpu-h100')
  runs-on: 2-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    # Clears any existing performance_profiles_vlms/ directory before the suite runs.
    - name: Run perf test for VLM models (MMMU)
      timeout-minutes: 240
      env:
        TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
        PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
        GPU_CONFIG: "2-gpu-h100"
      run: |
        cd test
        rm -rf performance_profiles_vlms/
        python3 run_suite.py --hw cuda --suite nightly-perf-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 3600

    # No `if:` here, so this is skipped when the step above fails.
    - name: Publish traces to storage repo
      env:
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_RUN_NUMBER: ${{ github.run_number }}
      run: python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_vlms

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Diffusion server tests on a single H100, split into two matrix partitions.
# Each partition saves and uploads its own diffusion metrics file.
nightly-test-multimodal-server-1-gpu:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-multimodal-server-1-gpu')
  runs-on: 1-gpu-h100
  strategy:
    fail-fast: false
    max-parallel: 5
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion
        pip install slack_sdk

    - name: Run diffusion server tests
      timeout-minutes: 60
      env:
        SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GPU_CONFIG: "1-gpu-h100"
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 1-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2

    - name: Collect diffusion performance metrics
      if: always()
      run: |
        python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
          --gpu-config 1-gpu-h100 \
          --run-id ${{ github.run_id }} \
          --output python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json \
          --results-json python/diffusion-results.json

    # A missing metrics file is ignored rather than failing the step.
    - name: Upload diffusion metrics
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: diffusion-metrics-1gpu-partition-${{ matrix.part }}
        path: python/diffusion-metrics-1gpu-partition-${{ matrix.part }}.json
        retention-days: 90
        if-no-files-found: ignore

    # The partition index is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.part }}
# Diffusion server tests on a 2x H100 runner, split into two matrix partitions.
# Each partition saves and uploads its own diffusion metrics file.
nightly-test-multimodal-server-2-gpu:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-multimodal-server-2-gpu')
  runs-on: 2-gpu-h100
  strategy:
    fail-fast: false
    max-parallel: 5
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion
        pip install slack_sdk

    - name: Run diffusion server tests
      timeout-minutes: 60
      env:
        SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GPU_CONFIG: "2-gpu-h100"
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 2-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2

    - name: Collect diffusion performance metrics
      if: always()
      run: |
        python3 scripts/ci/utils/diffusion/save_diffusion_metrics.py \
          --gpu-config 2-gpu-h100 \
          --run-id ${{ github.run_id }} \
          --output python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json \
          --results-json python/diffusion-results.json

    # A missing metrics file is ignored rather than failing the step.
    - name: Upload diffusion metrics
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: diffusion-metrics-2gpu-partition-${{ matrix.part }}
        path: python/diffusion-metrics-2gpu-partition-${{ matrix.part }}.json
        retention-days: 90
        if-no-files-found: ignore

    # The partition index is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.part }}
# B200 performance suite on a 4-GPU runner.
nightly-test-perf-4-gpu-b200:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-perf-4-gpu-b200')
  runs-on: 4-gpu-b200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 300
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error --timeout-per-file 12000

    # Runs regardless of the test outcome.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Specialized B200 tests - 8 GPU, for specific backends and configs.
# The job filter accepts the legacy name 'nightly-test-perf-8-gpu-b200' (the value
# offered in the workflow_dispatch choice list) as well as this job's own id, so a
# caller passing the job id through workflow_call is not silently skipped.
nightly-test-specialized-8-gpu-b200:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-perf-8-gpu-b200' ||
    inputs.job_filter == 'nightly-test-specialized-8-gpu-b200')
  runs-on: 8-gpu-b200
  env:
    RUNNER_LABELS: 8-gpu-b200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 120
      env:
        GPU_CONFIG: "8-gpu-b200"
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

    # Runs regardless of the test outcome.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: always()
# Cross-framework diffusion comparison on a 4x H100 runner.
# Runs the comparison, builds a dashboard, publishes both, and uploads the
# outputs as an artifact; the post-run steps use always().
nightly-test-diffusion-comparison:
  if: >-
    github.repository == 'sgl-project/sglang' &&
    (inputs.job_filter == '' ||
    inputs.job_filter == 'all' ||
    inputs.job_filter == 'nightly-test-diffusion-comparison')
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run cross-framework comparison
      timeout-minutes: 210
      env:
        GITHUB_SHA: ${{ github.sha }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        PYTHONUNBUFFERED: "1"
      run: |
        python3 -u scripts/ci/utils/diffusion/run_comparison.py \
          --output comparison-results.json

    - name: Generate dashboard
      if: always()
      env:
        GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
      run: |
        python3 scripts/ci/utils/diffusion/generate_diffusion_dashboard.py \
          --results comparison-results.json \
          --output dashboard.md \
          --charts-dir comparison-charts \
          --fetch-history \
          --step-summary

    - name: Publish to sglang-ci-data
      if: always()
      env:
        GH_PAT_FOR_NIGHTLY_CI_DATA: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
      run: |
        python3 scripts/ci/utils/diffusion/publish_comparison_results.py \
          --results comparison-results.json \
          --dashboard dashboard.md \
          --charts-dir comparison-charts

    # Missing files are ignored rather than failing the step.
    - name: Upload comparison artifacts
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: diffusion-comparison-${{ github.run_id }}
        path: |
          comparison-results.json
          dashboard.md
          comparison-charts/
          comparison-logs/
        retention-days: 90
        if-no-files-found: ignore

    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
# Merge the per-partition metrics artifacts into one consolidated file.
# Runs even when the producing jobs failed (always()).
consolidate-metrics:
  needs:
    - nightly-test-general-8-gpu-h200
    - nightly-test-general-8-gpu-b200
    - nightly-test-multimodal-server-1-gpu
    - nightly-test-multimodal-server-2-gpu
  if: github.repository == 'sgl-project/sglang' && always()
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    # Every artifact whose name matches *metrics-* is merged into metrics/.
    - name: Download all partition metrics
      uses: actions/download-artifact@v4
      with:
        pattern: "*metrics-*"
        path: metrics/
        merge-multiple: true

    - name: List downloaded metrics
      run: |
        echo "Downloaded metrics files:"
        find metrics/ -name "*.json" -type f 2>/dev/null || echo "No metrics files found"

    - name: Merge metrics
      run: |
        python3 scripts/ci/utils/merge_metrics.py \
          --input-dir metrics/ \
          --output consolidated-metrics-${{ github.run_id }}.json \
          --run-id ${{ github.run_id }} \
          --commit-sha ${{ github.sha }} \
          --branch ${{ github.ref_name }}

    # A missing consolidated file only produces a warning.
    - name: Upload consolidated metrics
      uses: actions/upload-artifact@v4
      with:
        name: consolidated-metrics-${{ github.run_id }}
        path: consolidated-metrics-${{ github.run_id }}.json
        retention-days: 90
        if-no-files-found: warn
# Final check job: runs even when upstream jobs fail (always()) and exits non-zero
# if any job in `needs` reported 'failure' or 'cancelled'.
# Both JIT kernel jobs are listed so their failures also fail this gate; a job
# skipped by the job filter reports 'skipped' and does not trip either check.
check-all-jobs:
  if: github.repository == 'sgl-project/sglang' && always()
  needs:
    - nightly-test-general-1-gpu-h100
    - nightly-test-kernel-1-gpu-h100
    - nightly-test-kernel-8-gpu-h200
    - nightly-test-general-4-gpu-h100
    - nightly-test-general-8-gpu-h200
    - nightly-test-general-8-gpu-h20
    - nightly-test-general-8-gpu-b200
    - nightly-test-text-accuracy-2-gpu-h100
    - nightly-test-text-perf-2-gpu-h100
    - nightly-test-vlm-accuracy-2-gpu-h100
    - nightly-test-vlm-perf-2-gpu-h100
    - nightly-test-multimodal-server-1-gpu
    - nightly-test-multimodal-server-2-gpu
    - nightly-test-perf-4-gpu-b200
    - nightly-test-specialized-8-gpu-b200
    - nightly-test-diffusion-comparison
    - consolidate-metrics
  runs-on: ubuntu-latest
  steps:
    - name: Check if any job failed
      run: |
        if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
          echo "One or more nightly test jobs failed"
          exit 1
        fi
        if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
          echo "One or more nightly test jobs were cancelled"
          exit 1
        fi
        echo "All nightly test jobs passed"
open-pr-copy-from-oss perms .github/workflows/open-pr-copy-from-oss.yml
View raw YAML
name: Open A PR to Copy Code From OSS

# Manual trigger only; the daily schedule below is disabled.
on:
  workflow_dispatch:
  # schedule:
  #   - cron: '0 10 * * *'

permissions:
  contents: write

jobs:
  # Checks out main, installs the GitHub CLI via the repo script, and runs the copy script.
  copy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: 'main'

      - name: Install GitHub CLI (if not present)
        run: bash scripts/code_sync/install_github_cli.sh

      # GH_TOKEN is supplied from the GH_PAT_FOR_OPEN_PR_TO_PRIVATE secret.
      - name: Copy from OSS code
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_OPEN_PR_TO_PRIVATE }}
        run: python3 scripts/code_sync/copy_from_oss.py
open-pr-copy-to-oss perms .github/workflows/open-pr-copy-to-oss.yml
View raw YAML
name: Open A PR to Copy Diff To OSS

# Manual trigger; `commit_sha` selects the commit to copy (default 'LAST').
on:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'The commit SHA to copy. Defaults to LAST to copy the latest commit.'
        required: false
        default: 'LAST'

permissions:
  contents: write

jobs:
  # Checks out full history, installs the GitHub CLI via the repo script,
  # and runs the copy script for the selected commit.
  copy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install GitHub CLI (if not present)
        run: |
          bash scripts/code_sync/install_github_cli.sh

      - name: Copy to OSS code
        env:
          GH_TOKEN: ${{ secrets.GH_PAT_FOR_OPEN_PR_TO_OSS }}
          # The input is passed through the environment instead of being expanded
          # into the script text, so its content cannot be executed as shell code.
          COMMIT_SHA: ${{ github.event.inputs.commit_sha }}
        run: |
          # Fall back to the documented default when the field was cleared.
          python3 scripts/code_sync/copy_to_oss.py --commit "${COMMIT_SHA:-LAST}"
patch-docker-dev .github/workflows/patch-docker-dev.yml
View raw YAML
name: Patch Docker Image

# Manual trigger: choose the image tag to patch and, optionally, the PRs to apply.
on:
  workflow_dispatch:
    inputs:
      pr_numbers:
        description: "Comma-separated PR numbers to apply (e.g. 18962,19010)"
        required: false
        default: ""
      image_tag:
        description: "Base image tag to patch (e.g. dev-x86, dev-x86-cu13)"
        required: true

# One run per image tag; a newer run cancels the one in progress.
concurrency:
  group: patch-docker-${{ inputs.image_tag }}
  cancel-in-progress: true
jobs:
# Rebuild lmsysorg/sglang:<image_tag> with patches applied on top of the image's
# checkout in /sgl-workspace/sglang, then push it under the same tag.
# Patches come from the listed PRs (diff against their merge-base with main) or,
# when no PR is given, from the diff between the image's commit and origin/main.
# Workflow inputs reach the scripts through environment variables rather than
# `${{ }}` expansion inside `run:`, so input text is never parsed as shell code.
patch:
  if: github.repository == 'sgl-project/sglang'
  runs-on: x64-docker-build-node
  env:
    IMAGE_TAG: ${{ inputs.image_tag }}
    PR_NUMBERS: ${{ inputs.pr_numbers }}
    PATCH_SOURCE: "${{ inputs.pr_numbers && format('PRs: {0}', inputs.pr_numbers) || 'latest main' }}"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    # Exports BASE_SHA (empty when the image has no git checkout) for later steps.
    - name: Pull base image and extract commit
      run: |
        IMAGE="lmsysorg/sglang:${IMAGE_TAG}"
        docker pull "${IMAGE}"
        if BASE_SHA=$(docker run --rm "${IMAGE}" git -C /sgl-workspace/sglang rev-parse HEAD 2>/dev/null); then
          echo "Image built from commit: ${BASE_SHA}"
        else
          BASE_SHA=""
          echo "::warning::Image has no .git directory — cannot extract base commit"
        fi
        echo "BASE_SHA=${BASE_SHA}" >> "$GITHUB_ENV"

    # Writes one .patch file per source into /tmp/patch-ctx and sets SKIP_BUILD=true
    # when every patch is empty.
    - name: Generate patches
      run: |
        git config --global --add safe.directory "$GITHUB_WORKSPACE"
        git fetch origin main
        mkdir -p /tmp/patch-ctx
        if [ -n "${PR_NUMBERS}" ]; then
          IFS=',' read -ra PRS <<< "${PR_NUMBERS}"
          for pr in "${PRS[@]}"; do
            pr=$(echo "${pr}" | xargs)
            # Reject anything that is not a plain number before it is used in refs and paths.
            if ! [[ "${pr}" =~ ^[0-9]+$ ]]; then
              echo "::error::Invalid PR number: '${pr}'"
              exit 1
            fi
            echo "Fetching PR #${pr}"
            git fetch origin "pull/${pr}/head:pr-${pr}"
            MERGE_BASE=$(git merge-base origin/main "pr-${pr}")
            echo " PR #${pr}: merge-base=${MERGE_BASE}"
            git diff "${MERGE_BASE}..pr-${pr}" > "/tmp/patch-ctx/${pr}.patch"
            echo " PR #${pr}: $(wc -l < /tmp/patch-ctx/${pr}.patch) lines"
          done
        elif [ -n "${BASE_SHA}" ]; then
          echo "Generating diff: image ${BASE_SHA} → latest main"
          git fetch origin "${BASE_SHA}"
          git diff "${BASE_SHA}..origin/main" > /tmp/patch-ctx/main.patch
          echo " main: $(wc -l < /tmp/patch-ctx/main.patch) lines"
        else
          echo "::error::No PR numbers specified and image has no .git — cannot generate diff against main"
          exit 1
        fi
        TOTAL=$(cat /tmp/patch-ctx/*.patch | wc -l)
        if [ "${TOTAL}" -eq 0 ]; then
          echo "::warning::All patches are empty — image is already up to date"
          echo "SKIP_BUILD=true" >> "$GITHUB_ENV"
        fi

    # The generated Dockerfile applies every non-empty patch with `patch -p1`
    # and fails the build on the first patch that does not apply.
    - name: Build patched image
      if: env.SKIP_BUILD != 'true'
      run: |
        IMAGE="lmsysorg/sglang:${IMAGE_TAG}"
        cat <<'DOCKERFILE' > /tmp/patch-ctx/Dockerfile
        ARG BASE_IMAGE
        FROM ${BASE_IMAGE}
        COPY *.patch /tmp/patches/
        RUN cd /sgl-workspace/sglang \
            && for p in /tmp/patches/*.patch; do \
                 if [ ! -s "${p}" ]; then \
                   echo "Skipping ${p} (empty)"; \
                 else \
                   echo "Applying ${p}..." \
                   && patch -p1 --fuzz=2 --no-backup-if-mismatch -f < "${p}" \
                   || { echo "ERROR: Failed to apply ${p}"; exit 1; }; \
                 fi; \
               done \
            && rm -rf /tmp/patches
        DOCKERFILE
        docker build \
          --no-cache \
          --build-arg BASE_IMAGE="${IMAGE}" \
          -t "${IMAGE}" \
          /tmp/patch-ctx/

    - name: Push patched image
      if: env.SKIP_BUILD != 'true'
      run: |
        IMAGE="lmsysorg/sglang:${IMAGE_TAG}"
        docker push "${IMAGE}"
        echo "### Patched \`${IMAGE}\`" >> "$GITHUB_STEP_SUMMARY"
        echo "- **Base commit:** \`${BASE_SHA:-unknown (no .git)}\`" >> "$GITHUB_STEP_SUMMARY"
        echo "- **Source:** ${PATCH_SOURCE}" >> "$GITHUB_STEP_SUMMARY"
pr-benchmark-rust matrix perms .github/workflows/pr-benchmark-rust.yml
View raw YAML
name: PR Benchmark (SMG Components)

# Triggered by pushes and pull requests against main that touch
# sgl-model-gateway/, or manually.
on:
  push:
    branches: [main]
    paths:
      - "sgl-model-gateway/**"
  pull_request:
    branches: [main]
    paths:
      - "sgl-model-gateway/**"
  workflow_dispatch:

# One run per ref; a newer run cancels the one in progress.
concurrency:
  group: pr-benchmark-rust-${{ github.ref }}
  cancel-in-progress: true

# Compile through sccache in every job.
env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"

permissions:
  contents: read
  pull-requests: write
  issues: write
jobs:
# Compile-only check: verifies the benchmarks in sgl-model-gateway build
# (`cargo check --benches`) without running them.
benchmark-compile-check:
  name: Benchmark Compilation Check
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

    - name: Configure sccache
      uses: mozilla-actions/sccache-action@v0.0.9
      with:
        version: "v0.12.0"
        disable_annotations: true

    # Shares the "rust-cache" key with the benchmark job.
    - name: Rust cache
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: sgl-model-gateway
        shared-key: "rust-cache"
        cache-all-crates: true
        cache-on-failure: true
        save-if: true

    - name: Check benchmarks compile
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        cargo check --benches

    - name: Show sccache stats
      if: always()
      run: sccache --show-stats
# Runs each benchmark in the matrix and uploads its output as an artifact.
# Executes in the upstream repo on push and manual runs, and on pull requests
# only when both the 'router-benchmark' and 'run-ci' labels are present.
benchmark:
  name: Benchmark - ${{ matrix.name }}
  if: |
    github.repository == 'sgl-project/sglang' &&
    (github.event_name == 'push' ||
    github.event_name == 'workflow_dispatch' ||
    (contains(github.event.pull_request.labels.*.name, 'router-benchmark') &&
    contains(github.event.pull_request.labels.*.name, 'run-ci')))
  strategy:
    fail-fast: false
    matrix:
      include:
        - name: Request Processing
          bench_name: request_processing
          bench_args: "benchmark_summary --exact"
          runner: ubuntu-latest
          sccache_version: "v0.12.0"
          artifact_name: request-processing-results
          artifact_path: criterion/benchmark_summary/
        - name: Manual Policy
          bench_name: manual_policy_benchmark
          bench_args: ""
          runner: ubuntu-latest
          sccache_version: "v0.12.0"
          artifact_name: manual-policy-results
          artifact_path: criterion/manual_policy*/
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 100

    - name: Install dependencies
      run: |
        bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

    - name: Configure sccache
      uses: mozilla-actions/sccache-action@v0.0.9
      with:
        version: ${{ matrix.sccache_version }}
        disable_annotations: true

    - name: Rust cache
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: sgl-model-gateway
        shared-key: "rust-cache"
        cache-all-crates: true
        cache-on-failure: true
        save-if: true

    # Uses sccache when its server starts, otherwise falls back to plain cargo.
    - name: Run benchmark
      timeout-minutes: 30
      run: |
        # Without pipefail the step's status is tee's, so a failing `cargo bench`
        # in the pipeline below would be reported as success.
        set -o pipefail
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        if command -v sccache &> /dev/null; then
          echo "Testing sccache availability..."
          export RUSTC_WRAPPER=sccache
          export SCCACHE_GHA_ENABLED="true"
          if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
            echo "sccache is working, using it for compilation"
          else
            echo "sccache failed to start, falling back to regular cargo"
            unset RUSTC_WRAPPER
            unset SCCACHE_GHA_ENABLED
          fi
        else
          echo "sccache not available, using regular cargo"
        fi
        cargo bench --bench ${{ matrix.bench_name }} -- ${{ matrix.bench_args }} 2>&1 | tee benchmark_output.txt

    # Uploaded even when the benchmark step fails, so partial output is kept.
    - name: Upload benchmark results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: ${{ matrix.artifact_name }}-${{ github.sha }}
        path: |
          sgl-model-gateway/target/${{ matrix.artifact_path }}
          sgl-model-gateway/benchmark_output.txt
        retention-days: 30

    - name: Show sccache stats
      if: always()
      run: sccache --show-stats
# Collects the per-benchmark artifacts and renders one Markdown summary, which is
# printed, appended to the job summary, and uploaded as an artifact.
benchmark-summary:
  name: Benchmark Summary
  needs:
    - benchmark
  # always(): still summarise when a benchmark matrix entry failed.
  if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
  runs-on: ubuntu-latest
  steps:
    - name: Download all benchmark results
      uses: actions/download-artifact@v4
      with:
        pattern: '*-results-${{ github.sha }}'
        path: benchmark-results

    - name: Generate summary
      run: |
        # Print one section to stdout: $1 = heading, $2 = artifact name prefix,
        # $3 = number of trailing log lines to include (default 100).
        render_section() {
          local heading="$1" artifact="$2" tail_lines="${3:-100}"
          local results_dir="benchmark-results/${artifact}-${{ github.sha }}"
          echo "### $heading"
          if [ ! -d "$results_dir" ]; then
            echo "❌ Failed or skipped"
          else
            echo "✅ **Completed**"
            if [ -f "$results_dir/benchmark_output.txt" ]; then
              echo -e "\n<details>\n<summary>View Results</summary>\n\n\`\`\`"
              tail -"$tail_lines" "$results_dir/benchmark_output.txt"
              echo -e "\`\`\`\n</details>"
            fi
          fi
          echo ""
        }
        # Everything inside the group goes to a freshly truncated summary.md.
        {
          echo "## 🚀 Benchmark Results Summary"
          echo ""
          render_section "Request Processing" "request-processing-results" 60
          render_section "Manual Policy (Sticky Sessions)" "manual-policy-results" 100
          echo -e "---\n_Generated at $(date -u '+%Y-%m-%d %H:%M:%S UTC')_"
        } > summary.md
        cat summary.md
        cat summary.md >> $GITHUB_STEP_SUMMARY

    - name: Upload summary
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-summary-${{ github.sha }}
        path: summary.md
        retention-days: 30
pr-gate .github/workflows/pr-gate.yml
View raw YAML
# Reusable gate for pull-request CI. For pull_request events it blocks draft PRs,
# optionally requires the 'run-ci' label, and rate-limits users without write access.
# Values that originate from the PR (labels, author) are handed to scripts through
# `env:` instead of being spliced into the script text, so a label or name containing
# quotes or shell metacharacters cannot alter the script.
on:
  workflow_call:
    inputs:
      require-run-ci:
        description: "Whether the PR must have the run-ci label"
        type: boolean
        default: true
      cool-down-minutes:
        description: "Cooldown period in minutes for low-permission users; 0 disables rate limiting"
        type: number
        default: 120

jobs:
  pr-gate:
    # Every step is conditioned on the pull_request event, so:
    # 1. for commits on main: no gating needed
    # 2. for workflow_dispatch: this can only be triggered by users with write access
    runs-on: ubuntu-latest
    steps:
      # Re-read the PR through the API so labels/draft state reflect the moment the
      # job runs rather than the event payload.
      - name: Fetch latest PR info
        if: github.event_name == 'pull_request'
        id: pr
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const pr = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.issue.number
            });
            core.setOutput("labels", JSON.stringify(pr.data.labels.map(l => l.name)));
            core.setOutput("draft", pr.data.draft);
            core.setOutput("user", pr.data.user.login);

      - name: Log PR info
        if: github.event_name == 'pull_request'
        env:
          PR_LABELS: ${{ steps.pr.outputs.labels }}
          PR_DRAFT: ${{ steps.pr.outputs.draft }}
          PR_USER: ${{ steps.pr.outputs.user }}
        run: |
          echo "===== PR Info ====="
          echo "PR Event: ${{ github.event_name }}"
          echo "PR Labels: ${PR_LABELS}"
          echo "PR Draft: ${PR_DRAFT}"
          echo "PR User: ${PR_USER}"
          echo "Require run-ci: ${{ inputs.require-run-ci }}"
          echo "Cool down minutes: ${{ inputs.cool-down-minutes }}"
          echo "==================="

      - name: Block draft PR
        if: github.event_name == 'pull_request' && fromJson(steps.pr.outputs.draft)
        run: |
          echo "PR is draft. Blocking CI."
          exit 1

      - name: Require run-ci label (optional)
        if: github.event_name == 'pull_request' && inputs.require-run-ci == true
        env:
          # Evaluated by the expression engine to the string 'true' or 'false';
          # the raw label JSON never reaches the shell.
          HAS_RUN_CI: ${{ contains(fromJson(steps.pr.outputs.labels), 'run-ci') }}
        run: |
          if [[ "$HAS_RUN_CI" == "false" ]]; then
            echo "Missing required label 'run-ci'. See https://docs.sglang.io/developer_guide/contribution_guide.html#how-to-trigger-ci-tests for more details."
            exit 1
          fi

      - name: Enforce rate limit for low-permission actors (optional)
        if: github.event_name == 'pull_request' && inputs.cool-down-minutes > 0
        uses: actions/github-script@v7
        env:
          PR_AUTHOR: ${{ steps.pr.outputs.user }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const DEFAULT_MINUTES = Number("${{ inputs.cool-down-minutes }}");
            const owner = context.repo.owner;
            const repo = context.repo.repo;
            const eventName = context.eventName;

            const curRun = await github.rest.actions.getWorkflowRun({
              owner, repo, run_id: context.runId
            });
            let triggeringActor = curRun.data.triggering_actor?.login || context.actor;
            if (triggeringActor === "github-actions[bot]") {
              // PR author is read from the environment, not interpolated into this source.
              triggeringActor = process.env.PR_AUTHOR || "";
              core.info(
                `triggering_actor is github-actions[bot]; substituting PR author '${triggeringActor}'.`
              );
            }

            // write / maintain / admin collaborators are exempt from rate limiting.
            async function hasHighPermission(username) {
              try {
                const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username });
                const perm = data.permission || 'none';
                return perm === 'write' || perm === 'maintain' || perm === 'admin';
              } catch (e) {
                if (e.status === 404 || e.status === 403) return false;
                throw e;
              }
            }

            if (await hasHighPermission(triggeringActor)) {
              core.info(`Triggering user '${triggeringActor}' has high permission. No rate limit applied.`);
              return;
            }

            // Optional per-user override, always read from main so a PR cannot edit its own limit.
            let effectiveCooldownMinutes = DEFAULT_MINUTES;
            let perUserCooldownMinutes = null;
            try {
              const contentResp = await github.rest.repos.getContent({
                owner,
                repo,
                path: ".github/CI_PERMISSIONS.json",
                ref: "main",
              });
              if (!Array.isArray(contentResp.data) && contentResp.data && "content" in contentResp.data) {
                const raw = Buffer.from(
                  contentResp.data.content,
                  contentResp.data.encoding || "base64"
                ).toString();
                const ciPermissions = JSON.parse(raw);
                const userPerm = ciPermissions[triggeringActor];
                if (userPerm && typeof userPerm.cooldown_interval_minutes === "number") {
                  perUserCooldownMinutes = userPerm.cooldown_interval_minutes;
                  core.info(
                    `Per-user cooldown for '${triggeringActor}' from CI_PERMISSIONS.json: ${perUserCooldownMinutes} minutes.`
                  );
                } else {
                  core.info(`No per-user cooldown found for '${triggeringActor}' in CI_PERMISSIONS.json.`);
                }
              } else {
                core.info("CI_PERMISSIONS.json content response is not a file; skipping per-user cooldown.");
              }
            } catch (e) {
              core.info(`CI_PERMISSIONS.json not found or unreadable: ${e.message}. Using default rate limit only.`);
            }

            // A per-user value can only shorten the cooldown, never lengthen it.
            if (perUserCooldownMinutes !== null) {
              effectiveCooldownMinutes = Math.min(effectiveCooldownMinutes, perUserCooldownMinutes);
            }
            if (effectiveCooldownMinutes <= 0) {
              core.info(
                `Effective cooldown for '${triggeringActor}' is 0 minutes; no rate limit enforced for this user.`
              );
              return;
            }

            const cutoff = new Date(Date.now() - effectiveCooldownMinutes * 60 * 1000);
            core.info(
              `Checking for workflow runs since ${cutoff.toISOString()} (last ${effectiveCooldownMinutes} minutes) for event '${eventName}'.`
            );

            // NOTE(review): only the first page (100 runs) is inspected; on a busy
            // repository that may cover less than the cooldown window — confirm acceptable.
            const { data } = await github.rest.actions.listWorkflowRuns({
              owner,
              repo,
              workflow_id: 'pr-test.yml',
              event: eventName,
              per_page: 100,
            });
            const runs = data.workflow_runs || [];

            // Rate Limiting Logic:
            // We only count workflow runs that actually consumed CI resources (i.e., passed the gate).
            // A run "passes the gate" if any jobs beyond the gate jobs (check-changes, pr-gate, call-gate)
            // actually executed (not skipped/cancelled). This prevents scenarios where:
            // - User has PR A with missing 'run-ci' label (fails at gate)
            // - User opens PR B with 'run-ci' label
            // - PR B should be able to run even though PR A triggered a run recently

            // Helper function to check if a run passed the gate (i.e., actually consumed CI resources)
            async function didRunPassGate(run) {
              try {
                // Note: Fetching up to 100 jobs (API maximum). If a workflow has >100 jobs,
                // we may miss some, but this is unlikely in practice.
                const { data: jobsData } = await github.rest.actions.listJobsForWorkflowRun({
                  owner, repo, run_id: run.id, per_page: 100
                });
                const jobs = jobsData.jobs || [];
                // If no jobs exist yet, the run hasn't started consuming resources
                if (jobs.length === 0) {
                  core.info(`Run ${run.id} has no jobs yet; not counting against rate limit.`);
                  return false;
                }
                // Gate jobs that don't consume significant CI resources
                const gateJobs = ['check-changes', 'pr-gate', 'call-gate', 'pr-test-finish'];
                const jobsBeyondGate = jobs.filter(j => !gateJobs.some(g => j.name === g || j.name.startsWith(g + ' ')));
                // A job "ran" if it reached a terminal conclusion state that indicates actual execution
                const ranStates = ['success', 'failure', 'timed_out', 'action_required'];
                const hasJobsThatRan = jobsBeyondGate.some(j => j.conclusion && ranStates.includes(j.conclusion));
                return hasJobsThatRan;
              } catch (e) {
                core.warning(`Could not check jobs for run ${run.id}: ${e.message}`);
                // If it's a rate limit error, count it conservatively to prevent abuse
                if (e.status === 429) {
                  core.warning(`Hit rate limit checking run ${run.id}; counting it to be safe.`);
                  return true;
                }
                // For cancelled/skipped runs, they likely didn't consume resources
                if (run.conclusion === 'cancelled' || run.conclusion === 'skipped') {
                  return false;
                }
                // Default to counting it to prevent abuse
                return true;
              }
            }

            // Limit the number of runs we'll check in detail to avoid API rate limits
            const MAX_RUNS_TO_CHECK = 5;
            let runsChecked = 0;
            let runsSkippedAtGate = 0;
            let recentFound = null;
            for (const run of runs) {
              if (String(run.id) === String(context.runId)) continue;
              if (new Date(run.created_at) < cutoff) continue;
              const isUserRun = (run.actor?.login === triggeringActor) || (run.triggering_actor?.login === triggeringActor);
              if (!isUserRun) continue;
              runsChecked++;
              core.info(`Checking run ${run.id} (created: ${run.created_at}, conclusion: ${run.conclusion})`);
              // Safety limit: if we've checked too many runs, assume the next one passed to be conservative
              if (runsChecked > MAX_RUNS_TO_CHECK) {
                core.warning(`Checked ${MAX_RUNS_TO_CHECK} runs; assuming this one passed gate to avoid API limits.`);
                recentFound = run;
                break;
              }
              // Only count runs that actually passed the gate and consumed CI resources
              if (await didRunPassGate(run)) {
                recentFound = run;
                core.info(`Found recent run ${run.id} that passed gate.`);
                break;
              } else {
                runsSkippedAtGate++;
                core.info(`Run ${run.id} failed at gate; not counting against rate limit.`);
              }
            }
            core.info(`Rate limit check summary: checked ${runsChecked} runs, ${runsSkippedAtGate} failed at gate.`);

            if (recentFound) {
              core.setFailed(
                `User '${triggeringActor}' already triggered '${context.workflow}' via '${eventName}' at ${recentFound.created_at}. ` +
                `Please wait ${effectiveCooldownMinutes} minutes before triggering again.`
              );
            } else {
              core.info(
                `No recent runs detected for '${triggeringActor}' within the last ${effectiveCooldownMinutes} minutes; proceeding.`
              );
            }
pr-test matrix perms .github/workflows/pr-test.yml
View raw YAML
name: PR Test

# /rerun-stage dispatches get a derived run name so the run can be found by URL lookup:
# "[stage-name] sha" for fork PRs, "[stage-name]" for non-fork PRs. Normal runs keep the default name.
run-name: ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage)) || '' }}

on:
  schedule:
    - cron: "0 */6 * * *"  # Run every 6 hours (UTC)
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      target_stage:
        description: "Specific stage to run (optional, for quick testing)"
        required: false
        type: string
        default: ""
      force_continue_on_error:
        description: "Force continue-on-error (test scheduled CI behavior)"
        required: false
        type: boolean
        default: false
      pr_head_sha:
        description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"
        required: false
        type: string
        default: ""
      test_parallel_dispatch:
        description: "Test parallel dispatch behavior (simulates scheduled run)"
        required: false
        type: boolean
        default: false
  workflow_call:
    inputs:
      git_ref:
        description: "Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch."
        required: false
        type: string
        default: ""
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false
      skip_stage_health_check:
        description: "Skip stage health check fast-fail (e.g. for release branch cuts)"
        required: false
        type: boolean
        default: false

concurrency:
  # Group layout: pr-test-{event}-{branch}-{pr_sha}-{stage}
  # - event: keeps scheduled runs apart from fork PRs whose branch is also called 'main'
  #   (otherwise both would resolve the branch segment to 'main' and block each other)
  # - branch: github.head_ref (pull_request) or github.ref_name (workflow_dispatch)
  # - pr_sha: isolates /rerun-stage dispatches from main-branch runs
  # - stage: lets parallel per-stage dispatches run independently
  group: pr-test-${{ github.event_name }}-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.git_ref || 'all' }}
  # NOTE(review): a comment in the check-changes job states that github.event_name is
  # inherited from the caller under workflow_call; if so this comparison never sees
  # 'workflow_call' and always yields true — confirm the intended behaviour.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}

env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: "1"
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
  # Schedule / main-branch dispatch / workflow_call from main use refs/heads/main; PR events use refs/pull/*/merge
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}

permissions:
  actions: write
  contents: read
  issues: read
  pull-requests: read

jobs:
# =============================================== check changes ====================================================
# Works out which components a run has to test and publishes that, together with
# scheduling knobs (max_parallel, runner tag, retry, continue-on-error), as job outputs.
# Free-form workflow inputs (target_stage, pr_head_sha) and the ref name are passed to
# shell steps through `env:` rather than interpolated into the script text, so their
# content cannot be interpreted as shell syntax.
check-changes:
  runs-on: ubuntu-latest
  outputs:
    # Use API-based detection for target_stage mode (filter-api), otherwise use dorny/paths-filter (filter)
    main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    # sgl_kernel is forced to false when target_stage is set, since sgl-kernel-build-wheels won't run
    # This prevents CUSTOM_BUILD_SGL_KERNEL=true when the wheel artifacts aren't available
    # Note: If PR has kernel changes AND target_stage is set, the validate-target-stage step will fail
    sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }}
    # Raw sgl_kernel value before target_stage override (used for validation)
    sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
    jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
    b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
    enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
    continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-maintenance

    - name: Determine run mode
      id: run-mode
      run: |
        # Run all tests for scheduled runs and workflow_call (when ref input is provided)
        # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.git_ref
        if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> $GITHUB_OUTPUT
          echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> $GITHUB_OUTPUT
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi

    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      # Only use paths-filter for pull_request events (where it works correctly)
      # For workflow_dispatch with target_stage, we use GitHub API in the next step
      if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage
      with:
        filters: |
          main_package:
            - ".github/workflows/pr-test.yml"
            - ".github/workflows/pr-gate.yml"
            - ".github/actions/**"
            - "python/pyproject.toml"
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "scripts/ci/cuda/*"
            - "scripts/ci/utils/*"
            - "test/**/!(*.md)"
          multimodal_gen:
            - ".github/workflows/pr-test.yml"
            - ".github/workflows/pr-test-multimodal-gen.yml"
            - "python/pyproject.toml"
            - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
            - "python/sglang/jit_kernel/diffusion/**"
            - "python/sglang/jit_kernel/tests/diffusion/**"
            - "python/sglang/jit_kernel/benchmark/diffusion/**"
            - "python/sglang/cli/**"
          jit_kernel:
            - ".github/workflows/pr-test.yml"
            - ".github/workflows/pr-test-jit-kernel.yml"
            - "python/pyproject.toml"
            - "python/sglang/jit_kernel/**"
          sgl_kernel:
            - ".github/workflows/pr-test-sgl-kernel.yml"
            - "sgl-kernel/**/*.!(md|txt)"

    # For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work
    # correctly because it falls back to "last commit" detection which breaks for merge commits.
    # Instead, we use the GitHub API to compare the PR commit against main.
    - name: Detect file changes via API (for target_stage)
      id: filter-api
      if: inputs.target_stage && inputs.pr_head_sha
      env:
        GH_TOKEN: ${{ github.token }}
        PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
      run: |
        echo "Detecting file changes via GitHub API for target_stage mode..."
        echo "PR head SHA: ${PR_HEAD_SHA}"
        # Get the list of changed files by comparing PR commit against main
        # This correctly handles merge commits by looking at the actual PR diff
        CHANGED_FILES=$(gh api "repos/${{ github.repository }}/compare/main...${PR_HEAD_SHA}" \
          --jq '[.files[].filename] | .[]' 2>/dev/null || echo "")
        if [ -z "$CHANGED_FILES" ]; then
          echo "Warning: Could not fetch changed files from API, assuming no changes"
          echo "sgl_kernel=false" >> $GITHUB_OUTPUT
          echo "main_package=false" >> $GITHUB_OUTPUT
          echo "jit_kernel=false" >> $GITHUB_OUTPUT
          echo "multimodal_gen=false" >> $GITHUB_OUTPUT
          exit 0
        fi
        echo "Changed files:"
        echo "$CHANGED_FILES" | head -20
        echo "..."
        # Check for sgl-kernel changes
        if echo "$CHANGED_FILES" | grep -qE "^(sgl-kernel/|\.github/workflows/pr-test-sgl-kernel\.yml)"; then
          echo "sgl_kernel=true" >> $GITHUB_OUTPUT
          echo "Detected sgl-kernel changes"
        else
          echo "sgl_kernel=false" >> $GITHUB_OUTPUT
        fi
        # Check for main_package changes (excluding multimodal_gen)
        # Note: Need to filter out multimodal_gen before checking, not pipe grep -q output
        MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml|\.github/workflows/pr-gate\.yml|\.github/actions/)" | grep -v "^python/sglang/multimodal_gen/" || true)
        if [ -n "$MAIN_PKG_FILES" ]; then
          echo "main_package=true" >> $GITHUB_OUTPUT
          echo "Detected main_package changes"
        else
          echo "main_package=false" >> $GITHUB_OUTPUT
        fi
        # Check for jit_kernel changes
        if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-jit-kernel\.yml)"; then
          echo "jit_kernel=true" >> $GITHUB_OUTPUT
          echo "Detected jit_kernel changes"
        else
          echo "jit_kernel=false" >> $GITHUB_OUTPUT
        fi
        # Check for multimodal_gen changes, including diffusion-specific jit_kernel coverage
        if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-multimodal-gen\.yml)"; then
          echo "multimodal_gen=true" >> $GITHUB_OUTPUT
          echo "Detected multimodal_gen changes"
        else
          echo "multimodal_gen=false" >> $GITHUB_OUTPUT
        fi

    - name: Set max-parallel based on run type
      id: set-parallel
      env:
        GH_TOKEN: ${{ github.token }}
        TARGET_STAGE: ${{ inputs.target_stage }}
        PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        # Scheduled runs and high-priority PRs get full parallelism
        if [[ "${{ github.event_name }}" == "schedule" ]]; then
          echo "max_parallel=14" >> $GITHUB_OUTPUT
          echo "Scheduled run detected, setting max_parallel to 14"
        elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
          echo "max_parallel=14" >> $GITHUB_OUTPUT
          echo "High priority PR detected, setting max_parallel to 14"
        elif [[ -n "${TARGET_STAGE}" ]]; then
          # /rerun-stage (workflow_dispatch): query PR labels via GitHub API
          # Try SHA lookup first (fork PRs), fallback to branch name (non-fork PRs)
          LABELS=""
          if [[ -n "$PR_HEAD_SHA" ]]; then
            LABELS=$(gh api "repos/${{ github.repository }}/commits/${PR_HEAD_SHA}/pulls" \
              --jq '.[0].labels[].name' 2>/dev/null || true)
          fi
          if [[ -z "$LABELS" ]]; then
            LABELS=$(gh pr list --head "${REF_NAME}" --repo "${{ github.repository }}" \
              --json labels --jq '.[0].labels[].name' 2>/dev/null || true)
          fi
          echo "PR labels: ${LABELS:-"(none)"}"
          if echo "$LABELS" | grep -Fxq "high priority"; then
            echo "max_parallel=14" >> $GITHUB_OUTPUT
            echo "High priority PR detected via API (/rerun-stage), setting max_parallel to 14"
          else
            echo "max_parallel=3" >> $GITHUB_OUTPUT
            echo "Using default max_parallel of 3 (/rerun-stage, no high priority label)"
          fi
        else
          echo "max_parallel=3" >> $GITHUB_OUTPUT
          echo "Using default max_parallel of 3"
        fi

    - name: Set B200 runner tag
      id: set-runner
      env:
        TARGET_STAGE: ${{ inputs.target_stage }}
      run: |
        # Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode
        # (target_stage skips wheel builds, so we can't use custom kernels)
        # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
        sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}"
        target_stage="${TARGET_STAGE}"
        if [[ "$sgl_kernel" == "true" && -z "$target_stage" ]]; then
          echo "b200_runner=4-gpu-b200-kernel" >> $GITHUB_OUTPUT
        else
          echo "b200_runner=4-gpu-b200" >> $GITHUB_OUTPUT
        fi

    - name: Enable retry for CI
      id: set-retry
      run: |
        echo "enable_retry=true" >> $GITHUB_OUTPUT
        echo "Retry logic enabled for CI"

    - name: Set continue-on-error for full test runs
      id: set-continue-on-error
      run: |
        if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then
          echo "continue_on_error=true" >> $GITHUB_OUTPUT
          echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
        else
          echo "continue_on_error=false" >> $GITHUB_OUTPUT
          echo "Filtered run, continue-on-error disabled"
        fi

    - name: Validate target_stage with kernel changes
      # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
      if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true')
      run: |
        echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes."
        echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files."
        echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds."
        echo ""
        echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes."
        echo ""
        echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels."
        echo "The /rerun-stage command skips the wheel build job, so the test would run against"
        echo "the wrong (PyPI) version of sgl-kernel instead of your changes."
        echo ""
        echo "To properly test your kernel changes, use one of these commands instead:"
        echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds"
        echo " /rerun-ci - Re-run the full workflow"
        echo ""
        exit 1

    - name: Show filter results in summary (table)
      env:
        TARGET_STAGE_LABEL: ${{ inputs.target_stage || '(none)' }}
      run: |
        {
          echo "## Change Detection"
          echo ""
          echo "| Component | Changed |"
          echo "|-------------------|---------|"
          echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |"
          echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |"
          echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |"
          echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |"
          echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |"
          echo "| target_stage | ${TARGET_STAGE_LABEL} |"
          echo "| detection_method | ${{ inputs.target_stage && 'GitHub API' || 'dorny/paths-filter' }} |"
          echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |"
          echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |"
          echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |"
          echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |"
        } >> $GITHUB_STEP_SUMMARY
# =============================================== Wait Jobs for Sequential PR Execution ====================================================
# Wait jobs poll the GitHub API until the previous stage has completed.
# PR runs: the wait jobs execute and thereby enforce sequential stage execution.
# Scheduled runs: the wait jobs are skipped, so stages run in parallel and are easier to retry.
wait-for-stage-a:
  needs:
    - check-changes
    - call-gate
  # Runs only for PRs (never scheduled) and only when no specific stage is targeted.
  # Skipped when call-gate failed: stage-a jobs are skipped then, so there is nothing to wait for.
  # !cancelled() makes the job honour cancellation coming from the concurrency group.
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    stage_a_result: ${{ steps.wait.outputs.result }}
  steps:
    - uses: actions/checkout@v4
    - uses: ./.github/actions/check-maintenance
    - id: wait
      uses: ./.github/actions/wait-for-jobs
      with:
        stage-name: stage-a
        jobs: '["stage-a-test-1-gpu-small", "stage-a-test-cpu"]'
        max-wait-minutes: "240"
# Blocks until every stage-b job group has reported, so later stages start in order.
wait-for-stage-b:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
  # Runs only for PRs (never scheduled) and only when no specific stage is targeted.
  # Skipped when call-gate failed: stage-b jobs are skipped then, so there is nothing to wait for.
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    stage_b_result: ${{ steps.wait.outputs.result }}
  steps:
    - uses: actions/checkout@v4
    - uses: ./.github/actions/check-maintenance
    - id: wait
      uses: ./.github/actions/wait-for-jobs
      with:
        stage-name: stage-b
        # JSON list of job-name prefixes with an expected_count for each.
        jobs: |
          [
            {"prefix": "stage-b-test-1-gpu-small", "expected_count": 8},
            {"prefix": "stage-b-test-1-gpu-large", "expected_count": 14},
            {"prefix": "stage-b-test-2-gpu-large", "expected_count": 4},
            {"prefix": "stage-b-test-4-gpu-b200", "expected_count": 1}
          ]
        max-wait-minutes: "480"
# =============================================== PR Gate ====================================================
# Invokes the reusable pr-gate workflow before any test job is allowed to start.
call-gate:
  needs:
    - check-changes
  # Not needed for scheduled runs (they run all tests) or when a target_stage is given;
  # also skipped when no tracked component changed.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    (
      needs.check-changes.outputs.main_package == 'true' ||
      needs.check-changes.outputs.sgl_kernel == 'true' ||
      needs.check-changes.outputs.jit_kernel == 'true' ||
      needs.check-changes.outputs.multimodal_gen == 'true'
    )
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
# =============================================== sgl-kernel ====================================================
# Builds the x86_64 sgl-kernel wheel from the PR's sources and uploads it as an artifact.
sgl-kernel-build-wheels:
  needs: [check-changes, call-gate]
  # Skip for scheduled runs (they run stages independently) and when target_stage is set
  if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
  runs-on: x64-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "12.9"
        # Add back when CUDA 13.0 is supported on CI
        # - python-version: "3.10"
        #   cuda-version: "13.0"
  name: Build Wheel
  steps:
    - name: Cleanup
      run: |
        # Same guarded form as the ARM build job: the path is quoted and the glob is
        # only expanded when the workspace directory exists, so an empty or unset
        # GITHUB_WORKSPACE can never turn this into `rm -rf /*`.
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        submodules: "recursive"
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      env:
        USE_CCACHE: 1

    # The second `ls` fails the step when no .whl file was produced.
    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
        path: sgl-kernel/dist/*
        if-no-files-found: error
# aarch64 counterpart of the wheel build; the artifact name carries an '-aarch64' suffix.
sgl-kernel-build-wheels-arm:
  name: Build Wheel Arm
  needs:
    - check-changes
    - call-gate
  # Not run for scheduled runs (stages run independently there) nor when target_stage is set.
  if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
  runs-on: arm-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "12.9"
  steps:
    # Empties the workspace before checkout; guarded so a missing directory is only reported.
    - name: Cleanup
      run: |
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
        submodules: "recursive"

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      env:
        USE_CCACHE: 1
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

    # Listing *.whl fails the step if the build produced no wheel.
    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
        path: sgl-kernel/dist/*
        if-no-files-found: error
# Hands the sgl-kernel test suite to its reusable workflow once the x86_64 wheel build is done.
call-sgl-kernel-tests:
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  # Same gating as the wheel build: PR-style runs with sgl-kernel changes only.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  uses: ./.github/workflows/pr-test-sgl-kernel.yml
  secrets: inherit
  with:
    sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
# =============================================== jit-kernel ====================================================
# Hands the jit-kernel test suite to its reusable workflow, forwarding the dispatch inputs.
call-jit-kernel-tests:
  needs:
    - check-changes
    - call-gate
  # NOTE(review): call-gate is skipped for scheduled and target_stage runs, and this
  # condition has no always(); a skipped dependency would normally skip this job as
  # well, although target_stage / test_parallel_dispatch are forwarded below — confirm intent.
  if: needs.check-changes.outputs.jit_kernel == 'true'
  uses: ./.github/workflows/pr-test-jit-kernel.yml
  secrets: inherit
  with:
    jit_kernel: ${{ needs.check-changes.outputs.jit_kernel }}
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    target_stage: ${{ inputs.target_stage || '' }}
    test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
# =============================================== primary ====================================================
# Runs on 5090 (32GB, SM120)
stage-a-test-1-gpu-small:
  runs-on: 1-gpu-5090
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-a-test-1-gpu-small' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Explicit PR head SHA first, then explicit git ref, then the triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 10
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-a-test-1-gpu-small $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of the outcome of the earlier steps.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
stage-a-test-cpu:
  runs-on: ubuntu-latest
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-a-test-cpu' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.main_package == 'true'
      )
    )
  steps:
    - name: Free disk space
      # Removes three preinstalled toolchain directories, then prints disk usage.
      run: |
        sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
        df -h
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    - name: Install uv
      uses: astral-sh/setup-uv@v5
    # uv pip targets a venv by default; setup-python has no venv — install into that interpreter (see UV_SYSTEM_PYTHON in https://docs.astral.sh/uv/guides/integration/github/)
    - name: Install dependencies
      timeout-minutes: 20
      env:
        UV_SYSTEM_PYTHON: "1"
      run: |
        uv pip install -e "python[dev]" --index-strategy unsafe-best-match --prerelease allow
    - name: Run test
      timeout-minutes: 10
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cpu --suite stage-a-test-cpu $CONTINUE_ON_ERROR_FLAG
# Runs on 5090 (32GB, SM120)
stage-b-test-1-gpu-small:
  runs-on: 1-gpu-5090
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-1-gpu-small' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Eight partitions; one failing partition does not cancel the others.
    fail-fast: false
    max-parallel: 8
    matrix:
      partition: [0, 1, 2, 3, 4, 5, 6, 7]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        source /etc/profile.d/sglang-ci.sh
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
        git clone https://github.com/merrymercy/human-eval.git
        cd human-eval
        pip install -e . --no-build-isolation
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        source /etc/profile.d/sglang-ci.sh
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-small --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}
# Runs on H100 (80GB, SM90) - tests that don't pass on 5090 (FA3, FP8, high VRAM, etc.)
stage-b-test-1-gpu-large:
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-1-gpu-large' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    fail-fast: false
    # The concurrency cap is supplied by check-changes and parsed from JSON.
    max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
    matrix:
      partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}
stage-b-test-2-gpu-large:
  runs-on: 2-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-2-gpu-large' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Four partitions; one failing partition does not cancel the others.
    fail-fast: false
    matrix:
      partition: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
        git clone https://github.com/merrymercy/human-eval.git
        cd human-eval
        pip install -e . --no-build-isolation
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-test-2-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.partition }}
stage-b-test-4-gpu-b200:
  # The runner label is chosen by check-changes rather than hard-coded here.
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-4-gpu-b200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  # NOTE(review): no matrix is declared for this job, so fail-fast presumably
  # has no effect here — confirm before removing.
  strategy:
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG
    - name: Run FA4 jit_kernel tests (SM100+)
      timeout-minutes: 10
      # Invoked from the workspace root (no cd), directly through pytest.
      run: |
        python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py
    # Runs regardless of the outcome of the earlier steps.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
call-multimodal-gen-tests:
  # Delegates to the reusable multimodal-gen test workflow. Never runs for a
  # cancelled workflow. Otherwise it runs when one of the three multimodal-gen
  # stages is targeted explicitly, or, with no target stage, when (scheduled /
  # parallel dispatch / nothing upstream failed or was cancelled) and
  # check-changes flagged multimodal_gen.
  uses: ./.github/workflows/pr-test-multimodal-gen.yml
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    always() &&
    !cancelled() &&
    (
      inputs.target_stage == 'multimodal-gen-test-1-gpu' ||
      inputs.target_stage == 'multimodal-gen-test-2-gpu' ||
      inputs.target_stage == 'multimodal-gen-unit-test' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  secrets: inherit
  with:
    multimodal_gen: ${{ needs.check-changes.outputs.multimodal_gen }}
    sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
    continue_on_error: ${{ needs.check-changes.outputs.continue_on_error }}
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    target_stage: ${{ inputs.target_stage || '' }}
    test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
    # 'true' when any of this job's three dependencies ended in failure.
    caller_needs_failure: ${{ (needs.call-gate.result == 'failure' || needs.sgl-kernel-build-wheels.result == 'failure' || needs.check-changes.result == 'failure') && 'true' || 'false' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
stage-c-test-4-gpu-h100:
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-h100' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Three partitions; one failing partition does not cancel the others.
    fail-fast: false
    matrix:
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.part }}
stage-c-test-8-gpu-h200:
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-8-gpu-h200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Four partitions; one failing partition does not cancel the others.
    fail-fast: false
    matrix:
      part: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
    # The two warmup steps run before the suite, each against its own model list.
    - name: Warmup DeepGEMM JIT Compilation
      timeout-minutes: 25
      run: |
        python3 scripts/ci/cuda/warmup_deep_gemm.py \
        deepseek-ai/DeepSeek-V3-0324:8 \
        deepseek-ai/DeepSeek-V3.2-Exp:8
    - name: Warmup Server CUDA Graphs
      timeout-minutes: 25
      run: |
        python3 scripts/ci/cuda/warmup_server.py \
        deepseek-ai/DeepSeek-V3-0324:8 \
        inclusionAI/Ring-2.5-1T:8
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.part }}
stage-c-test-8-gpu-h20:
  runs-on: 8-gpu-h20
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-8-gpu-h20' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  env:
    # Job-wide variable, visible to every step below.
    SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
  strategy:
    # Two partitions; one failing partition does not cancel the other.
    fail-fast: false
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      # Uses the DeepEP install script rather than ci_install_dependency.sh.
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.part }}
stage-c-test-deepep-4-gpu-h100:
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-deepep-4-gpu-h100' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      # Uses the DeepEP install script rather than ci_install_dependency.sh.
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh
    # Both warmup steps target the same model spec and run before the suite.
    - name: Warmup DeepGEMM JIT Compilation
      timeout-minutes: 25
      run: |
        python3 scripts/ci/cuda/warmup_deep_gemm.py \
        lmsys/sglang-ci-dsv3-test:4
    - name: Warmup Server CUDA Graphs
      timeout-minutes: 25
      run: |
        python3 scripts/ci/cuda/warmup_server.py \
        lmsys/sglang-ci-dsv3-test:4
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu-h100 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of the outcome of the earlier steps.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
stage-c-test-deepep-8-gpu-h200:
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      # Uses the DeepEP install script rather than ci_install_dependency.sh.
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh
    # DeepGEMM warmup covers two model specs; the server warmup covers one.
    - name: Warmup DeepGEMM JIT Compilation
      timeout-minutes: 25
      run: |
        python3 scripts/ci/cuda/warmup_deep_gemm.py \
        deepseek-ai/DeepSeek-V3-0324:8 \
        deepseek-ai/DeepSeek-V3.2-Exp:8
    - name: Warmup Server CUDA Graphs
      timeout-minutes: 25
      run: |
        python3 scripts/ci/cuda/warmup_server.py \
        deepseek-ai/DeepSeek-V3-0324:8
    - name: Run test
      timeout-minutes: 45
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of the outcome of the earlier steps.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
stage-c-test-4-gpu-b200:
  # The runner label is chosen by check-changes rather than hard-coded here.
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when this stage is requested explicitly via inputs.target_stage, or,
  # with no target stage, when (scheduled / parallel dispatch / nothing upstream
  # failed or was cancelled) and check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-b200' ||
      (
        !inputs.target_stage &&
        (github.event_name == 'schedule' || inputs.test_parallel_dispatch == true || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  strategy:
    # Four partitions; one failing partition does not cancel the others.
    fail-fast: false
    matrix:
      part: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance
    - name: Download artifacts
      # Only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      env:
        # Becomes '--continue-on-error' only when check-changes sets continue_on_error to 'true'.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test
        python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG
    # Runs regardless of earlier step outcomes; the partition id is passed as the artifact suffix.
    - if: always()
      uses: ./.github/actions/upload-cuda-coredumps
      with:
        artifact-suffix: ${{ matrix.part }}
# NOTE: GB200 stage temporarily disabled — no company-owned GB200 runner available yet.
# Re-enable when a 4-gpu-gb200 runner is provisioned.
# stage-c-test-4-gpu-gb200:
# needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels-arm]
# if: |
# always() &&
# (
# (inputs.target_stage == 'stage-c-test-4-gpu-gb200') ||
# (
# !inputs.target_stage &&
# ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
# ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
# )
# )
# runs-on: 4-gpu-gb200
# timeout-minutes: 240
# strategy:
# fail-fast: false
# steps:
# - uses: ./.github/actions/check-maintenance
# with:
# github-token: ${{ github.token }}
#
# - name: Checkout code
# uses: actions/checkout@v4
# with:
# ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
#
# - name: Download artifacts
# if: needs.check-changes.outputs.sgl_kernel == 'true'
# uses: actions/download-artifact@v4
# with:
# path: sgl-kernel/dist/
# merge-multiple: true
# pattern: wheel-python3.10-cuda12.9-aarch64
#
# - name: Install dependencies
# timeout-minutes: 20
# run: |
# CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} GRACE_BLACKWELL=1 bash scripts/ci/cuda/ci_install_deepep.sh
#
# - name: Run test
# timeout-minutes: 45
# env:
# CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
# run: |
# cd test
# python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG
#
# - uses: ./.github/actions/upload-cuda-coredumps
# if: always()
pr-test-finish:
  # Final aggregation gate. It always runs and fails when any job listed in
  # `needs` finished with result 'failure' or 'cancelled'. Other results,
  # including 'skipped', are treated as passing.
  needs:
    - call-gate
    - check-changes
    - sgl-kernel-build-wheels
    - sgl-kernel-build-wheels-arm
    - call-sgl-kernel-tests
    - wait-for-stage-a
    - wait-for-stage-b
    - call-jit-kernel-tests
    - call-multimodal-gen-tests
    - stage-a-test-1-gpu-small
    - stage-a-test-cpu
    - stage-b-test-1-gpu-small
    - stage-b-test-1-gpu-large
    - stage-b-test-2-gpu-large
    - stage-b-test-4-gpu-b200
    - stage-c-test-4-gpu-h100
    - stage-c-test-8-gpu-h20
    - stage-c-test-8-gpu-h200
    - stage-c-test-deepep-4-gpu-h100
    - stage-c-test-deepep-8-gpu-h200
    - stage-c-test-4-gpu-b200
    # - stage-c-test-4-gpu-gb200  # Temporarily disabled — no GB200 runner
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # The needs context is handed over through the environment instead of
        # being pasted into the script text, so a quote character inside any
        # job output cannot terminate the shell literal or inject commands.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # One "<job><TAB><result>" line per dependency. Capturing the jq output
        # in a variable first means a jq failure aborts the step under `bash -e`.
        results=$(jq -r 'to_entries[] | [.key, .value.result] | @tsv' <<< "$NEEDS_JSON")

        failed=0
        while IFS=$'\t' read -r job result; do
          # Print the job name and its result
          echo "$job: $result"
          # Remember failures/cancellations but keep going so every job is reported
          if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
            failed=1
          fi
        done <<< "$results"

        if [[ "$failed" -ne 0 ]]; then
          echo "The above jobs failed."
          exit 1
        fi

        # No dependency failed or was cancelled
        echo "All jobs completed successfully"
        exit 0
pr-test-amd matrix .github/workflows/pr-test-amd.yml
View raw YAML
name: PR Test (AMD)
# Dynamic run-name for /rerun-stage commands to enable URL lookup
# Format: "[stage-name] sha" for fork PRs, "[stage-name]" for non-fork, default for normal runs
run-name: ${{ (inputs.target_stage || inputs.target_stage_select) && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage || inputs.target_stage_select, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage || inputs.target_stage_select)) || '' }}
on:
  # Pushes and pull requests against main share the same path filter.
  push:
    branches:
      - main
    paths:
      - 'python/**'
      - 'scripts/ci/**'
      - 'test/**'
      - 'sgl-kernel/**'
      - '.github/workflows/pr-test-amd.yml'
      - 'docker/rocm.Dockerfile'
  pull_request:
    branches:
      - main
    paths:
      - 'python/**'
      - 'scripts/ci/**'
      - 'test/**'
      - 'sgl-kernel/**'
      - '.github/workflows/pr-test-amd.yml'
      - 'docker/rocm.Dockerfile'
  # Manual trigger: a stage can be picked from the dropdown or typed as a
  # comma-separated list; the typed value takes precedence when non-empty.
  workflow_dispatch:
    inputs:
      target_stage_select:
        description: 'Select a stage to run from dropdown (leave empty for auto-detect)'
        required: false
        type: choice
        default: ''
        options:
          - ''
          - sgl-kernel-unit-test-amd
          - sgl-kernel-unit-test-2-gpu-amd
          - stage-a-test-1-gpu-small-amd
          - jit-kernel-unit-test-amd
          - stage-b-test-1-gpu-small-amd
          - stage-b-test-1-gpu-small-amd-nondeterministic
          - stage-b-test-1-gpu-small-amd-mi35x
          - stage-b-test-1-gpu-large-amd
          - stage-b-test-2-gpu-large-amd
          - multimodal-gen-test-1-gpu-amd
          - multimodal-gen-test-2-gpu-amd
          - stage-c-test-4-gpu-amd
          - stage-c-test-large-8-gpu-amd
          - stage-c-test-large-8-gpu-amd-mi35x
          - stage-b-test-large-8-gpu-disaggregation-amd
      target_stage:
        description: 'Or type comma-separated stage names (overrides dropdown if non-empty)'
        required: false
        type: string
        default: ''
      pr_head_sha:
        description: 'PR head SHA to checkout (for /rerun-stage on fork PRs)'
        required: false
        type: string
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        required: false
        type: string
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        required: false
        type: boolean
        default: false
  # Reusable-workflow entry point.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: 'Run all tests (for releasing or testing purpose)'
        required: false
        type: boolean
        default: false
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        required: false
        type: string
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        required: false
        type: boolean
        default: false
env:
  # Workflow-wide variable carrying the aiter_ref input.
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}
concurrency:
  # When called via workflow_call with run_all_tests=true, use a unique group per run to
  # avoid collisions with direct push/PR triggers. We use run_all_tests (not github.event_name)
  # to detect this, because github.event_name inherits from the caller in workflow_call.
  group: pr-test-amd-${{ inputs.run_all_tests && format('full-{0}', github.run_id) || inputs.pr_head_sha || inputs.ref || github.ref }}
  cancel-in-progress: ${{ !inputs.run_all_tests && github.event_name != 'workflow_call' }}
jobs:
call-gate:
  # Invokes the reusable pr-gate workflow, passing the caller's secrets through.
  secrets: inherit
  uses: ./.github/workflows/pr-gate.yml
check-changes:
  # Decides which test groups must run. In run-all mode the path filter step is
  # skipped, its outputs are empty, and every flag falls back to the run-mode
  # step's run_all_tests value.
  needs: [call-gate]
  runs-on: ubuntu-latest
  outputs:
    main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    sgl_kernel: ${{ steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}
    jit_kernel: ${{ steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Explicit PR head SHA first, then the workflow_call ref, then the triggering commit.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Determine run mode
      id: run-mode
      run: |
        # Run-all mode is selected solely by the run_all_tests input.
        # Note: github.event_name is inherited from the caller, so it cannot be
        # used to tell a workflow_call invocation apart from a direct trigger.
        if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
          echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi
    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      # Skipped in run-all mode; the job outputs above then use the fallback.
      if: steps.run-mode.outputs.run_all_tests != 'true'
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "python/pyproject_rocm.toml"
            - "python/pyproject_other.toml"
            - "scripts/ci/amd/*"
            - "scripts/ci/utils/*"
            - "test/**/!(*.md)"
            - ".github/workflows/pr-test-amd.yml"
          sgl_kernel:
            - "sgl-kernel/**/*.!(md|txt)"
            - ".github/workflows/pr-test-amd.yml"
          jit_kernel:
            - "python/sglang/jit_kernel/**"
            - ".github/workflows/pr-test-amd.yml"
          multimodal_gen:
            - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
            - "python/sglang/cli/**"
            - "python/sglang/jit_kernel/diffusion/**"
            - "python/sglang/jit_kernel/tests/diffusion/**"
            - "python/sglang/jit_kernel/benchmark/diffusion/**"
            - "python/pyproject_rocm.toml"
            - "python/pyproject_other.toml"
# =============================================== sgl-kernel ====================================================
sgl-kernel-unit-test-amd:
  needs:
    - check-changes
  # Runs when this stage appears in the comma-separated target list (typed input
  # first, dropdown as fallback), or, with no target given, when check-changes
  # reports sgl_kernel == 'true'.
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.sgl_kernel == 'true'
      )
    )
  runs-on: ${{matrix.runner}}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-1gpu-sglang]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 14
      # Each test file is a separate pytest invocation inside the ci_sglang container.
      run: |
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
        docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_topk.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_sigmoid.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_torch_defaults_reset.py
sgl-kernel-unit-test-2-gpu-amd:
  needs:
    - check-changes
  # Runs when this stage appears in the comma-separated target list (typed input
  # first, dropdown as fallback), or, with no target given, when check-changes
  # reports sgl_kernel == 'true'.
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-2-gpu-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.sgl_kernel == 'true'
      )
    )
  runs-on: ${{matrix.runner}}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-2gpu-sglang]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 20
      # Each test file is a separate pytest invocation inside the ci_sglang container.
      run: |
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_amd_deterministic_custom_allreduce.py
        docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_amd_nccl_allreduce_determinism.py
# =============================================== primary ====================================================
# Stage A: small single-GPU suite. The stage-b jobs in this workflow list it
# in their `needs`.
# Runs when explicitly targeted, or - with no target - when nothing upstream
# failed/was cancelled and main_package or sgl_kernel changed.
stage-a-test-1-gpu-small-amd:
  needs: [check-changes]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-a-test-1-gpu-small-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-1gpu-sglang]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 10
      # Folded scalar: the lines below are joined with spaces into one command.
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-a-test-1-gpu-small-amd
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# JIT kernel unit tests on a single-GPU runner.
# Runs when explicitly targeted, or - with no target - when check-changes
# reports jit_kernel changes.
jit-kernel-unit-test-amd:
  needs: [check-changes]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',jit-kernel-unit-test-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.jit_kernel == 'true'
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-1gpu-sglang]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run JIT kernel unit tests
      timeout-minutes: 10
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout"
        python3 -m pytest -q python/sglang/jit_kernel/tests/test_store_cache.py
# Stage B: small single-GPU suite, split into 14 auto-partitions (one matrix
# entry per partition id). Waits for stage A.
stage-b-test-1-gpu-small-amd:
  needs: [check-changes, stage-a-test-1-gpu-small-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-1gpu-sglang]
      # Keep the number of ids in sync with --auto-partition-size below.
      part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-b-test-1-gpu-small-amd
        --auto-partition-id ${{ matrix.part }} --auto-partition-size 14
        --timeout-per-file 1800
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Stage B: the "nondeterministic" single-GPU suite (unpartitioned).
# Waits for stage A; same trigger rules as the other stage-b jobs.
stage-b-test-1-gpu-small-amd-nondeterministic:
  needs: [check-changes, stage-a-test-1-gpu-small-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-nondeterministic,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-1gpu-sglang]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-b-test-1-gpu-small-amd-nondeterministic
        --timeout-per-file 1800
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Stage B: small single-GPU suite on the linux-mi35x-gpu-1 runner.
# Waits for stage A; same trigger rules as the other stage-b jobs.
stage-b-test-1-gpu-small-amd-mi35x:
  needs: [check-changes, stage-a-test-1-gpu-small-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-mi35x,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi35x-gpu-1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-b-test-1-gpu-small-amd-mi35x
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Stage B: large single-GPU suite, split into 2 auto-partitions.
# Waits for stage A; same trigger rules as the other stage-b jobs.
stage-b-test-1-gpu-large-amd:
  needs: [check-changes, stage-a-test-1-gpu-small-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-large-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-1gpu-sglang]
      # Keep the number of ids in sync with --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-b-test-1-gpu-large-amd
        --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
        --timeout-per-file 1800
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Stage B: large 2-GPU suite, split into 2 auto-partitions.
# Waits for stage A; same trigger rules as the other stage-b jobs.
stage-b-test-2-gpu-large-amd:
  needs: [check-changes, stage-a-test-1-gpu-small-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-2-gpu-large-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-2gpu-sglang]
      # Keep the number of ids in sync with --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-b-test-2-gpu-large-amd
        --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
        --timeout-per-file 1800
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Diffusion (multimodal_gen) server tests on a single-GPU runner, split into
# 4 partitions that run one at a time.
# Runs when explicitly targeted, or - with no target - when check-changes
# reports multimodal_gen changes.
multimodal-gen-test-1-gpu-amd:
  needs: [check-changes]
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-1-gpu-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  strategy:
    fail-fast: false
    max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
    matrix:
      runner: [linux-mi325-1gpu-sglang]
      part: [0, 1, 2, 3]  # 4 partitions; must match --total-partitions in the test step
  runs-on: ${{matrix.runner}}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    # NOTE(review): the pattern names a CUDA wheel and this job does not
    # depend on any build job visible here - confirm this step is still needed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion
    - name: Setup kernel caches
      run: |
        # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
        # This directory persists across container restarts on the self-hosted runner
        docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub
        # Clear pre-built AITER kernels from Docker image to avoid segfaults
        # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
        echo "Clearing pre-built AITER kernels from Docker image..."
        # The globs must be expanded by a shell INSIDE the container: docker exec
        # runs its command without a shell, so an unquoted *.so would be expanded
        # (or left as a literal) by the host shell and rm would delete nothing.
        docker exec ci_sglang bash -c 'rm -rf /sgl-workspace/aiter/aiter/jit/*.so' 2>/dev/null || true
        docker exec ci_sglang bash -c 'rm -rf /sgl-data/aiter-kernels/*.so' 2>/dev/null || true
        echo "AITER kernels cleared - will be rebuilt on first use"
        # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
        # This tells the test cleanup code to NOT delete downloaded models
        if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
          docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
          echo "Created .persistent_cache marker - HF cache will persist"
        else
          echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
        fi
        # Check MIOpen cache (VAE convolution kernels)
        miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
        echo "Found ${miopen_files} MIOpen cache files"
    - name: Diagnose HF cache and system resources
      run: |
        echo "=== System Memory Status ==="
        free -h
        echo ""
        echo "=== Disk Space ==="
        df -h /home/runner/sgl-data 2>/dev/null || df -h
        echo ""
        echo "=== HF Cache Directory Structure ==="
        docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
        docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
        echo ""
        echo "=== Checking for cached diffusion models (1-GPU tests) ==="
        # Models used in 1-GPU tests: Wan2.1-T2V-1.3B, HunyuanVideo, Qwen-Image, FLUX.1, FLUX.2
        for model in "Wan-AI--Wan2.1-T2V-1.3B-Diffusers" "tencent--HunyuanVideo" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev" "black-forest-labs--FLUX.2-dev"; do
          cache_path="/sgl-data/hf-cache/hub/models--${model}"
          if docker exec ci_sglang test -d "$cache_path"; then
            size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
            echo "✓ CACHED: $model ($size)"
          else
            echo "✗ NOT CACHED: $model"
          fi
        done
        echo ""
        echo "=== GPU Memory Status ==="
        docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"
    - name: Run diffusion server tests (1-GPU)
      timeout-minutes: 90
      run: |
        # AMD CI: All 1-GPU tests except FLUX.2 (FLUX.1 covers same code path)
        # Tests: T2V, T2I, I2V, LoRA
        #
        # HF download env vars:
        # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
        # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
        docker exec \
          -e SGLANG_E2E_TOLERANCE=0.3 \
          -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
          -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
          -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
          -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
          -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
          -e HF_HUB_ENABLE_HF_TRANSFER=1 \
          -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
          -w /sglang-checkout/python \
          ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 1-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 4 \
          -k "not flux_2"
        # Post-test diagnostics
        echo "=== Post-test System Memory Status ==="
        free -h
# Diffusion (multimodal_gen) server tests on a 2-GPU runner, split into
# 2 partitions that run one at a time.
# Runs when explicitly targeted, or - with no target - when check-changes
# reports multimodal_gen changes.
multimodal-gen-test-2-gpu-amd:
  needs: [check-changes]
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-2-gpu-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  strategy:
    fail-fast: false
    max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
    matrix:
      runner: [linux-mi325-2gpu-sglang]
      part: [0, 1]  # 2 partitions; must match --total-partitions in the test step
  runs-on: ${{matrix.runner}}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    # NOTE(review): the pattern names a CUDA wheel and this job does not
    # depend on any build job visible here - confirm this step is still needed.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion
    - name: Setup kernel caches
      run: |
        # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
        docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub
        # Clear pre-built AITER kernels from Docker image to avoid segfaults
        # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
        echo "Clearing pre-built AITER kernels from Docker image..."
        # The globs must be expanded by a shell INSIDE the container: docker exec
        # runs its command without a shell, so an unquoted *.so would be expanded
        # (or left as a literal) by the host shell and rm would delete nothing.
        docker exec ci_sglang bash -c 'rm -rf /sgl-workspace/aiter/aiter/jit/*.so' 2>/dev/null || true
        docker exec ci_sglang bash -c 'rm -rf /sgl-data/aiter-kernels/*.so' 2>/dev/null || true
        echo "AITER kernels cleared - will be rebuilt on first use"
        # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
        # This tells the test cleanup code to NOT delete downloaded models
        if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
          docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
          echo "Created .persistent_cache marker - HF cache will persist"
        else
          echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
        fi
        # Check MIOpen cache (VAE convolution kernels)
        miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
        echo "Found ${miopen_files} MIOpen cache files"
    - name: Diagnose HF cache and system resources
      run: |
        echo "=== System Memory Status ==="
        free -h
        echo ""
        echo "=== Disk Space ==="
        df -h /home/runner/sgl-data 2>/dev/null || df -h
        echo ""
        echo "=== HF Cache Directory Structure ==="
        docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
        docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
        echo ""
        echo "=== Checking for cached diffusion models (2-GPU tests) ==="
        # Models used in 2-GPU tests: Wan2.2-T2V-A14B, Wan2.1-T2V-14B, Qwen-Image, FLUX.1
        for model in "Wan-AI--Wan2.2-T2V-A14B-Diffusers" "Wan-AI--Wan2.1-T2V-14B-Diffusers" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev"; do
          cache_path="/sgl-data/hf-cache/hub/models--${model}"
          if docker exec ci_sglang test -d "$cache_path"; then
            size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
            echo "✓ CACHED: $model ($size)"
          else
            echo "✗ NOT CACHED: $model"
          fi
        done
        echo ""
        echo "=== GPU Memory Status ==="
        docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"
    - name: Run diffusion server tests (2-GPU)
      timeout-minutes: 80
      run: |
        # AMD CI: All 2-GPU tests including LoRA
        # Tests: T2V, T2I, I2V, LoRA
        #
        # HF download env vars:
        # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
        # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
        docker exec \
          -e SGLANG_E2E_TOLERANCE=0.3 \
          -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
          -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
          -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
          -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
          -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
          -e HF_HUB_ENABLE_HF_TRANSFER=1 \
          -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
          -w /sglang-checkout/python \
          ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 2-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2
        # Post-test diagnostics
        echo "=== Post-test System Memory Status ==="
        free -h
# Stage C: 4-GPU suite (single partition) with retry enabled.
# Waits for the gate and the stage-b small/2-GPU jobs; same trigger rules
# as the other staged jobs.
stage-c-test-4-gpu-amd:
  needs: [check-changes, call-gate, stage-b-test-1-gpu-small-amd, stage-b-test-2-gpu-large-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-4-gpu-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-4gpu-sglang]
      # Keep the number of ids in sync with --auto-partition-size below.
      part: [0]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 60
      # Folded scalar: the lines below are joined with spaces into one command.
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh
        -e NCCL_CUMEM_ENABLE=0
        -e NCCL_NVLS_ENABLE=0
        -e RCCL_MSCCL_ENABLE=0
        -e SGLANG_USE_ROCM700A=1
        -w "/sglang-checkout/test"
        python3 run_suite.py
        --hw amd
        --suite stage-c-test-4-gpu-amd
        --auto-partition-id ${{ matrix.part }}
        --auto-partition-size 1
        --timeout-per-file 1800
        --enable-retry
        --max-attempts 2
        --retry-wait-seconds 120
        --retry-timeout-increase 0
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Stage C: large 8-GPU suite in 3 auto-partitions, preceded by an RCCL
# multi-GPU communication check.
# Waits for the gate and the stage-b small/2-GPU jobs.
stage-c-test-large-8-gpu-amd:
  needs: [check-changes, call-gate, stage-b-test-1-gpu-small-amd, stage-b-test-2-gpu-large-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  env:
    RUNNER_LABELS: linux-mi325-8gpu-sglang
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-8gpu-sglang]
      # Keep the number of ids in sync with --auto-partition-size below.
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Test RCCL multi-GPU communication
      timeout-minutes: 5
      run: |
        echo "Testing RCCL multi-GPU communication with debug info..."
        docker exec ci_sglang bash -c "cd /sglang-checkout && NCCL_DEBUG=INFO RCCL_DEBUG=INFO torchrun --nproc_per_node=8 scripts/ci/amd/test_rccl_multi_gpu.py"
    - name: Run test
      timeout-minutes: 60
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-c-test-large-8-gpu-amd
        --auto-partition-id ${{ matrix.part }} --auto-partition-size 3
        --timeout-per-file 3600
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Stage C: large 8-GPU suite on the linux-mi35x-gpu-8 runner, in 2
# auto-partitions. Waits for the gate and the stage-b small/2-GPU jobs.
stage-c-test-large-8-gpu-amd-mi35x:
  needs: [check-changes, call-gate, stage-b-test-1-gpu-small-amd, stage-b-test-2-gpu-large-amd]
  if: |
    always() &&
    (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd-mi35x,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        !failure() && !cancelled() &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi35x-gpu-8]
      # Keep the number of ids in sync with --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 60
      run: >-
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test"
        python3 run_suite.py --hw amd --suite stage-c-test-large-8-gpu-amd-mi35x
        --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
        --timeout-per-file 3600
        ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# =============================================== Disaggregation ====================================================
# Disaggregation suite on the linux-mi35x-gpu-8.fabric runner.
# Probes the host for RDMA devices, starts the disagg container, verifies
# RDMA inside it, runs an AITER RMSNorm pre-check, then the test suite.
stage-b-test-large-8-gpu-35x-disaggregation-amd:
  needs: [check-changes, stage-a-test-1-gpu-small-amd]
  # The stage can be targeted by its historical selector name (without "35x")
  # or by this job's actual id; previously only the former matched, so
  # selecting the job id ran nothing.
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-disaggregation-amd,')) ||
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-35x-disaggregation-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi35x-gpu-8.fabric]
  runs-on: ${{matrix.runner}}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Check Host RDMA Environment
      id: rdma_detect
      run: |
        # Diagnostics only: disable errexit so a missing tool/device never fails the step.
        set +e
        echo "=== Checking Host RDMA Environment ==="
        echo ""
        echo "=== 1. Ionic driver library check ==="
        ls -l /usr/lib/x86_64-linux-gnu/libibverbs/libionic* 2>/dev/null || echo "libionic not found in standard path"
        echo ""
        echo "=== 2. Infiniband devices ==="
        ls -la /dev/infiniband/ 2>/dev/null || echo "/dev/infiniband not found"
        ls -la /sys/class/infiniband/ 2>/dev/null || echo "/sys/class/infiniband not found"
        echo ""
        echo "=== 3. ibv_devinfo ==="
        which ibv_devinfo 2>/dev/null && ibv_devinfo 2>&1 || echo "ibv_devinfo not available"
        echo ""
        echo "=== 4. Kernel modules ==="
        lsmod 2>/dev/null | grep -E "ib_|rdma|ionic" || echo "No RDMA kernel modules loaded"
        echo ""
        echo "=== 5. Detect RDMA Devices for test environment ==="
        # Export a comma-separated device list (or empty) for later steps via GITHUB_ENV.
        if [ -d "/sys/class/infiniband" ]; then
          RDMA_DEVS=$(ls /sys/class/infiniband | paste -sd "," -)
          echo "Detected RDMA Devices: $RDMA_DEVS"
          echo "SGLANG_TEST_RDMA_DEVICE=$RDMA_DEVS" >> $GITHUB_ENV
        else
          echo "No RDMA devices found in /sys/class/infiniband"
          echo "SGLANG_TEST_RDMA_DEVICE=" >> $GITHUB_ENV
        fi
        echo ""
        echo "=== Host RDMA Check Complete ==="
    - name: Start Special Container
      run: bash scripts/ci/amd/amd_ci_start_container_disagg.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Verify RDMA in Container
      run: |
        docker exec -u root ci_sglang bash -c '
          echo "=== Container RDMA Verification ==="
          echo "Device nodes:"
          ls -la /dev/infiniband/
          echo ""
          echo "Provider libraries:"
          ls /usr/lib/x86_64-linux-gnu/libibverbs/ | grep -E "ionic|mlx" || echo "No Ionic/Mellanox providers"
          echo ""
          echo "HCA devices:"
          HCA_COUNT=$(ibv_devinfo -list 2>&1 | grep -oE "^[0-9]+ HCAs? found" | grep -oE "^[0-9]+" || echo "0")
          ibv_devinfo -list
          if [ "$HCA_COUNT" -gt 0 ]; then
            echo ""
            echo "=== SUCCESS: RDMA setup complete. Found $HCA_COUNT HCA(s) ==="
          else
            echo ""
            echo "=== WARNING: No HCAs detected. RDMA tests may fail ==="
          fi
        '
    - name: Run Aiter Op Test (RMSNorm)
      timeout-minutes: 10
      run: |
        echo "Running pre-check: test_rmsnorm2d.py"
        docker exec \
          -e MAX_JOBS=192 \
          ci_sglang \
          python /sgl-workspace/aiter/op_tests/test_rmsnorm2d.py
    - name: Run test_disaggregation
      timeout-minutes: 60
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh \
          -e SGLANG_TEST_RDMA_DEVICE="${{ env.SGLANG_TEST_RDMA_DEVICE }}" \
          -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-8-gpu-35x-disaggregation-amd --timeout-per-file 1800 ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# Final aggregation job: always runs, and fails if any job it depends on
# ended in "failure" or "cancelled". Other results do not fail it.
pr-test-amd-finish:
  needs:
    - call-gate
    - check-changes
    - sgl-kernel-unit-test-amd
    - sgl-kernel-unit-test-2-gpu-amd
    - multimodal-gen-test-1-gpu-amd
    - multimodal-gen-test-2-gpu-amd
    - stage-a-test-1-gpu-small-amd
    - jit-kernel-unit-test-amd
    - stage-b-test-1-gpu-small-amd
    - stage-b-test-1-gpu-small-amd-nondeterministic
    - stage-b-test-1-gpu-small-amd-mi35x
    - stage-b-test-1-gpu-large-amd
    - stage-b-test-2-gpu-large-amd
    - stage-b-test-large-8-gpu-35x-disaggregation-amd
    - stage-c-test-4-gpu-amd
    - stage-c-test-large-8-gpu-amd
    - stage-c-test-large-8-gpu-amd-mi35x
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # Pass the 'needs' context through the environment instead of pasting
        # it into the script text: a job output containing a single quote
        # would otherwise terminate the shell literal and break (or inject
        # into) the script.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # Get a list of all job names from the JSON keys
        job_names=$(jq -r 'keys_unsorted[]' <<< "$NEEDS_JSON")
        for job in $job_names; do
          # For each job, extract its result
          result=$(jq -r --arg j "$job" '.[$j].result' <<< "$NEEDS_JSON")
          # Print the job name and its result
          echo "$job: $result"
          # Check for failure or cancellation and exit if found
          if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
            echo "The above jobs failed."
            exit 1
          fi
        done
        # If the loop completes, no job failed or was cancelled
        echo "All jobs completed successfully"
        exit 0
pr-test-amd-rocm720 matrix .github/workflows/pr-test-amd-rocm720.yml
View raw YAML
name: PR Test ROCm 7.2 (AMD)

# Dynamic run-name so /rerun-stage runs can be located by URL lookup:
#   "[stage-name] sha" when a PR head SHA is supplied,
#   "[stage-name]"     when only a stage is targeted,
#   empty (default name) for normal runs.
run-name: ${{ (inputs.target_stage || inputs.target_stage_select) && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage || inputs.target_stage_select, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage || inputs.target_stage_select)) || '' }}

on:
  schedule:
    - cron: '30 17 * * *'
  # push/pull_request triggers are currently disabled:
  # push:
  #   branches: [ main ]
  #   paths:
  #     - "python/**"
  #     - "scripts/ci/**"
  #     - "test/**"
  #     - "sgl-kernel/**"
  #     - ".github/workflows/pr-test-amd-rocm720.yml"
  #     - "docker/rocm.Dockerfile"
  # pull_request:
  #   branches: [ main ]
  #   paths:
  #     - "python/**"
  #     - "scripts/ci/**"
  #     - "test/**"
  #     - "sgl-kernel/**"
  #     - ".github/workflows/pr-test-amd-rocm720.yml"
  #     - "docker/rocm.Dockerfile"
  workflow_dispatch:
    inputs:
      target_stage_select:
        description: 'Select a stage to run from dropdown (leave empty for auto-detect)'
        required: false
        type: choice
        default: ''
        options:
          - ''
          - sgl-kernel-unit-test-amd
          - sgl-kernel-unit-test-2-gpu-amd
          - stage-a-test-1-gpu-small-amd
          - jit-kernel-unit-test-amd
          - stage-b-test-1-gpu-small-amd
          - stage-b-test-1-gpu-small-amd-nondeterministic
          - stage-b-test-1-gpu-small-amd-mi35x
          - stage-b-test-1-gpu-large-amd
          - stage-b-test-2-gpu-large-amd
          - multimodal-gen-test-1-gpu-amd
          - multimodal-gen-test-2-gpu-amd
          - stage-c-test-large-8-gpu-amd
          - stage-c-test-large-8-gpu-amd-mi35x
          - stage-b-test-large-8-gpu-disaggregation-amd
          - stage-c-test-4-gpu-amd
      target_stage:
        description: 'Or type comma-separated stage names (overrides dropdown if non-empty)'
        required: false
        type: string
        default: ''
      pr_head_sha:
        description: 'PR head SHA to checkout (for /rerun-stage on fork PRs)'
        required: false
        type: string
        default: ''
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        required: false
        type: string
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        required: false
        type: boolean
        default: true
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: 'Run all tests (for releasing or testing purpose)'
        required: false
        type: boolean
        default: false
      aiter_ref:
        description: 'Override AITER commit (optional, leave empty to use Dockerfile default)'
        required: false
        type: string
        default: ''
      continue_on_error:
        description: 'Continue on error (do not fail the workflow on test failures)'
        required: false
        type: boolean
        default: true

env:
  AITER_COMMIT_OVERRIDE: ${{ inputs.aiter_ref }}

concurrency:
  # With run_all_tests=true (set by workflow_call callers) every run gets its
  # own group so it cannot collide with schedule/workflow_dispatch runs.
  # run_all_tests is used for detection rather than github.event_name because
  # github.event_name is inherited from the caller.
  group: pr-test-amd-rocm720-${{ inputs.run_all_tests && format('full-{0}', github.run_id) || inputs.pr_head_sha || inputs.ref || github.ref }}
  cancel-in-progress: ${{ !inputs.run_all_tests && github.event_name != 'workflow_call' }}

jobs:
# Delegates to the reusable pr-gate workflow, forwarding this workflow's secrets.
call-gate:
  secrets: inherit
  uses: ./.github/workflows/pr-gate.yml
# Decides which test groups to run. Each output is the path-filter result
# for that group, or - when the filter step is skipped - the run_all_tests
# flag from the run-mode step.
check-changes:
  needs: [call-gate]
  runs-on: ubuntu-latest
  outputs:
    main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    sgl_kernel: ${{ steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}
    jit_kernel: ${{ steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Determine run mode
      id: run-mode
      run: |
        # Run every test group when the run_all_tests input is true; otherwise
        # fall back to path-based filtering in the next step.
        # Note: github.event_name is inherited from the caller, so the input
        # (not the event name) is what selects the mode.
        if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> $GITHUB_OUTPUT
          echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> $GITHUB_OUTPUT
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi
    - name: Detect file changes
      id: filter
      # Skipped in run-all mode; the job outputs then come from run-mode.
      if: steps.run-mode.outputs.run_all_tests != 'true'
      uses: dorny/paths-filter@v3
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "python/pyproject_rocm.toml"
            - "python/pyproject_other.toml"
            - "scripts/ci/amd/*"
            - "scripts/ci/utils/*"
            - "test/**/!(*.md)"
            - ".github/workflows/pr-test-amd-rocm720.yml"
          sgl_kernel:
            - "sgl-kernel/**/*.!(md|txt)"
            - ".github/workflows/pr-test-amd-rocm720.yml"
          jit_kernel:
            - "python/sglang/jit_kernel/**"
            - ".github/workflows/pr-test-amd-rocm720.yml"
          multimodal_gen:
            - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
            - "python/sglang/cli/**"
            - "python/sglang/jit_kernel/diffusion/**"
            - "python/sglang/jit_kernel/tests/diffusion/**"
            - "python/sglang/jit_kernel/benchmark/diffusion/**"
            - "python/pyproject_rocm.toml"
            - "python/pyproject_other.toml"
# =============================================== sgl-kernel ====================================================
sgl-kernel-unit-test-amd:
  # sgl-kernel pytest files on a 1-GPU MI325 runner, executed inside the
  # rocm720 CI container. Runs when the comma-separated target stage input
  # names this job, or, with no target stage given, when check-changes sets
  # sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.sgl_kernel == 'true'
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 14
      run: |
        # Run one pytest file inside the container: $1 = working dir, $2 = file.
        pytest_in() {
          docker exec -w "$1" ci_sglang python3 -m pytest "$2"
        }
        kernel_tests=/sglang-checkout/sgl-kernel/tests
        pytest_in "$kernel_tests" test_moe_align.py
        pytest_in "$kernel_tests" test_moe_topk_softmax.py
        pytest_in "$kernel_tests/speculative" test_eagle_utils.py
        pytest_in "$kernel_tests" test_apply_token_bitmask_inplace.py
        pytest_in "$kernel_tests" test_activation.py
        pytest_in "$kernel_tests" test_topk.py
        pytest_in "$kernel_tests" test_kvcacheio.py
        pytest_in "$kernel_tests" test_moe_topk_sigmoid.py
        pytest_in "$kernel_tests" test_torch_defaults_reset.py
sgl-kernel-unit-test-2-gpu-amd:
  # Two allreduce determinism pytest files on a 2-GPU MI325 runner. Runs when
  # the target stage input names this job, or, with no target stage given,
  # when check-changes sets sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',sgl-kernel-unit-test-2-gpu-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.sgl_kernel == 'true'
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-2gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 20
      run: |
        # Both files live in the same directory inside the container.
        kernel_tests=/sglang-checkout/sgl-kernel/tests
        docker exec -w "$kernel_tests" ci_sglang python3 -m pytest test_amd_deterministic_custom_allreduce.py
        docker exec -w "$kernel_tests" ci_sglang python3 -m pytest test_amd_nccl_allreduce_determinism.py
# =============================================== primary ====================================================
stage-a-test-1-gpu-small-amd:
  # Stage A suite on a 1-GPU MI325 runner. Runs when the target stage input
  # names this job, or, with no target stage given, when nothing upstream
  # failed or was cancelled and check-changes sets main_package or sgl_kernel
  # to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-a-test-1-gpu-small-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 10
      run: |
        # --continue-on-error is appended only when the matching input is set.
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-a-test-1-gpu-small-amd \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
jit-kernel-unit-test-amd:
  # JIT kernel unit test (test_store_cache.py) on a 1-GPU MI325 runner. Runs
  # when the target stage input names this job, or, with no target stage
  # given, when check-changes sets jit_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',jit-kernel-unit-test-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        needs.check-changes.outputs.jit_kernel == 'true'
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run JIT kernel unit tests
      timeout-minutes: 10
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout" \
          python3 -m pytest -q python/sglang/jit_kernel/tests/test_store_cache.py
stage-b-test-1-gpu-small-amd:
  # Stage B small suite on 1-GPU MI325 runners, split into 14 auto-partitions
  # (one matrix entry per partition id). Runs when the target stage input
  # names this job, or, with no target stage given, when nothing upstream
  # failed or was cancelled and check-changes sets main_package or sgl_kernel
  # to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-1gpu-sglang
      # Must stay in step with --auto-partition-size below.
      part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-b-test-1-gpu-small-amd \
          --auto-partition-id ${{ matrix.part }} \
          --auto-partition-size 14 \
          --timeout-per-file 1800 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
stage-b-test-1-gpu-small-amd-nondeterministic:
  # Stage B "nondeterministic" suite on a 1-GPU MI325 runner (unpartitioned).
  # Runs when the target stage input names this job, or, with no target stage
  # given, when nothing upstream failed or was cancelled and check-changes
  # sets main_package or sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-nondeterministic,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-1gpu-sglang
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-b-test-1-gpu-small-amd-nondeterministic \
          --timeout-per-file 1800 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
stage-b-test-1-gpu-small-amd-mi35x:
  # Stage B small suite on a 1-GPU MI35x runner. Runs when the target stage
  # input names this job, or, with no target stage given, when nothing
  # upstream failed or was cancelled and check-changes sets main_package or
  # sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-small-amd-mi35x,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi35x-gpu-1
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-b-test-1-gpu-small-amd-mi35x \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
stage-b-test-1-gpu-large-amd:
  # Stage B large suite on 1-GPU MI325 runners, split into 2 auto-partitions.
  # Runs when the target stage input names this job, or, with no target stage
  # given, when nothing upstream failed or was cancelled and check-changes
  # sets main_package or sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-1-gpu-large-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-1gpu-sglang
      # Must stay in step with --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-b-test-1-gpu-large-amd \
          --auto-partition-id ${{ matrix.part }} \
          --auto-partition-size 2 \
          --timeout-per-file 1800 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
stage-b-test-2-gpu-large-amd:
  # Stage B large suite on 2-GPU MI325 runners, split into 2 auto-partitions.
  # Runs when the target stage input names this job, or, with no target stage
  # given, when nothing upstream failed or was cancelled and check-changes
  # sets main_package or sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-2-gpu-large-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-2gpu-sglang
      # Must stay in step with --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-b-test-2-gpu-large-amd \
          --auto-partition-id ${{ matrix.part }} \
          --auto-partition-size 2 \
          --timeout-per-file 1800 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
multimodal-gen-test-1-gpu-amd:
  # 1-GPU diffusion (multimodal_gen) server tests on MI325, 4 partitions run
  # one at a time. Runs when the target stage input names this job, or, with
  # no target stage given, when nothing upstream failed or was cancelled and
  # check-changes sets main_package, sgl_kernel or multimodal_gen to 'true'.
  # The multimodal_gen output is part of the gate because the main_package
  # filter excludes python/sglang/multimodal_gen, so diffusion-only changes
  # would otherwise never trigger these tests.
  needs: [check-changes]
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-1-gpu-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (
          (needs.check-changes.outputs.main_package == 'true') ||
          (needs.check-changes.outputs.sgl_kernel == 'true') ||
          (needs.check-changes.outputs.multimodal_gen == 'true')
        )
      )
    )
  strategy:
    fail-fast: false
    max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
    matrix:
      runner: [linux-mi325-1gpu-sglang]
      part: [0, 1, 2, 3]  # must match --total-partitions in the test step
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    # NOTE(review): the artifact pattern names a CUDA wheel; confirm that an
    # artifact with this name is actually produced for this AMD workflow.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion
        docker exec ci_sglang pip install amdsmi
    - name: Setup kernel caches
      run: |
        # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
        # This directory persists across container restarts on the self-hosted runner
        docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub
        # Clear pre-built AITER kernels from Docker image to avoid segfaults
        # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
        # NOTE(review): the *.so globs below are expanded by the runner's shell
        # before docker exec runs, not inside the container; if the host has no
        # matching files the literal pattern is passed to rm. Confirm that the
        # intended files are really removed.
        echo "Clearing pre-built AITER kernels from Docker image..."
        docker exec ci_sglang rm -rf /sgl-workspace/aiter/aiter/jit/*.so 2>/dev/null || true
        docker exec ci_sglang rm -rf /sgl-data/aiter-kernels/*.so 2>/dev/null || true
        echo "AITER kernels cleared - will be rebuilt on first use"
        # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
        # This tells the test cleanup code to NOT delete downloaded models
        if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
          docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
          echo "Created .persistent_cache marker - HF cache will persist"
        else
          echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
        fi
        # Check MIOpen cache (VAE convolution kernels)
        miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
        echo "Found ${miopen_files} MIOpen cache files"
    - name: Diagnose HF cache and system resources
      run: |
        echo "=== System Memory Status ==="
        free -h
        echo ""
        echo "=== Disk Space ==="
        df -h /home/runner/sgl-data 2>/dev/null || df -h
        echo ""
        echo "=== HF Cache Directory Structure ==="
        docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
        docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
        echo ""
        echo "=== Checking for cached diffusion models (1-GPU tests) ==="
        # Models used in 1-GPU tests: Wan2.1-T2V-1.3B, HunyuanVideo, Qwen-Image, FLUX.1, FLUX.2
        for model in "Wan-AI--Wan2.1-T2V-1.3B-Diffusers" "tencent--HunyuanVideo" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev" "black-forest-labs--FLUX.2-dev"; do
          cache_path="/sgl-data/hf-cache/hub/models--${model}"
          if docker exec ci_sglang test -d "$cache_path"; then
            size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
            echo "✓ CACHED: $model ($size)"
          else
            echo "✗ NOT CACHED: $model"
          fi
        done
        echo ""
        echo "=== GPU Memory Status ==="
        docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"
    - name: Run diffusion server tests (1-GPU)
      timeout-minutes: 60
      run: |
        # AMD CI: All 1-GPU tests except FLUX.2 (FLUX.1 covers same code path)
        # Tests: T2V, T2I, I2V, LoRA
        #
        # HF download env vars:
        # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
        # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
        docker exec \
          -e SGLANG_E2E_TOLERANCE=0.3 \
          -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
          -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
          -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
          -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
          -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
          -e HF_HUB_ENABLE_HF_TRANSFER=1 \
          -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
          -w /sglang-checkout/python \
          ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 1-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 4 \
          -k "not flux_2"
        # Post-test diagnostics
        echo "=== Post-test System Memory Status ==="
        free -h
multimodal-gen-test-2-gpu-amd:
  # 2-GPU diffusion (multimodal_gen) server tests on MI325, 2 partitions run
  # one at a time. Runs when the target stage input names this job, or, with
  # no target stage given, when nothing upstream failed or was cancelled and
  # check-changes sets main_package, sgl_kernel or multimodal_gen to 'true'.
  # The multimodal_gen output is part of the gate because the main_package
  # filter excludes python/sglang/multimodal_gen, so diffusion-only changes
  # would otherwise never trigger these tests.
  needs: [check-changes]
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',multimodal-gen-test-2-gpu-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (
          (needs.check-changes.outputs.main_package == 'true') ||
          (needs.check-changes.outputs.sgl_kernel == 'true') ||
          (needs.check-changes.outputs.multimodal_gen == 'true')
        )
      )
    )
  strategy:
    fail-fast: false
    max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
    matrix:
      runner: [linux-mi325-2gpu-sglang]
      part: [0, 1]  # 2 partitions: 9 tests ÷ 2 = ~4-5 tests each
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    # NOTE(review): the artifact pattern names a CUDA wheel; confirm that an
    # artifact with this name is actually produced for this AMD workflow.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: |
        bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion
        docker exec ci_sglang pip install amdsmi
    - name: Setup kernel caches
      run: |
        # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
        docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub
        # Clear pre-built AITER kernels from Docker image to avoid segfaults
        # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
        # NOTE(review): the *.so globs below are expanded by the runner's shell
        # before docker exec runs, not inside the container; if the host has no
        # matching files the literal pattern is passed to rm. Confirm that the
        # intended files are really removed.
        echo "Clearing pre-built AITER kernels from Docker image..."
        docker exec ci_sglang rm -rf /sgl-workspace/aiter/aiter/jit/*.so 2>/dev/null || true
        docker exec ci_sglang rm -rf /sgl-data/aiter-kernels/*.so 2>/dev/null || true
        echo "AITER kernels cleared - will be rebuilt on first use"
        # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
        # This tells the test cleanup code to NOT delete downloaded models
        if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
          docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
          echo "Created .persistent_cache marker - HF cache will persist"
        else
          echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
        fi
        # Check MIOpen cache (VAE convolution kernels)
        miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
        echo "Found ${miopen_files} MIOpen cache files"
    - name: Diagnose HF cache and system resources
      run: |
        echo "=== System Memory Status ==="
        free -h
        echo ""
        echo "=== Disk Space ==="
        df -h /home/runner/sgl-data 2>/dev/null || df -h
        echo ""
        echo "=== HF Cache Directory Structure ==="
        docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
        docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
        echo ""
        echo "=== Checking for cached diffusion models (2-GPU tests) ==="
        # Models used in 2-GPU tests: Wan2.2-T2V-A14B, Wan2.1-T2V-14B, Qwen-Image, FLUX.1
        for model in "Wan-AI--Wan2.2-T2V-A14B-Diffusers" "Wan-AI--Wan2.1-T2V-14B-Diffusers" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev"; do
          cache_path="/sgl-data/hf-cache/hub/models--${model}"
          if docker exec ci_sglang test -d "$cache_path"; then
            size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
            echo "✓ CACHED: $model ($size)"
          else
            echo "✗ NOT CACHED: $model"
          fi
        done
        echo ""
        echo "=== GPU Memory Status ==="
        docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"
    - name: Run diffusion server tests (2-GPU)
      timeout-minutes: 80
      run: |
        # AMD CI: All 2-GPU tests including LoRA
        # Tests: T2V, T2I, I2V, LoRA
        #
        # HF download env vars:
        # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
        # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
        docker exec \
          -e SGLANG_E2E_TOLERANCE=0.3 \
          -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
          -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
          -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
          -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
          -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
          -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
          -e HF_HUB_ENABLE_HF_TRANSFER=1 \
          -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
          -w /sglang-checkout/python \
          ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 2-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2
        # Post-test diagnostics
        echo "=== Post-test System Memory Status ==="
        free -h
stage-c-test-4-gpu-amd:
  # Stage C suite on a 4-GPU MI325 runner, scheduled after the two stage B
  # jobs listed in `needs`. Runs when the target stage input names this job,
  # or, with no target stage given, when nothing upstream failed or was
  # cancelled and check-changes sets main_package or sgl_kernel to 'true'.
  needs: [check-changes, stage-b-test-1-gpu-small-amd, stage-b-test-2-gpu-large-amd]
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-4-gpu-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi325-4gpu-sglang]
      part: [0]  # must match --auto-partition-size in the test step
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      # Same script location as every other job in this workflow
      # (scripts/ci/amd/), instead of the divergent scripts/ path.
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 60
      run: |
        # Retries are enabled for this suite: up to 2 attempts, 120 s apart.
        bash scripts/ci/amd/amd_ci_exec.sh \
          -e NCCL_CUMEM_ENABLE=0 \
          -e NCCL_NVLS_ENABLE=0 \
          -e RCCL_MSCCL_ENABLE=0 \
          -e SGLANG_USE_ROCM700A=1 \
          -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-c-test-4-gpu-amd \
          --auto-partition-id ${{ matrix.part }} \
          --auto-partition-size 1 \
          --timeout-per-file 1800 \
          --enable-retry \
          --max-attempts 2 \
          --retry-wait-seconds 120 \
          --retry-timeout-increase 0 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
stage-c-test-large-8-gpu-amd:
  # Stage C large suite on 8-GPU MI325 runners, split into 3 auto-partitions,
  # preceded by an RCCL multi-GPU communication check. Runs when the target
  # stage input names this job, or, with no target stage given, when nothing
  # upstream failed or was cancelled and check-changes sets main_package or
  # sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  env:
    RUNNER_LABELS: linux-mi325-8gpu-sglang
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi325-8gpu-sglang
      # Must stay in step with --auto-partition-size below.
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Test RCCL multi-GPU communication
      timeout-minutes: 5
      run: |
        # Launches the RCCL check across 8 processes with NCCL/RCCL debug logging on.
        echo "Testing RCCL multi-GPU communication with debug info..."
        docker exec ci_sglang bash -c "cd /sglang-checkout && NCCL_DEBUG=INFO RCCL_DEBUG=INFO torchrun --nproc_per_node=8 scripts/ci/amd/test_rccl_multi_gpu.py"
    - name: Run test
      timeout-minutes: 60
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-c-test-large-8-gpu-amd \
          --auto-partition-id ${{ matrix.part }} \
          --auto-partition-size 3 \
          --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
stage-c-test-large-8-gpu-amd-mi35x:
  # Stage C large suite on 8-GPU MI35x runners, split into 2 auto-partitions.
  # Runs when the target stage input names this job, or, with no target stage
  # given, when nothing upstream failed or was cancelled and check-changes
  # sets main_package or sgl_kernel to 'true'.
  needs:
    - check-changes
  if: |
    always() && (
      contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-c-test-large-8-gpu-amd-mi35x,') ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
      )
    )
  runs-on: ${{ matrix.runner }}
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi35x-gpu-8
      # Must stay in step with --auto-partition-size below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Start CI container
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 60
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" \
          python3 run_suite.py \
          --hw amd \
          --suite stage-c-test-large-8-gpu-amd-mi35x \
          --auto-partition-id ${{ matrix.part }} \
          --auto-partition-size 2 \
          --timeout-per-file 3600 \
          ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
# =============================================== Disaggregation ====================================================
stage-b-test-large-8-gpu-35x-disaggregation-amd:
  # Disaggregation suite on an 8-GPU MI35x fabric runner, using a dedicated
  # container start script plus host and in-container RDMA diagnostics.
  # Runs when the target stage input names this job, or, with no target stage
  # given, when nothing upstream failed or was cancelled and check-changes
  # sets main_package or sgl_kernel to 'true'.
  # The target stage is matched against the job's real id; the older token
  # without "35x" is still accepted so existing callers keep working.
  needs: [check-changes]
  if: |
    always() &&
    (
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-35x-disaggregation-amd,')) ||
      (contains(format(',{0},', inputs.target_stage || inputs.target_stage_select), ',stage-b-test-large-8-gpu-disaggregation-amd,')) ||
      (
        !(inputs.target_stage || inputs.target_stage_select) &&
        (!failure() && !cancelled()) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  strategy:
    fail-fast: false
    matrix:
      runner: [linux-mi35x-gpu-8.fabric]
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Ensure VRAM is clear
      run: bash scripts/ci/amd/ensure_vram_clear.sh rocm
    - name: Check Host RDMA Environment
      id: rdma_detect
      run: |
        # Purely diagnostic: errors are tolerated (set +e) so that missing
        # tools or devices are reported instead of failing the step.
        set +e
        echo "=== Checking Host RDMA Environment ==="
        echo ""
        echo "=== 1. Ionic driver library check ==="
        ls -l /usr/lib/x86_64-linux-gnu/libibverbs/libionic* 2>/dev/null || echo "libionic not found in standard path"
        echo ""
        echo "=== 2. Infiniband devices ==="
        ls -la /dev/infiniband/ 2>/dev/null || echo "/dev/infiniband not found"
        ls -la /sys/class/infiniband/ 2>/dev/null || echo "/sys/class/infiniband not found"
        echo ""
        echo "=== 3. ibv_devinfo ==="
        which ibv_devinfo 2>/dev/null && ibv_devinfo 2>&1 || echo "ibv_devinfo not available"
        echo ""
        echo "=== 4. Kernel modules ==="
        lsmod 2>/dev/null | grep -E "ib_|rdma|ionic" || echo "No RDMA kernel modules loaded"
        echo ""
        echo "=== 5. Detect RDMA Devices for test environment ==="
        # Export the comma-joined device names (or an empty value) to later
        # steps through GITHUB_ENV.
        if [ -d "/sys/class/infiniband" ]; then
          RDMA_DEVS=$(ls /sys/class/infiniband | paste -sd "," -)
          echo "Detected RDMA Devices: $RDMA_DEVS"
          echo "SGLANG_TEST_RDMA_DEVICE=$RDMA_DEVS" >> $GITHUB_ENV
        else
          echo "No RDMA devices found in /sys/class/infiniband"
          echo "SGLANG_TEST_RDMA_DEVICE=" >> $GITHUB_ENV
        fi
        echo ""
        echo "=== Host RDMA Check Complete ==="
    - name: Start Special Container
      run: bash scripts/ci/amd/amd_ci_start_container_disagg.sh --rocm-version rocm720
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}
    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh
    - name: Verify RDMA in Container
      run: |
        docker exec -u root ci_sglang bash -c '
          echo "=== Container RDMA Verification ==="
          echo "Device nodes:"
          ls -la /dev/infiniband/
          echo ""
          echo "Provider libraries:"
          ls /usr/lib/x86_64-linux-gnu/libibverbs/ | grep -E "ionic|mlx" || echo "No Ionic/Mellanox providers"
          echo ""
          echo "HCA devices:"
          HCA_COUNT=$(ibv_devinfo -list 2>&1 | grep -oE "^[0-9]+ HCAs? found" | grep -oE "^[0-9]+" || echo "0")
          ibv_devinfo -list
          if [ "$HCA_COUNT" -gt 0 ]; then
            echo ""
            echo "=== SUCCESS: RDMA setup complete. Found $HCA_COUNT HCA(s) ==="
          else
            echo ""
            echo "=== WARNING: No HCAs detected. RDMA tests may fail ==="
          fi
        '
    - name: Run Aiter Op Test (RMSNorm)
      timeout-minutes: 10
      run: |
        echo "Running pre-check: test_rmsnorm2d.py"
        docker exec \
          -e MAX_JOBS=192 \
          ci_sglang \
          python /sgl-workspace/aiter/op_tests/test_rmsnorm2d.py
    - name: Run test_disaggregation
      timeout-minutes: 60
      run: |
        # Forward the RDMA device list detected on the host into the test run.
        bash scripts/ci/amd/amd_ci_exec.sh \
          -e SGLANG_TEST_RDMA_DEVICE="${{ env.SGLANG_TEST_RDMA_DEVICE }}" \
          -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-8-gpu-35x-disaggregation-amd --timeout-per-file 1800 ${{ inputs.continue_on_error && '--continue-on-error' || '' }}
pr-test-amd-finish:
  # Final gate for the workflow: always runs, prints the result of every job
  # listed in `needs`, and fails if any of them ended in failure or was
  # cancelled. Other results (for example skipped) do not fail the gate.
  needs:
    [
      call-gate,
      check-changes,
      sgl-kernel-unit-test-amd,
      sgl-kernel-unit-test-2-gpu-amd,
      multimodal-gen-test-1-gpu-amd,
      multimodal-gen-test-2-gpu-amd,
      stage-a-test-1-gpu-small-amd,
      jit-kernel-unit-test-amd,
      stage-b-test-1-gpu-small-amd,
      stage-b-test-1-gpu-small-amd-nondeterministic,
      stage-b-test-1-gpu-small-amd-mi35x,
      stage-b-test-1-gpu-large-amd,
      stage-b-test-2-gpu-large-amd,
      stage-b-test-large-8-gpu-35x-disaggregation-amd,
      stage-c-test-4-gpu-amd,
      stage-c-test-large-8-gpu-amd,
      stage-c-test-large-8-gpu-amd-mi35x,
    ]
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # The 'needs' context is handed over through the environment instead
        # of being pasted into a single-quoted shell literal, so a quote
        # character inside any job output cannot break the script.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # Get a list of all job names from the JSON keys
        job_names=$(echo "$NEEDS_JSON" | jq -r 'keys_unsorted[]')
        for job in $job_names; do
          # For each job, extract its result
          result=$(echo "$NEEDS_JSON" | jq -r --arg j "$job" '.[$j].result')
          # Print the job name and its result
          echo "$job: $result"
          # Check for failure or cancellation and exit if found
          if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
            echo "The above jobs failed."
            exit 1
          fi
        done
        # If the loop completes, no job failed or was cancelled
        echo "All jobs completed successfully"
        exit 0
pr-test-jit-kernel .github/workflows/pr-test-jit-kernel.yml
View raw YAML
name: PR Test - JIT Kernel
on:
workflow_call:
inputs:
jit_kernel:
required: true
type: string
pr_head_sha:
required: false
type: string
default: ''
git_ref:
required: false
type: string
default: ''
target_stage:
required: false
type: string
default: ''
test_parallel_dispatch:
required: false
type: string
default: 'false'
skip_stage_health_check:
required: false
type: boolean
default: false
# Workflow-level env is NOT inherited from the caller in reusable workflows (verified by CI test).
# The github context (including github.event_name) IS inherited from the caller.
env:
SGLANG_IS_IN_CI: true
SGLANG_CUDA_COREDUMP: "1"
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true
SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
jobs:
# JIT kernel unit tests on a single H100 runner.
jit-kernel-unit-test:
  # Not run for scheduled events, parallel-dispatch test runs, or when the
  # caller asked for one specific target stage.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != 'true' &&
    !inputs.target_stage
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Repo-local gate actions; they need the checkout above to exist.
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-kernel-unit-1-gpu-large
# JIT kernel unit tests that need the 8-GPU H200 runner.
jit-kernel-multigpu-unit-test:
  # Same gating as the single-GPU job: skip on schedule, on parallel-dispatch
  # test runs, and when a specific target stage was requested.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != 'true' &&
    !inputs.target_stage
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # NOTE(review): the sibling jobs also run ./.github/actions/check-stage-health
    # before this step; this job does not. Confirm that is intentional.
    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run multi-GPU test
      timeout-minutes: 45
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-kernel-unit-8-gpu-h200
# JIT kernel benchmark suite on a single H100 runner.
jit-kernel-benchmark-test:
  # Skip on schedule, on parallel-dispatch test runs, and when a specific
  # target stage was requested.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != 'true' &&
    !inputs.target_stage
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    # Repo-local gate actions; they need the checkout above to exist.
    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run benchmark tests
      timeout-minutes: 45
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-b-kernel-benchmark-1-gpu-large
pr-test-multimodal-gen matrix .github/workflows/pr-test-multimodal-gen.yml
View raw YAML
# Reusable workflow (workflow_call only) for the multimodal generation suites.
name: PR Test - Multimodal Gen

on:
  workflow_call:
    inputs:
      # String flags: the jobs compare these against the literal 'true'.
      multimodal_gen:
        required: true
        type: string
      sgl_kernel:
        required: true
        type: string
      continue_on_error:
        required: false
        type: string
        default: 'false'
      # pr_head_sha, then git_ref, then github.sha: the first non-empty value
      # is the ref the jobs check out.
      pr_head_sha:
        required: false
        type: string
        default: ''
      git_ref:
        required: false
        type: string
        default: ''
      # When set, only the job whose name equals this value runs.
      target_stage:
        required: false
        type: string
        default: ''
      test_parallel_dispatch:
        required: false
        type: string
        default: 'false'
      caller_needs_failure:
        required: false
        type: string
        default: 'false'
      skip_stage_health_check:
        required: false
        type: string
        default: 'false'

# Workflow-level env is NOT inherited from the caller in reusable workflows.
# The github context (including github.event_name) IS inherited from the caller.
env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  # 'true' only when the run is on the main branch.
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == 'true' }}
jobs:
# Diffusion server tests, "1-gpu" suite, split into two partitions.
multimodal-gen-test-1-gpu:
  # Runs when explicitly targeted, or, with no target stage, when
  # multimodal_gen == 'true' and either the run is scheduled / a
  # parallel-dispatch test, or the caller reported no failed needs and the
  # run was not cancelled.
  if: |
    (inputs.target_stage == 'multimodal-gen-test-1-gpu') ||
    (
    !inputs.target_stage &&
    ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (inputs.caller_needs_failure != 'true' && !cancelled())) &&
    inputs.multimodal_gen == 'true'
    )
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  strategy:
    fail-fast: false
    matrix:
      # Partition ids; must stay in sync with --total-partitions below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance

    # Only fetch the wheel artifact when the caller set sgl_kernel to 'true'.
    - name: Download artifacts
      if: inputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run diffusion server tests
      timeout-minutes: 240
      env:
        RUNAI_STREAMER_MEMORY_LIMIT: "0"
        # Expands to the CLI flag or to an empty string.
        CONTINUE_ON_ERROR_FLAG: ${{ inputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 1-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2 \
          $CONTINUE_ON_ERROR_FLAG

    # Always runs, including after a test failure.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: always()
      with:
        artifact-suffix: ${{ matrix.part }}
# Diffusion server tests, "2-gpu" suite, split into two partitions.
multimodal-gen-test-2-gpu:
  # Runs when explicitly targeted, or, with no target stage, when
  # multimodal_gen == 'true' and either the run is scheduled / a
  # parallel-dispatch test, or the caller reported no failed needs and the
  # run was not cancelled.
  if: |
    (inputs.target_stage == 'multimodal-gen-test-2-gpu') ||
    (
    !inputs.target_stage &&
    ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (inputs.caller_needs_failure != 'true' && !cancelled())) &&
    inputs.multimodal_gen == 'true'
    )
  runs-on: 2-gpu-h100
  timeout-minutes: 240
  strategy:
    fail-fast: false
    matrix:
      # Partition ids; must stay in sync with --total-partitions below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance

    # Only fetch the wheel artifact when the caller set sgl_kernel to 'true'.
    - name: Download artifacts
      if: inputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run diffusion server tests
      timeout-minutes: 240
      env:
        RUNAI_STREAMER_MEMORY_LIMIT: "0"
        # Expands to the CLI flag or to an empty string.
        CONTINUE_ON_ERROR_FLAG: ${{ inputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 2-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions 2 \
          $CONTINUE_ON_ERROR_FLAG

    # Always runs, including after a test failure.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: always()
      with:
        artifact-suffix: ${{ matrix.part }}
# Diffusion unit tests ("unit" suite), unpartitioned.
multimodal-gen-unit-test:
  # Same gating shape as the GPU suites, keyed on this job's own name.
  if: |
    (inputs.target_stage == 'multimodal-gen-unit-test') ||
    (
    !inputs.target_stage &&
    ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (inputs.caller_needs_failure != 'true' && !cancelled())) &&
    inputs.multimodal_gen == 'true'
    )
  runs-on: 1-gpu-h100
  timeout-minutes: 120
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance

    # Only fetch the wheel artifact when the caller set sgl_kernel to 'true'.
    - name: Download artifacts
      if: inputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run diffusion unit tests
      timeout-minutes: 60
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite unit
pr-test-npu matrix .github/workflows/pr-test-npu.yml
View raw YAML
# NPU test workflow. Triggered directly (push / pull_request on main, manual
# dispatch) and also callable from other workflows via workflow_call.
name: PR Test (NPU)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  group: pr-test-npu-${{ inputs.ref || github.ref }}
  # A called workflow inherits the caller's github context, so
  # github.event_name is never 'workflow_call' and a comparison against it is
  # always true. Key off inputs.ref instead: it is only non-empty when a
  # caller passes an explicit ref, and in that case in-flight runs are kept.
  cancel-in-progress: ${{ !inputs.ref }}
jobs:
# ==================== Check Changes ==================== #
# Decides which downstream NPU jobs have to run.
# Outputs (all rendered as the strings 'true' / 'false'):
#   changes_exist  - any filter matched, or run_all_tests was requested
#   main_package   - main_package filter matched, or run_all_tests
#   multimodal_gen - multimodal_gen filter matched, or run_all_tests
check-changes:
  runs-on: ubuntu-latest
  outputs:
    changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true'}}
    main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
    multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Determine run mode
      id: run-mode
      env:
        # Passed through env instead of being interpolated into the script.
        # Empty for events that define no inputs (push / pull_request).
        RUN_ALL_TESTS: ${{ inputs.run_all_tests }}
      run: |
        # Run every suite only when the run_all_tests input is true.
        # Note: github.event_name is inherited from the caller, so it cannot be
        # used to detect workflow_call; the explicit input is checked instead.
        if [[ "${RUN_ALL_TESTS}" == "true" ]]; then
          echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
          echo "Run mode: ALL TESTS (run_all_tests=${RUN_ALL_TESTS})"
        else
          echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
          # GITHUB_EVENT_NAME is the runner-provided form of github.event_name.
          echo "Run mode: FILTERED (triggered by ${GITHUB_EVENT_NAME})"
        fi

    # Skipped in run-all mode; its outputs are then empty and the job outputs
    # above fall back to the run_all_tests flag.
    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      if: steps.run-mode.outputs.run_all_tests != 'true'
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "python/pyproject_npu.toml"
            - "scripts/ci/npu/npu_ci_install_dependency.sh"
            - "test/srt/ascend/**"
            - ".github/workflows/pr-test-npu.yml"
          multimodal_gen:
            - "python/sglang/multimodal_gen/**/*.!(md|ipynb)"
            - "python/sglang/srt/**"
            - "python/pyproject_npu.toml"
            - "scripts/ci/npu/npu_ci_install_dependency.sh"
            - ".github/workflows/pr-test-npu.yml"
# ==================== PR Gate ==================== #
# Delegates to the shared pr-gate reusable workflow; only invoked when
# check-changes reported that something relevant changed.
pr-gate:
  needs: [check-changes]
  if: needs.check-changes.outputs.changes_exist == 'true'
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
# Per-commit suite on the 1-NPU A2 runner, split into two partitions.
per-commit-1-npu-a2:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a2-1
  strategy:
    fail-fast: false
    matrix:
      # Partition ids; must stay in sync with --auto-partition-size below.
      part: [ 0, 1 ]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy (GITHUB_PROXY_URL ends with '/', so this
        # yields the same proxied URL the a3 jobs use)
        curl -o /tmp/test.jsonl -L "${GITHUB_PROXY_URL}https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl"

    # Registered-test runner under test/, selected with --hw npu.
    - name: Run registered test
      timeout-minutes: 240
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        cd test
        python3 run_suite.py --hw npu --suite per-commit-1-npu-a2 --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

    # Suite runner under test/srt.
    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-1-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Per-commit suite on the 2-NPU A2 runner, split into two partitions.
per-commit-2-npu-a2:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a2-2
  strategy:
    # NOTE(review): the 1-NPU sibling job uses fail-fast: false; confirm that
    # cancelling the other partition on first failure is intended here.
    fail-fast: true
    matrix:
      # Partition ids; must stay in sync with --auto-partition-size below.
      part: [0, 1]
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh 910b
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy (GITHUB_PROXY_URL ends with '/', so this
        # yields the same proxied URL the a3 jobs use)
        curl -o /tmp/test.jsonl -L "${GITHUB_PROXY_URL}https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl"

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-2-npu-a2 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Per-commit suite on the 4-NPU A3 runner (unpartitioned).
per-commit-4-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a3-4
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy (GITHUB_PROXY_URL ends with '/', so this
        # yields the same proxied URL the other a3 jobs use)
        curl -o /tmp/test.jsonl -L "${GITHUB_PROXY_URL}https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl"

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-4-npu-a3 --timeout-per-file 3600
# Per-commit suite on the 16-NPU A3 runner (unpartitioned).
per-commit-16-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: linux-aarch64-a3-16
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy
        curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-16-npu-a3 --timeout-per-file 3600
# Multimodal generation "1-npu" suite on an A3 runner.
multimodal-gen-test-1-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.multimodal_gen == 'true'
  runs-on: linux-aarch64-a3-2
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Same ref selection as check-changes and the per-commit jobs, so a
        # workflow_call with `ref` tests the commit that was actually inspected.
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy
        curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        # NOTE(review): this path names 8.3.RC1 while the container image tag
        # is 8.3.rc2 - confirm the directory exists in the image.
        export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite 1-npu
# Multimodal generation "2-npu" suite on an A3 runner.
multimodal-gen-test-2-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.multimodal_gen == 'true'
  runs-on: linux-aarch64-a3-16
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Same ref selection as check-changes and the per-commit jobs, so a
        # workflow_call with `ref` tests the commit that was actually inspected.
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy
        curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        # NOTE(review): this path names 8.3.RC1 while the container image tag
        # is 8.3.rc2 - confirm the directory exists in the image.
        export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite 2-npu
# Multimodal generation "8-npu" suite on the 16-NPU A3 runner.
multimodal-gen-test-8-npu-a3:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.multimodal_gen == 'true'
  runs-on: linux-aarch64-a3-16
  container:
    image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Same ref selection as check-changes and the per-commit jobs, so a
        # workflow_call with `ref` tests the commit that was actually inspected.
        ref: ${{ inputs.ref || github.ref }}

    - name: Mark repository safe
      run: |
        git config --system --add safe.directory ${GITHUB_WORKSPACE}

    - name: Install dependencies
      env:
        TORCH_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/whl/cpu"
        PYPI_CACHE_URL: "http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple"
        GITHUB_PROXY_URL: "https://gh-proxy.test.osinfra.cn/"
      run: |
        # speed up by using infra cache services
        CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
        sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
        pip config set global.index-url http://${CACHING_URL}/pypi/simple
        pip config set global.trusted-host "${CACHING_URL}"

        bash scripts/ci/npu/npu_ci_install_dependency.sh a3 diffusion
        # copy required file from our daily cache
        cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
        # copy download through proxy
        curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

    - name: Run test
      timeout-minutes: 60
      env:
        SGLANG_USE_MODELSCOPE: "true"
        SGLANG_IS_IN_CI: "true"
        HF_ENDPOINT: https://hf-mirror.com
        TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
        PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
        STREAMS_PER_DEVICE: "32"
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py --suite 8-npu
pr-test-rust matrix .github/workflows/pr-test-rust.yml
View raw YAML
# Gateway (sgl-model-gateway) CI: runs on pushes / PRs to main that touch the
# gateway directory, and on manual dispatch.
name: PR Test (SMG)

on:
  push:
    branches:
      - main
    paths:
      - "sgl-model-gateway/**"
  pull_request:
    branches:
      - main
    # `labeled` is included so that adding the run-ci label starts a run
    # (see the job-level `if` conditions).
    types: [opened, synchronize, reopened, labeled]
    paths:
      - "sgl-model-gateway/**"
  workflow_dispatch:

# One active run per ref; a newer run cancels the older one.
concurrency:
  group: gateway-tests-${{ github.ref }}
  cancel-in-progress: true

env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"
  SGLANG_IS_IN_CI: "true"
jobs:
# Builds the gateway Python wheel with maturin, uploads it as the `smg-wheel`
# artifact, and smoke-tests that it installs and imports.
build-wheel:
  # Non-PR events always run. PR events run only with the `run-ci` label:
  # already present on a non-label event, or the label that was just added.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  runs-on: 4-gpu-a10
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install rust dependencies
      run: |
        bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

    - name: Configure sccache
      uses: mozilla-actions/sccache-action@v0.0.9
      with:
        version: "v0.12.0"
        disable_annotations: true

    - name: Rust cache
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: sgl-model-gateway
        shared-key: "rust-cache"
        cache-all-crates: true
        cache-on-failure: true
        save-if: true

    - name: Build python binding
      run: |
        source "$HOME/.cargo/env"
        export RUSTC_WRAPPER=sccache
        cd sgl-model-gateway/bindings/python
        python3 -m pip install --upgrade pip maturin
        maturin build --profile ci --features vendored-openssl --out dist

    - name: List built wheel
      run: ls -lh sgl-model-gateway/bindings/python/dist/

    # Short retention: the wheel is consumed by later jobs of this same run.
    - name: Upload wheel artifact
      uses: actions/upload-artifact@v4
      with:
        name: smg-wheel
        path: sgl-model-gateway/bindings/python/dist/*.whl
        retention-days: 1

    # Checks the Python package, the Rust extension module, and the launcher
    # entry point in turn.
    - name: Test wheel install
      run: |
        pip install sgl-model-gateway/bindings/python/dist/*.whl
        python3 -c "import sglang_router; print('Python package: OK')"
        python3 -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
        python3 -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
# Runs the Python binding unit tests against the wheel built by build-wheel.
python-unit-tests:
  needs: [build-wheel]
  runs-on: ubuntu-latest
  steps:
    # Check out into a subdirectory, then hoist the gateway sources to the
    # workspace root and discard the rest of the repository.
    - uses: actions/checkout@v4
      with:
        path: sglang-repo

    - name: Move sgl-model-gateway folder to root
      run: |
        mv sglang-repo/sgl-model-gateway/* .
        rm -rf sglang-repo

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.13"

    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: smg-wheel
        path: dist/

    - name: Install wheel
      run: pip install dist/*.whl

    # Fails when coverage of sglang_router drops below 80%.
    - name: Run Python unit tests
      run: |
        cd bindings/python
        python3 -m pip install pytest pytest-cov pytest-xdist
        pytest -q tests --cov=sglang_router --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80
# Rust lint (clippy), format check (nightly rustfmt), and `cargo test` for the
# gateway crate.
unit-tests:
  # Non-PR events always run. PR events run only with the `run-ci` label:
  # already present on a non-label event, or the label that was just added.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

    - name: Configure sccache
      uses: mozilla-actions/sccache-action@v0.0.9
      with:
        version: "v0.12.0"
        disable_annotations: true

    - name: Rust cache
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: sgl-model-gateway
        shared-key: "rust-cache"
        cache-all-crates: true
        cache-on-failure: true
        save-if: true

    # Warnings are promoted to errors.
    - name: Run lint
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        rustup component add clippy
        cargo clippy --all-targets --all-features -- -D warnings

    - name: Run fmt
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        # Install the nightly toolchain first; the minimal profile does not
        # ship rustfmt, so the component is added to it afterwards.
        rustup toolchain install nightly --profile minimal
        rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
        cargo +nightly fmt -- --check

    # Fixtures are generated before `cargo test` runs.
    - name: Generate vision golden fixtures
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
        pip install transformers pillow numpy scipy
        cd sgl-model-gateway/
        python scripts/generate_vision_golden.py

    - name: Run Rust tests
      timeout-minutes: 20
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        cargo test

    - name: Show sccache stats
      if: always()
      run: sccache --show-stats
# End-to-end gateway tests, one matrix entry per test group. Each entry sets:
#   timeout         job timeout in minutes
#   test_dirs       pytest target directories (relative to sgl-model-gateway)
#   extra_deps      extra pip packages, or "" for none
#   env_vars        VAR=value prefix placed before the pytest command
#   reruns          pytest-rerunfailures options, or ""
#   parallel_opts   extra pytest options for parallelism, or ""
#   setup_oracle / setup_brave / upload_benchmarks   optional step toggles
gateway-e2e:
  name: ${{ matrix.name }}
  needs: build-wheel
  # Non-PR events always run. PR events run only with the `run-ci` label:
  # already present on a non-label event, or the label that was just added.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  strategy:
    fail-fast: false
    matrix:
      include:
        - name: benchmarks
          timeout: 32
          test_dirs: "e2e_test/benchmarks"
          extra_deps: "genai-bench==0.0.3"
          env_vars: ""
          reruns: ""
          upload_benchmarks: true
          parallel_opts: ""  # No parallel for benchmarks (performance measurement)
        - name: responses
          timeout: 45
          test_dirs: "e2e_test/responses"
          extra_deps: ""
          env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
          reruns: "--reruns 2 --reruns-delay 5"
          setup_oracle: true
          setup_brave: true
          parallel_opts: ""  # Cloud backend tests not compatible with parallel execution
        - name: e2e
          timeout: 45
          test_dirs: "e2e_test/router e2e_test/embeddings"
          extra_deps: "pytest-parallel py"  # py is required for pytest-parallel with newer pytest
          env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
          reruns: "--reruns 2 --reruns-delay 5"
          parallel_opts: "--workers 1 --tests-per-worker 4"  # Thread-based parallelism
        - name: chat-completions
          timeout: 45
          test_dirs: "e2e_test/chat_completions"
          extra_deps: ""
          env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
          reruns: "--reruns 2 --reruns-delay 5"
          parallel_opts: ""
  runs-on: 4-gpu-a10
  timeout-minutes: ${{ matrix.timeout }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install SGLang dependencies
      run: |
        sudo --preserve-env=PATH bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Setup Oracle Instant Client
      if: matrix.setup_oracle
      run: |
        sudo apt-get install -y unzip
        INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
        INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"
        # The client directory is reused when an earlier run already unpacked it.
        if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
          echo "Downloading Oracle Instant Client..."
          mkdir -p "$INSTANT_CLIENT_DIR"
          cd "$INSTANT_CLIENT_DIR"
          wget https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP
          unzip $INSTANT_CLIENT_ZIP
          rm $INSTANT_CLIENT_ZIP
        else
          echo "Oracle Instant Client already exists, skipping download"
        fi
        # The GITHUB_ENV file is read literally (no shell expansion), so the
        # current LD_LIBRARY_PATH must be expanded here. The ':' separator is
        # only emitted when there is an existing value to append.
        echo "LD_LIBRARY_PATH=${INSTANT_CLIENT_DIR}/instantclient_23_9${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"

    - name: Start Oracle Database
      if: matrix.setup_oracle
      run: |
        docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
        echo "Starting Oracle DB..."
        # Export Oracle connection environment variables
        echo "ATP_USER=system" >> $GITHUB_ENV
        echo "ATP_PASSWORD=oracle" >> $GITHUB_ENV
        echo "ATP_DSN=localhost:1521/XEPDB1" >> $GITHUB_ENV

    - name: Start Brave MCP Server
      if: matrix.setup_brave
      run: |
        docker run -d --rm \
          -p 8001:8080 \
          -e BRAVE_API_KEY \
          --name brave-search-server \
          shoofio/brave-search-mcp-sse:1.0.10
        echo "Starting Brave MCP Server..."
        sleep 2
        # Best-effort probe: either branch only prints a message, so this step
        # never fails on the curl result.
        curl -f --max-time 1 http://localhost:8001/sse > /dev/null 2>&1 && echo "Brave MCP Server is healthy!" || echo "Brave MCP Server responded"

    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: smg-wheel
        path: wheel/

    - name: Install wheel
      run: |
        pip uninstall -y sglang-router || true
        pip install wheel/*.whl

    - name: Install e2e test dependencies
      run: |
        python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy
        if [ -n "${{ matrix.extra_deps }}" ]; then
          python3 -m pip --no-cache-dir install --upgrade ${{ matrix.extra_deps }}
        fi

    - name: Run E2E tests
      run: |
        python3 python/sglang/cli/killall.py
        cd sgl-model-gateway
        ${{ matrix.env_vars }} ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest ${{ matrix.reruns }} ${{ matrix.parallel_opts }} ${{ matrix.test_dirs }} -s -vv -o log_cli=true --log-cli-level=INFO

    - name: Upload benchmark results
      if: matrix.upload_benchmarks && success()
      uses: actions/upload-artifact@v4
      with:
        name: genai-bench-results-all-policies
        path: sgl-model-gateway/benchmark_**/

    # Cleanup steps run even when earlier steps failed.
    - name: Cleanup Brave MCP Server
      if: always() && matrix.setup_brave
      run: |
        docker stop brave-search-server || true
        docker rm brave-search-server || true

    - name: Cleanup Oracle Database
      if: always() && matrix.setup_oracle
      run: |
        docker stop oracle-db || true
        docker rm oracle-db || true
# Verifies that docker/gateway.Dockerfile still builds; the image is not pushed.
docker-build-test:
  # Non-PR events always run. PR events run only with the `run-ci` label:
  # already present on a non-label event, or the label that was just added.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  runs-on: ubuntu-24.04
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Build Docker image (no push)
      uses: docker/build-push-action@v5
      with:
        context: .
        file: docker/gateway.Dockerfile
        push: false
        tags: sgl-model-gateway:test
        # Layer cache is stored in the GitHub Actions cache backend.
        cache-from: type=gha
        cache-to: type=gha,mode=max
# Aggregation job: it depends on every other test job and does no work itself.
finish:
  needs:
    - build-wheel
    - python-unit-tests
    - unit-tests
    - gateway-e2e
    - docker-build-test
  runs-on: ubuntu-latest
  steps:
    - name: Finish
      run: echo "This is an empty step to ensure that all jobs are completed."
# Downloads the benchmark artifact uploaded by gateway-e2e and runs the
# summary script over it.
summarize-benchmarks:
  needs: [gateway-e2e]
  runs-on: ubuntu-latest
  if: success()
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # No `path` is given, so the artifact is extracted into the workspace.
    - name: Download benchmark results
      uses: actions/download-artifact@v4
      with:
        name: genai-bench-results-all-policies

    - name: Create benchmark summary
      run: python3 sgl-model-gateway/e2e_test/benchmarks/summarize.py .
pr-test-sgl-kernel .github/workflows/pr-test-sgl-kernel.yml
View raw YAML
# Reusable workflow (workflow_call only) that runs the sgl-kernel test jobs.
name: PR Test - SGL Kernel

on:
  workflow_call:
    inputs:
      # Forwarded to the install script as CUSTOM_BUILD_SGL_KERNEL.
      sgl_kernel:
        required: true
        type: string
      b200_runner:
        required: true
        type: string
      # pr_head_sha, then git_ref, then github.sha: the first non-empty value
      # is the ref the jobs check out.
      pr_head_sha:
        required: false
        type: string
        default: ''
      git_ref:
        required: false
        type: string
        default: ''
      skip_stage_health_check:
        required: false
        type: boolean
        default: false

# Workflow-level env is NOT inherited from the caller in reusable workflows.
# The github context (including github.event_name) IS inherited from the caller.
env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  # 'true' only when the run is on the main branch.
  SGLANG_PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  # Normalise the boolean input to the strings 'true' / 'false'.
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
jobs:
# Runs the sgl-kernel pytest suite against the downloaded wheel artifact.
sgl-kernel-unit-test:
  runs-on: 1-gpu-h100
  timeout-minutes: 240
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health
    - uses: ./.github/actions/check-maintenance

    # Empty sgl-kernel/dist before the download below; both commands are
    # best-effort.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run test
      timeout-minutes: 30
      run: |
        cd sgl-kernel
        pytest tests/
# Runs test/registered/mla/test_mla_deepseek_v3.py on the `1-gpu-h100` runner
# after installing dependencies with the downloaded sgl-kernel wheel.
sgl-kernel-mla-test:
  timeout-minutes: 240
  runs-on: 1-gpu-h100
  steps:
    # Checkout precedence: explicit PR head SHA, then git_ref, then the triggering SHA.
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Empty sgl-kernel/dist before the download; failures are ignored.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    # Unlike the unit-test job, the install script is called without the
    # `diffusion` argument here.
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/registered/mla
        python3 test_mla_deepseek_v3.py
# Smoke-runs every sgl-kernel/benchmark/bench_*.py script with a 60 second cap
# each. A script that fails or times out only prints a warning, so this job
# does not fail on individual benchmark errors.
sgl-kernel-benchmark-test:
  timeout-minutes: 240
  runs-on: 1-gpu-h100
  steps:
    # Checkout precedence: explicit PR head SHA, then git_ref, then the triggering SHA.
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Empty sgl-kernel/dist before the download; failures are ignored.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run benchmark tests
      timeout-minutes: 45
      run: |
        cd sgl-kernel/benchmark
        echo "Running sgl-kernel benchmark tests in CI mode..."
        echo "CI environment variable: $CI"
        echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS"
        for bench_file in bench_*.py; do
          echo "Testing $bench_file..."
          # `|| echo` deliberately swallows the non-zero exit so the loop continues.
          timeout 60 python3 "$bench_file" || echo "Warning: $bench_file timed out or failed, continuing..."
          echo "Completed $bench_file"
          echo "---"
        done
        echo "All benchmark tests completed!"
# Same pytest run as sgl-kernel-unit-test, but on the runner label passed in
# by the caller through the `b200_runner` input.
sgl-kernel-b200-test:
  timeout-minutes: 240
  runs-on: ${{ inputs.b200_runner }}
  steps:
    # Checkout precedence: explicit PR head SHA, then git_ref, then the triggering SHA.
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    # Empty sgl-kernel/dist before the download; failures are ignored.
    - name: Cleanup
      run: |
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        merge-multiple: true
        path: sgl-kernel/dist/

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run sgl-kernel unit tests on B200
      timeout-minutes: 30
      run: |
        cd sgl-kernel
        pytest tests/
# Adding a single CUDA13 smoke test to verify that the kernel builds and runs
# TODO: Add back this test when it can pass on CI
# cuda13-kernel-smoke-test:
# if: inputs.sgl_kernel == 'true'
# runs-on: x64-cu13-kernel-tests
# steps:
# - uses: actions/checkout@v4
# - name: Cleanup
# run: |
# ls -alh sgl-kernel/dist || true
# rm -rf sgl-kernel/dist/* || true
# - name: Download CUDA 13.0 artifacts
# uses: actions/download-artifact@v4
# with:
# path: sgl-kernel/dist/
# merge-multiple: true
# pattern: wheel-python3.10-cuda13.0
# - name: Install dependencies
# run: |
# CUSTOM_BUILD_SGL_KERNEL=${{inputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
# - name: Run kernel unit tests
# timeout-minutes: 30
# run: |
# cd sgl-kernel
# pytest tests/
pr-test-xeon matrix .github/workflows/pr-test-xeon.yml
View raw YAML
name: PR Test (Xeon)

# Triggers: pushes and pull requests targeting main, manual dispatch, and
# calls from other workflows (which may pass `ref` and `run_all_tests`).
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        type: string
        required: false
        default: ''
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
      run_all_tests:
        type: boolean
        required: false
        default: false
        description: "Run all tests (for releasing or testing purpose)"

# One concurrency group per tested ref; a newer run never cancels a run that
# is already in progress.
concurrency:
  group: pr-test-xeon-${{ inputs.ref || github.ref }}
  cancel-in-progress: false
jobs:
# ==================== Check Changes ==================== #
# Decides whether the Xeon build/test job should run.
# Output `main_package` is the paths-filter result when the filter step ran,
# otherwise it falls back to the `run_all_tests` value from the run-mode step.
check-changes:
  runs-on: ubuntu-latest
  outputs:
    main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Determine run mode
      id: run-mode
      run: |
        # The mode is driven solely by the run_all_tests input. github.event_name
        # is inherited from the caller, so it cannot be used to detect workflow_call.
        if [[ "${{ inputs.run_all_tests }}" != "true" ]]; then
          echo "run_all_tests=false" >> $GITHUB_OUTPUT
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        else
          echo "run_all_tests=true" >> $GITHUB_OUTPUT
          echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
        fi

    # Skipped entirely in ALL TESTS mode, which leaves its output empty and
    # lets the job output fall through to run_all_tests.
    - name: Detect file changes
      if: steps.run-mode.outputs.run_all_tests != 'true'
      id: filter
      uses: dorny/paths-filter@v3
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "python/pyproject_cpu.toml"
            - "test/**/!(*.md)"
            - "sgl-kernel/**/*.!(md|txt)"
            - ".github/workflows/pr-test-xeon.yml"
            - "docker/xeon.Dockerfile"
# ==================== PR Gate ==================== #
# Calls the shared pr-gate reusable workflow, but only when check-changes
# reported that relevant files changed (or all tests were requested).
pr-gate:
  if: needs.check-changes.outputs.main_package == 'true'
  needs: [check-changes]
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
# Builds the image from docker/xeon.Dockerfile, starts a container with the
# checkout mounted at /sglang-checkout/, verifies AMX support and runs the
# per-commit CPU suite inside it. The container is always removed at the end.
build-test:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: xeon-gnr
  env:
    HF_HOME: /home/sdp/.cache/huggingface
  strategy:
    matrix:
      build_type: ['all']
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Build and Push
      # The fork clone URL and head branch name are controlled by the PR
      # author. They are handed to the script as environment variables rather
      # than expanded with `${{ }}` inside the shell source, so a crafted
      # branch name cannot inject commands on this self-hosted runner.
      env:
        PR_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
        PR_HEAD_REF: ${{ github.head_ref }}
      run: |
        version=$(cat python/sglang/version.py | cut -d'"' -f2)
        tag=v${version}-xeon
        # Both values are empty outside pull_request events; the build args
        # are only passed when a value is present.
        build_args=()
        if [ -n "$PR_REPO" ]; then
          build_args+=(--build-arg "SGLANG_REPO=$PR_REPO")
        fi
        if [ -n "$PR_HEAD_REF" ]; then
          build_args+=(--build-arg "VER_SGLANG=$PR_HEAD_REF")
        fi
        docker build \
          "${build_args[@]}" \
          . -f docker/xeon.Dockerfile -t sglang_xeon --no-cache

    - name: Run container
      run: |
        docker run -dt \
          -v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \
          -v ${HF_HOME}:/root/.cache/huggingface \
          --name ci_sglang_xeon \
          sglang_xeon

    # Fails the job early when the host lacks AMX tiles or the sgl_kernel
    # build does not expose convert_weight_packed.
    - name: Check AMX support
      id: check_amx
      timeout-minutes: 5
      run: |
        docker exec -w /sglang-checkout/ ci_sglang_xeon \
          bash -c "source /opt/.venv/bin/activate && python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '"

    - name: Run unit tests
      timeout-minutes: 36
      run: |
        docker exec -w /sglang-checkout/ ci_sglang_xeon \
          bash -c "source /opt/.venv/bin/activate && cd ./test/srt && python3 run_suite.py --suite per-commit-cpu --timeout-per-file 1500"

    # $(id -u)/$(id -g) are expanded on the host before docker exec runs, so
    # the mounted checkout is handed back to the runner user.
    - name: Change permission
      timeout-minutes: 2
      run: |
        docker exec -u root ci_sglang_xeon bash -c "
          rm -rf /tmp/ci-home &&
          chown -R $(id -u):$(id -g) /sglang-checkout/ 2>/dev/null || true
        "

    - name: Cleanup container
      if: always()
      run: |
        docker rm -f ci_sglang_xeon || true
pr-test-xpu .github/workflows/pr-test-xpu.yml
View raw YAML
name: PR Test (XPU)

# Triggers: pushes and pull requests targeting main, manual dispatch, and
# calls from other workflows (which may pass `ref` and `run_all_tests`).
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  group: pr-test-xpu-${{ inputs.ref || github.ref }}
  # github.event_name is inherited from the caller and is never
  # 'workflow_call', so comparing against it is always true. A call from
  # another workflow is detected through the `ref` input instead: runs that
  # were given a ref are not cancelled, all other runs still are.
  cancel-in-progress: ${{ inputs.ref == '' }}
jobs:
# ==================== Check Changes ==================== #
# Decides whether the XPU build/test job should run.
# Output `main_package` is the paths-filter result when the filter step ran,
# otherwise it falls back to the `run_all_tests` value from the run-mode step.
check-changes:
  runs-on: ubuntu-latest
  outputs:
    main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Determine run mode
      id: run-mode
      run: |
        # The mode is driven solely by the run_all_tests input. github.event_name
        # is inherited from the caller, so it cannot be used to detect workflow_call.
        if [[ "${{ inputs.run_all_tests }}" != "true" ]]; then
          echo "run_all_tests=false" >> $GITHUB_OUTPUT
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        else
          echo "run_all_tests=true" >> $GITHUB_OUTPUT
          echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
        fi

    # Skipped entirely in ALL TESTS mode, which leaves its output empty and
    # lets the job output fall through to run_all_tests.
    - name: Detect file changes
      if: steps.run-mode.outputs.run_all_tests != 'true'
      id: filter
      uses: dorny/paths-filter@v3
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**/!(*.md)"
            - "python/pyproject_xpu.toml"
            - "test/**/!(*.md)"
            - "sgl-kernel/**/*.!(md|txt)"
            - ".github/workflows/pr-test-xpu.yml"
            - "docker/xpu.Dockerfile"
# ==================== PR Gate ==================== #
# Calls the shared pr-gate reusable workflow, but only when check-changes
# reported that relevant files changed (or all tests were requested).
pr-gate:
  if: needs.check-changes.outputs.main_package == 'true'
  needs: [check-changes]
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
# Builds the image from docker/xpu.Dockerfile, starts a container with
# /dev/dri attached, installs test dependencies inside it and runs the
# per-commit XPU suite. The container is always removed at the end.
build-and-test:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: intel-bmg
  env:
    HF_HOME: /home/sdp/.cache/huggingface
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: ${{ inputs.ref || github.ref }}

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Build Docker image
      # The fork clone URL and head branch name are controlled by the PR
      # author. They are handed to the script as environment variables rather
      # than expanded with `${{ }}` inside the shell source, so a crafted
      # branch name cannot inject commands on this self-hosted runner.
      env:
        PR_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
        PR_HEAD_REF: ${{ github.head_ref }}
      run: |
        # Both values are empty outside pull_request events; the build args
        # are only passed when a value is present.
        build_args=()
        if [ -n "$PR_REPO" ]; then
          build_args+=(--build-arg "SG_LANG_REPO=$PR_REPO")
        fi
        if [ -n "$PR_HEAD_REF" ]; then
          build_args+=(--build-arg "SG_LANG_BRANCH=$PR_HEAD_REF")
        fi
        docker build \
          "${build_args[@]}" \
          --no-cache --progress=plain -f docker/xpu.Dockerfile -t xpu_sglang_main:bmg .

    # The container id is exported as a step output so later steps (including
    # the always-run cleanup) can address the same container.
    - name: Run container
      id: start_container
      run: |
        container_id=$(docker run -dt \
          --group-add 992 \
          --group-add $(getent group video | cut -d: -f3) \
          -v ${HF_HOME}:/root/.cache/huggingface \
          --device /dev/dri \
          -e HF_TOKEN="$(cat ~/huggingface_token.txt)" \
          xpu_sglang_main:bmg)
        echo "Started container: $container_id"
        echo "container_id=$container_id" >> "$GITHUB_OUTPUT"

    - name: Install Dependency
      timeout-minutes: 20
      run: |
        cid="${{ steps.start_container.outputs.container_id }}"
        docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
        docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
        docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip uninstall -y flashinfer-python
        # Single quotes: ${HF_TOKEN} is expanded inside the container, not on the host.
        docker exec "$cid" /bin/bash -c '/home/sdp/miniforge3/envs/py3.10/bin/hf auth login --token ${HF_TOKEN} '

    - name: Run E2E Bfloat16 tests
      timeout-minutes: 20
      run: |
        cid="${{ steps.start_container.outputs.container_id }}"
        docker exec "$cid" bash -c "source /home/sdp/miniforge3/bin/activate && conda activate py3.10 && cd /home/sdp/sglang/test/srt && python3 run_suite.py --suite per-commit-xpu"

    - name: Cleanup container
      if: always()
      run: |
        cid="${{ steps.start_container.outputs.container_id }}"
        docker rm -f "$cid" || true
# Always-run status job: passes when build-and-test succeeded or was skipped,
# fails for any other result. The pr-gate result itself is not inspected.
finish:
  needs: [build-and-test, pr-gate]
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check job status
      run: |
        result="${{ needs.build-and-test.result }}"
        case "$result" in
          success|skipped)
            echo "All jobs completed successfully (result: $result)"
            exit 0
            ;;
          *)
            echo "Job failed with result: $result"
            exit 1
            ;;
        esac
release-branch-cut perms .github/workflows/release-branch-cut.yml
View raw YAML
name: Release Branch Cut

# Manual-only workflow: a maintainer supplies the branch to create and,
# optionally, the main-branch commit to cut it from.
on:
  workflow_dispatch:
    inputs:
      branch_name:
        type: string
        required: true
        description: 'Branch name to create (e.g., release/v0.5.7)'
      commit_sha:
        type: string
        required: false
        default: ''
        description: 'Commit SHA from main to cut the release branch from (defaults to latest main)'

# Token permissions for this workflow; the cut job pushes a branch
# (contents: write).
permissions:
  contents: write
  actions: write
  issues: read
  pull-requests: read
jobs:
# Creates and pushes the release branch.
# Flow: validate the requested name and commit, refuse to overwrite an
# existing branch, create the branch locally, rewrite the `git clone -b vX.Y.Z`
# references in two docs pages, push, then write a run summary.
# Output `branch_name` is consumed by the downstream test jobs.
cut-release-branch:
  if: github.repository == 'sgl-project/sglang'
  runs-on: ubuntu-latest
  environment: 'prod'
  outputs:
    branch_name: ${{ steps.set_output.outputs.branch_name }}
  # The dispatch inputs are free-form text. They are exposed to the scripts as
  # environment variables instead of being expanded with `${{ }}` inside the
  # shell source, so quotes or `$(...)` in an input cannot alter the script.
  env:
    BRANCH_NAME: ${{ github.event.inputs.branch_name }}
    REQUESTED_COMMIT_SHA: ${{ github.event.inputs.commit_sha }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        ref: main
        fetch-depth: 0
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Validate branch name
      run: |
        if [ -z "$BRANCH_NAME" ]; then
          echo "::error::Branch name is required"
          exit 1
        fi
        # Validate branch name format (should start with release/).
        # A non-conforming name only produces a warning; it is not rejected.
        if [[ ! "$BRANCH_NAME" =~ ^release/ ]]; then
          echo "::warning::Branch name '$BRANCH_NAME' does not follow convention 'release/vX.Y.Z'"
        fi
        echo "Branch name: $BRANCH_NAME"

    - name: Validate commit SHA
      id: validate
      run: |
        COMMIT_SHA="$REQUESTED_COMMIT_SHA"
        # If no commit SHA provided, use latest main
        if [ -z "$COMMIT_SHA" ]; then
          COMMIT_SHA=$(git rev-parse HEAD)
          echo "No commit SHA provided, using latest main: $COMMIT_SHA"
        fi
        # Verify the commit exists and is on main
        if ! git cat-file -t "$COMMIT_SHA" > /dev/null 2>&1; then
          echo "::error::Commit SHA '$COMMIT_SHA' does not exist"
          exit 1
        fi
        # Check if commit is an ancestor of main (i.e., is on main branch)
        if ! git merge-base --is-ancestor "$COMMIT_SHA" main; then
          echo "::error::Commit SHA '$COMMIT_SHA' is not on the main branch"
          exit 1
        fi
        echo "COMMIT_SHA=$COMMIT_SHA" >> $GITHUB_OUTPUT
        echo "Validated commit SHA: $COMMIT_SHA"

    - name: Check if branch already exists
      run: |
        if git ls-remote --heads origin "$BRANCH_NAME" | grep -q "$BRANCH_NAME"; then
          echo "::error::Branch '$BRANCH_NAME' already exists"
          exit 1
        fi
        echo "Branch '$BRANCH_NAME' does not exist, proceeding with creation"

    - name: Create release branch
      id: set_output
      env:
        COMMIT_SHA: ${{ steps.validate.outputs.COMMIT_SHA }}
      run: |
        git config user.name "sglang-bot"
        git config user.email "sglang-bot@users.noreply.github.com"
        # Create branch from the specified commit
        git checkout -b "$BRANCH_NAME" "$COMMIT_SHA"
        echo "branch_name=$BRANCH_NAME" >> $GITHUB_OUTPUT
        echo "Successfully created branch '$BRANCH_NAME' from commit '$COMMIT_SHA'"

    - name: Update version references in documentation
      run: |
        # Extract version from branch name (e.g., release/v0.5.8 -> v0.5.8)
        VERSION=$(echo "$BRANCH_NAME" | sed 's/release\///')
        # Update git clone version references in docs
        sed -i "s/git clone -b v[0-9]\+\.[0-9]\+\.[0-9]\+\.\?post\?[0-9]*/git clone -b $VERSION/" docs/get_started/install.md
        sed -i "s/git clone -b v[0-9]\+\.[0-9]\+\.[0-9]\+\.\?post\?[0-9]*/git clone -b $VERSION/" docs/platforms/amd_gpu.md
        # Check if any changes were made; commit only when the docs changed
        if git diff --quiet; then
          echo "No version references needed updating"
        else
          git add docs/get_started/install.md docs/platforms/amd_gpu.md
          git commit -m "docs: update version references to $VERSION"
          echo "Updated version references to $VERSION"
        fi

    - name: Push release branch
      env:
        BRANCH_NAME: ${{ steps.set_output.outputs.branch_name }}
      run: |
        git push origin "$BRANCH_NAME"
        echo "Successfully pushed branch '$BRANCH_NAME'"

    - name: Summary
      env:
        COMMIT_SHA: ${{ steps.validate.outputs.COMMIT_SHA }}
      run: |
        echo "## Release Branch Cut Summary" >> $GITHUB_STEP_SUMMARY
        echo "" >> $GITHUB_STEP_SUMMARY
        echo "| Property | Value |" >> $GITHUB_STEP_SUMMARY
        echo "|----------|-------|" >> $GITHUB_STEP_SUMMARY
        echo "| Branch | \`$BRANCH_NAME\` |" >> $GITHUB_STEP_SUMMARY
        echo "| Commit | \`$COMMIT_SHA\` |" >> $GITHUB_STEP_SUMMARY
        echo "| Triggered by | @${{ github.actor }} |" >> $GITHUB_STEP_SUMMARY
        echo "" >> $GITHUB_STEP_SUMMARY
        echo "### Next Steps" >> $GITHUB_STEP_SUMMARY
        echo "1. Tests are automatically triggered on the release branch" >> $GITHUB_STEP_SUMMARY
        echo "2. Apply any hotfixes if needed" >> $GITHUB_STEP_SUMMARY
        echo "3. Create a tag to trigger release: \`gh workflow run release-tag.yml -f version=X.Y.Z -f ref=$BRANCH_NAME\`" >> $GITHUB_STEP_SUMMARY
# PR-test fan-out: once the release branch exists, each job below calls one
# platform's PR test workflow against that branch with all tests enabled.

# The NVIDIA workflow names its ref input `git_ref` and additionally skips
# the stage health check.
run-pr-tests-nvidia:
  needs: [cut-release-branch]
  uses: ./.github/workflows/pr-test.yml
  secrets: inherit
  with:
    git_ref: ${{ needs.cut-release-branch.outputs.branch_name }}
    run_all_tests: true
    skip_stage_health_check: true

run-pr-tests-amd:
  needs: [cut-release-branch]
  uses: ./.github/workflows/pr-test-amd.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}
    run_all_tests: true

run-pr-test-npu:
  needs: [cut-release-branch]
  uses: ./.github/workflows/pr-test-npu.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}
    run_all_tests: true

run-pr-tests-xeon:
  needs: [cut-release-branch]
  uses: ./.github/workflows/pr-test-xeon.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}
    run_all_tests: true

run-pr-tests-xpu:
  needs: [cut-release-branch]
  uses: ./.github/workflows/pr-test-xpu.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}
    run_all_tests: true
# Nightly-test fan-out: each job calls one platform's nightly workflow against
# the freshly cut release branch. Only `ref` is passed.
run-nightly-tests-nvidia:
  needs: [cut-release-branch]
  uses: ./.github/workflows/nightly-test-nvidia.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}

run-nightly-tests-amd:
  needs: [cut-release-branch]
  uses: ./.github/workflows/nightly-test-amd.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}

run-nightly-tests-npu:
  needs: [cut-release-branch]
  uses: ./.github/workflows/nightly-test-npu.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}

run-nightly-tests-intel:
  needs: [cut-release-branch]
  uses: ./.github/workflows/nightly-test-intel.yml
  secrets: inherit
  with:
    ref: ${{ needs.cut-release-branch.outputs.branch_name }}
release-docker matrix .github/workflows/release-docker.yml
View raw YAML
name: Release Docker Images

# Builds and publishes the framework Docker images (full development
# environment) under these tags:
#   - lmsysorg/sglang:v{version}        and lmsysorg/sglang:latest
#   - lmsysorg/sglang:v{version}-cu130  and lmsysorg/sglang:latest-cu130
#
# Triggered by pushing a tag that matches v<digits>..., or manually with an
# explicit version.
on:
  push:
    tags:
      - "v[0-9]+.*"
  workflow_dispatch:
    inputs:
      version:
        required: true
        description: "Version to build (without v prefix, e.g., 0.5.7)"
jobs:
# Builds the linux/amd64 framework image twice (the matrix CUDA version and
# CUDA 13.0.1) and pushes each by digest. The two digests are exported as job
# outputs for the manifest job.
publish-x86:
  if: github.repository == 'sgl-project/sglang'
  environment: "prod"
  outputs:
    digest-cu129: ${{ steps.build-cu129.outputs.digest }}
    digest-cu130: ${{ steps.build-cu130.outputs.digest }}
  strategy:
    matrix:
      variant:
        - cuda_version: "12.9.1"
          build_type: "all"
          grace_blackwell: 0
  runs-on: x64-docker-build-node
  steps:
    - name: Delete huge unnecessary tools folder
      run: rm -rf /opt/hostedtoolcache

    - name: Checkout repository
      uses: actions/checkout@v4

    # NOTE(review): this action is referenced by a moving branch (`@main`) in
    # a job that has access to the Docker Hub secrets; consider pinning it to
    # a release tag or commit SHA.
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        tool-cache: false
        docker-images: false
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      # The dispatch input is free-form text, so it reaches the script through
      # an environment variable rather than a `${{ }}` expansion in the source.
      env:
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi
        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # Only the X.Y.Z prefix is checked; any suffix is accepted.
        if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi
        echo "version=${VERSION}" >> $GITHUB_OUTPUT

    - name: Build AMD64 Framework
      id: build-cu129
      # The version may carry an unvalidated suffix, so it is passed through
      # the environment and quoted instead of being expanded into the script.
      env:
        RELEASE_VERSION: ${{ steps.version.outputs.version }}
      run: |
        version="$RELEASE_VERSION"
        docker buildx build \
          --target framework \
          --platform linux/amd64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg "SGL_VERSION=${version}" \
          --metadata-file /tmp/metadata-cu129-framework.json \
          --no-cache \
          .
        # The pushed image digest is read back from the buildx metadata file.
        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-framework.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> $GITHUB_OUTPUT

    - name: Build and Push AMD64 Framework (CUDA 13)
      id: build-cu130
      env:
        RELEASE_VERSION: ${{ steps.version.outputs.version }}
      run: |
        version="$RELEASE_VERSION"
        docker buildx build \
          --target framework \
          --platform linux/amd64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=13.0.1 \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg GRACE_BLACKWELL=0 \
          --build-arg "SGL_VERSION=${version}" \
          --metadata-file /tmp/metadata-cu130-framework.json \
          --no-cache \
          .
        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-framework.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> $GITHUB_OUTPUT
# Builds the linux/arm64 framework image twice (the matrix CUDA version and
# CUDA 13.0.1, both with GRACE_BLACKWELL=1) and pushes each by digest. The two
# digests are exported as job outputs for the manifest job.
publish-arm64:
  if: github.repository == 'sgl-project/sglang'
  environment: "prod"
  outputs:
    digest-cu129: ${{ steps.build-cu129.outputs.digest }}
    digest-cu130: ${{ steps.build-cu130.outputs.digest }}
  strategy:
    matrix:
      variant:
        - cuda_version: "12.9.1"
          build_type: "all"
          grace_blackwell: 1
  runs-on: arm-docker-build-node
  steps:
    - name: Delete huge unnecessary tools folder
      run: rm -rf /opt/hostedtoolcache

    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      # The dispatch input is free-form text, so it reaches the script through
      # an environment variable rather than a `${{ }}` expansion in the source.
      env:
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi
        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # Only the X.Y.Z prefix is checked; any suffix is accepted.
        if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi
        echo "version=${VERSION}" >> $GITHUB_OUTPUT

    - name: Build ARM64 Framework
      id: build-cu129
      # The version may carry an unvalidated suffix, so it is passed through
      # the environment and quoted instead of being expanded into the script.
      env:
        RELEASE_VERSION: ${{ steps.version.outputs.version }}
      run: |
        version="$RELEASE_VERSION"
        docker buildx build \
          --target framework \
          --platform linux/arm64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg "SGL_VERSION=${version}" \
          --metadata-file /tmp/metadata-cu129-framework.json \
          --no-cache \
          .
        # The pushed image digest is read back from the buildx metadata file.
        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-framework.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> $GITHUB_OUTPUT

    - name: Build and Push ARM64 Framework (CUDA 13)
      id: build-cu130
      env:
        RELEASE_VERSION: ${{ steps.version.outputs.version }}
      run: |
        version="$RELEASE_VERSION"
        docker buildx build \
          --target framework \
          --platform linux/arm64 \
          --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
          -f docker/Dockerfile \
          --build-arg CUDA_VERSION=13.0.1 \
          --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
          --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
          --build-arg GRACE_BLACKWELL=1 \
          --build-arg "SGL_VERSION=${version}" \
          --metadata-file /tmp/metadata-cu130-framework.json \
          --no-cache \
          .
        DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-framework.json'))['containerimage.digest'])")
        echo "Pushed digest: ${DIGEST}"
        echo "digest=${DIGEST}" >> $GITHUB_OUTPUT
# Combines the per-architecture digests produced by publish-x86 and
# publish-arm64 into four multi-arch tags: v{version}, latest,
# v{version}-cu130 and latest-cu130.
create-manifests:
  runs-on: ubuntu-22.04
  needs: [publish-x86, publish-arm64]
  if: github.repository == 'sgl-project/sglang'
  environment: "prod"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      # The dispatch input is free-form text, so it reaches the script through
      # an environment variable rather than a `${{ }}` expansion in the source.
      env:
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi
        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # Only the X.Y.Z prefix is checked; any suffix is accepted.
        if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi
        echo "version=${VERSION}" >> $GITHUB_OUTPUT

    - name: Create multi-arch manifests
      # The version may carry an unvalidated suffix, so it is passed through
      # the environment and quoted wherever it is used.
      env:
        RELEASE_VERSION: ${{ steps.version.outputs.version }}
      run: |
        version="$RELEASE_VERSION"
        # Digests come from the two publish jobs' outputs.
        CU129_AMD64_FW=${{ needs.publish-x86.outputs.digest-cu129 }}
        CU130_AMD64_FW=${{ needs.publish-x86.outputs.digest-cu130 }}
        CU129_ARM64_FW=${{ needs.publish-arm64.outputs.digest-cu129 }}
        CU130_ARM64_FW=${{ needs.publish-arm64.outputs.digest-cu130 }}
        # Create versioned framework manifest (default)
        docker buildx imagetools create \
          -t "lmsysorg/sglang:v${version}" \
          lmsysorg/sglang@${CU129_AMD64_FW} \
          lmsysorg/sglang@${CU129_ARM64_FW}
        # Create latest framework manifest (default)
        docker buildx imagetools create \
          -t lmsysorg/sglang:latest \
          lmsysorg/sglang@${CU129_AMD64_FW} \
          lmsysorg/sglang@${CU129_ARM64_FW}
        # Create versioned CUDA 13 framework manifest
        docker buildx imagetools create \
          -t "lmsysorg/sglang:v${version}-cu130" \
          lmsysorg/sglang@${CU130_AMD64_FW} \
          lmsysorg/sglang@${CU130_ARM64_FW}
        # Create latest CUDA 13 framework manifest
        docker buildx imagetools create \
          -t lmsysorg/sglang:latest-cu130 \
          lmsysorg/sglang@${CU130_AMD64_FW} \
          lmsysorg/sglang@${CU130_ARM64_FW}
release-docker-amd matrix .github/workflows/release-docker-amd.yml
View raw YAML
name: Release Docker Images (AMD)

# Triggered by pushing a tag that matches v<digits>..., or manually with an
# explicit version.
on:
  push:
    tags:
      - 'v[0-9]+.*'
  workflow_dispatch:
    inputs:
      version:
        required: true
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
jobs:
# Builds and pushes one lmsysorg/sglang image per (rocm_version, gpu_arch)
# matrix entry, tagged v{version}-{rocm_version}-{mi30x|mi35x}.
publish:
  if: github.repository == 'sgl-project/sglang'
  runs-on: amd-docker-scale
  environment: 'prod'
  strategy:
    matrix:
      rocm_version: ['rocm700', 'rocm720']
      gpu_arch: ['gfx942', 'gfx950']
      build_type: ['all']
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Get version from tag
      id: version
      # The dispatch input is free-form text, so it reaches the script through
      # an environment variable rather than a `${{ }}` expansion in the source.
      env:
        DISPATCH_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
          VERSION="$DISPATCH_VERSION"
        else
          # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
          VERSION="${GITHUB_REF_NAME#v}"
        fi
        # Validate version format
        if [ -z "$VERSION" ]; then
          echo "::error::Version is empty"
          exit 1
        fi
        # Only the X.Y.Z prefix is checked; any suffix is accepted.
        if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
          echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
          exit 1
        fi
        echo "version=${VERSION}" >> $GITHUB_OUTPUT

    - name: Build and Push
      # The version may carry an unvalidated suffix, so it is passed through
      # the environment and quoted wherever it is used.
      env:
        RELEASE_VERSION: ${{ steps.version.outputs.version }}
      run: |
        version="$RELEASE_VERSION"
        echo "Version: ${version}"
        # For rocm720 the ROCm version is appended to the GPU_ARCH build arg;
        # for rocm700 the suffix stays empty.
        gpu_arch_suffix=""
        if [ "${{ matrix.rocm_version }}" = "rocm700" ]; then
          if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
            rocm_tag="rocm700-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
            rocm_tag="rocm700-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi
        elif [ "${{ matrix.rocm_version }}" = "rocm720" ]; then
          gpu_arch_suffix="-${{ matrix.rocm_version }}"
          if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
            rocm_tag="rocm720-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
            rocm_tag="rocm720-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi
        else
          echo "Unsupported rocm version"
          exit 1
        fi
        tag="v${version}-${rocm_tag}"
        # rocm.Dockerfile expects SGL_BRANCH with 'v' prefix for git tag checkout
        docker build . -f docker/rocm.Dockerfile --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }}${gpu_arch_suffix} --build-arg "SGL_BRANCH=v${version}" --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic -t "lmsysorg/sglang:${tag}" --no-cache
        docker push "lmsysorg/sglang:${tag}"
release-docker-amd-nightly matrix .github/workflows/release-docker-amd-nightly.yml
View raw YAML
name: Release Docker Images Nightly (AMD)

# Runs every day at 12:00 UTC and on manual dispatch.
on:
  schedule:
    - cron: '0 12 * * *'
  workflow_dispatch:

concurrency:
  # Group key: the workflow name (to avoid clashes with other workflows)
  # followed by the PR number for pull requests, or the commit hash otherwise.
  # A newer run in the same group cancels queued and in-progress ones.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
# Nightly build: for each gpu_arch, builds docker/rocm.Dockerfile, pushes the
# image to rocm/sgl-dev with a dated tag, then re-tags and pushes the same
# image to lmsysorg/sglang-rocm.
publish:
  if: github.repository == 'sgl-project/sglang'
  environment: prod
  runs-on: amd-docker-scale
  strategy:
    fail-fast: false
    matrix:
      gpu_arch:
        - 'gfx942'
        - 'gfx950'
      build_type:
        - 'all'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Required for git describe to find tags

    # DATE is computed once and stored in the job environment, so every later
    # step sees the same value.
    - name: "Set Date"
      run: |
        echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV

    - name: Get version from latest tag
      id: version
      run: |
        # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
        VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')
        if [ -z "$VERSION" ]; then
          echo "::error::Could not determine version from git tags"
          exit 1
        fi
        # Get short commit hash of current HEAD
        COMMIT_HASH=$(git rev-parse --short HEAD)
        # Compose pretend version for setuptools_scm: e.g., 0.5.8.dev20260129+g1a2b3c4
        PRETEND_VERSION="${VERSION}.dev${{ env.DATE }}+g${COMMIT_HASH}"
        echo "version=${VERSION}" >> $GITHUB_OUTPUT
        echo "pretend_version=${PRETEND_VERSION}" >> $GITHUB_OUTPUT
        echo "Detected version: ${VERSION}"
        echo "Pretend version for pip: ${PRETEND_VERSION}"

    - name: Login to Docker Hub (AMD)
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
        password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

    - name: Build and Push to rocm/sgl-dev
      run: |
        version=${{ steps.version.outputs.version }}
        pretend_version=${{ steps.version.outputs.pretend_version }}
        echo "Version: ${version}"
        echo "Pretend version: ${pretend_version}"
        if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
          rocm_tag="rocm700-mi30x"
        elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
          rocm_tag="rocm700-mi35x"
        else
          echo "Unsupported gfx arch"
          exit 1
        fi
        tag=v${version}-${rocm_tag}
        # Exported so the lmsysorg push step below can reuse the exact tag.
        echo "IMAGE_TAG=${tag}-${{ env.DATE }}" >> $GITHUB_ENV
        docker build . -f docker/rocm.Dockerfile --build-arg SGL_BRANCH=${{ github.ref_name }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }} --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic --build-arg SETUPTOOLS_SCM_PRETEND_VERSION=${pretend_version} -t rocm/sgl-dev:${tag}-${{ env.DATE }} --no-cache
        docker push rocm/sgl-dev:${tag}-${{ env.DATE }}

    # Second login switches credentials to the lmsys account for the mirror push.
    - name: Login to Docker Hub (lmsys)
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Push to lmsysorg/sglang-rocm
      run: |
        docker tag rocm/sgl-dev:${{ env.IMAGE_TAG }} lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }}
        docker push lmsysorg/sglang-rocm:${{ env.IMAGE_TAG }}
# Temporarily disable docker cache seeding until performant storage is in place.
# While `if: false` is set this job never runs. When enabled, it pulls the
# nightly image pushed by `publish` and saves it as a tarball on the runner.
cache:
  if: false
  # if: always() && github.repository == 'sgl-project/sglang'
  needs: [publish]
  environment: prod
  runs-on: linux-mi300-gpu-1
  strategy:
    fail-fast: false
    matrix:
      gpu_arch:
        - 'gfx942'
      build_type:
        - 'all'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # Required for git describe to find tags

    - name: "Set Date"
      run: |
        echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV

    - name: Get version from latest tag
      id: version
      run: |
        # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
        VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')
        if [ -z "$VERSION" ]; then
          echo "::error::Could not determine version from git tags"
          exit 1
        fi
        echo "version=${VERSION}" >> $GITHUB_OUTPUT
        echo "Detected version: ${VERSION}"

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
        password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

    - name: Pull and Save Docker Image to Cache
      run: |
        set -euxo pipefail
        version=${{ steps.version.outputs.version }}
        echo "Version: ${version}"
        if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
          rocm_tag="rocm700-mi30x"
        else
          echo "Unsupported gfx arch"
          exit 1
        fi
        tag=v${version}-${rocm_tag}
        if [ "${{ matrix.build_type }}" = "all" ]; then
          tag_suffix=""
        else
          echo "Unsupported build type"
          exit 1
        fi
        image="rocm/sgl-dev:${tag}-${{ env.DATE }}${tag_suffix}"
        # Determine target cache file name based on ROCm variant
        if [[ "${rocm_tag}" == rocm700* ]]; then
          final_path="/home/runner/sgl-data/docker/image-700.tar"
        else
          echo "Unexpected ROCm tag: ${rocm_tag}"
          exit 1
        fi
        # Save to a temporary file first and rename afterwards, so the final
        # path is only replaced once the save has finished.
        tmp_path="${final_path}.tmp"
        echo "Pulling image: ${image}"
        docker pull "${image}"
        echo "Saving to temp file: ${tmp_path}"
        docker save "${image}" -o "${tmp_path}"
        echo "Moving to final path: ${final_path}"
        mv -f "${tmp_path}" "${final_path}"
        echo "Cache populated successfully at ${final_path}"
release-docker-amd-rocm720-nightly matrix .github/workflows/release-docker-amd-rocm720-nightly.yml
View raw YAML
name: Release Docker Images ROCm 7.2.0 Nightly Preview (AMD)
# Builds the ROCm 7.2.0 preview image for every GPU architecture in the matrix,
# pushes it to rocm/sgl-dev, then re-tags and pushes the same image to
# lmsysorg/sglang-rocm. Triggered by the daily cron below or manually.
on:
  workflow_dispatch:
  schedule:
    - cron: '0 12 * * *'
concurrency:
  # Group key: workflow name plus the event's PR number when it has one,
  # otherwise the commit SHA. A newer run in the same group cancels queued and
  # in-progress runs. The workflow name is prepended to avoid conflicts
  # between different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      fail-fast: false
      matrix:
        gpu_arch:
          - 'gfx942-rocm720'
          - 'gfx950-rocm720'
        build_type:
          - 'all'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history so the version tags are available
      - name: "Set Date"
        run: |
          echo "DATE=$(date +%Y%m%d)" >> "$GITHUB_ENV"
      - name: Get version from latest tag
        id: version
        run: |
          # Get the latest version tag sorted by version number (e.g., v0.5.7 -> 0.5.7)
          VERSION=$(git tag -l 'v[0-9]*' --sort=-v:refname | head -1 | sed 's/^v//')
          if [ -z "$VERSION" ]; then
            echo "::error::Could not determine version from git tags"
            exit 1
          fi
          # Get short commit hash of current HEAD
          COMMIT_HASH=$(git rev-parse --short HEAD)
          # Compose pretend version for setuptools_scm: e.g., 0.5.8.post1.dev20260211+g1a2b3c4
          # DATE was exported to the job environment by the "Set Date" step.
          PRETEND_VERSION="${VERSION}.dev${DATE}+g${COMMIT_HASH}"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
          echo "pretend_version=${PRETEND_VERSION}" >> "$GITHUB_OUTPUT"
          echo "Detected version: ${VERSION}"
          echo "Pretend version for pip: ${PRETEND_VERSION}"
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}
      - name: Build and Push to rocm/sgl-dev
        env:
          # Context values are handed over as environment variables instead of
          # being spliced into the script text, so their content cannot be
          # interpreted as shell syntax.
          VERSION: ${{ steps.version.outputs.version }}
          PRETEND_VERSION: ${{ steps.version.outputs.pretend_version }}
          SGL_BRANCH: ${{ github.ref_name }}
        run: |
          echo "Version: ${VERSION}"
          echo "Pretend version: ${PRETEND_VERSION}"
          if [ "${{ matrix.gpu_arch }}" = "gfx942-rocm720" ]; then
            rocm_tag="rocm720-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950-rocm720" ]; then
            rocm_tag="rocm720-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi
          tag="v${VERSION}-${rocm_tag}"
          # Exported so the later re-tag/push step uses exactly this tag.
          echo "IMAGE_TAG=${tag}-${DATE}" >> "$GITHUB_ENV"
          docker build . -f docker/rocm.Dockerfile \
            --build-arg SGL_BRANCH="${SGL_BRANCH}" \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg GPU_ARCH=${{ matrix.gpu_arch }} \
            --build-arg ENABLE_MORI=1 \
            --build-arg NIC_BACKEND=ainic \
            --build-arg SETUPTOOLS_SCM_PRETEND_VERSION="${PRETEND_VERSION}" \
            -t "rocm/sgl-dev:${tag}-${DATE}" \
            --no-cache
          docker push "rocm/sgl-dev:${tag}-${DATE}"
      - name: Login to Docker Hub (lmsys)
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Push to lmsysorg/sglang-rocm
        run: |
          # IMAGE_TAG was exported by the build step above.
          docker tag "rocm/sgl-dev:${IMAGE_TAG}" "lmsysorg/sglang-rocm:${IMAGE_TAG}"
          docker push "lmsysorg/sglang-rocm:${IMAGE_TAG}"
release-docker-cu13-framework .github/workflows/release-docker-cu13-framework.yml
View raw YAML
name: Release CUDA 13 Framework Docker Images (Temporary)
# Temporary workflow to build only versioned cu13 framework images
# Can be deleted after use
#
# Flow: the amd64 and arm64 jobs each push an image by digest and upload the
# digest as an artifact; create-manifest then joins both digests under the
# v<version>-cu130 and latest-cu130 tags.
on:
  workflow_dispatch:
    inputs:
      version:
        description: "Version to build (without v prefix, e.g., 0.5.8)"
        required: true
jobs:
  publish-x86:
    if: github.repository == 'sgl-project/sglang'
    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Free disk space
        # Pinned to the same commit the NPU workflows use, instead of the
        # moving `main` branch.
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Validate version
        id: version
        env:
          # The dispatch input arrives as an environment variable so that its
          # content is never parsed as part of the script.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          VERSION="${REQUESTED_VERSION}"
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          # Prefix check only: anything after X.Y.Z is let through.
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
      - name: Build and Push AMD64 Framework (CUDA 13)
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          # push-by-digest publishes the image untagged; the digest is read
          # back from the metadata file and handed to create-manifest.
          docker buildx build \
            --target framework \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=all \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=0 \
            --build-arg SGL_VERSION="${VERSION}" \
            --metadata-file /tmp/metadata.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-amd64-framework.txt
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digest-cu130-amd64
          path: /tmp/digest-cu130-amd64-framework.txt
          retention-days: 1
  publish-arm64:
    if: github.repository == 'sgl-project/sglang'
    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Validate version
        id: version
        env:
          # See publish-x86: the input is passed through the environment.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          VERSION="${REQUESTED_VERSION}"
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
      - name: Build and Push ARM64 Framework (CUDA 13)
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target framework \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=all \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=1 \
            --build-arg SGL_VERSION="${VERSION}" \
            --metadata-file /tmp/metadata.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-arm64-framework.txt
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digest-cu130-arm64
          path: /tmp/digest-cu130-arm64-framework.txt
          retention-days: 1
  create-manifest:
    runs-on: ubuntu-22.04
    needs: [publish-x86, publish-arm64]
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Download amd64 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-cu130-amd64
          path: /tmp/digests/amd64
      - name: Download arm64 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-cu130-arm64
          path: /tmp/digests/arm64
      - name: Create multi-arch manifest
        env:
          # Not re-validated here: this job only runs after both publish jobs,
          # each of which has already checked the same input.
          VERSION: ${{ github.event.inputs.version }}
        run: |
          AMD64_DIGEST=$(cat /tmp/digests/amd64/digest-cu130-amd64-framework.txt)
          ARM64_DIGEST=$(cat /tmp/digests/arm64/digest-cu130-arm64-framework.txt)
          # Create versioned CUDA 13 framework manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${VERSION}-cu130" \
            "lmsysorg/sglang@${AMD64_DIGEST}" \
            "lmsysorg/sglang@${ARM64_DIGEST}"
          # Create latest CUDA 13 framework manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-cu130 \
            "lmsysorg/sglang@${AMD64_DIGEST}" \
            "lmsysorg/sglang@${ARM64_DIGEST}"
release-docker-dev matrix .github/workflows/release-docker-dev.yml
View raw YAML
name: Build and Push Development Docker Images
# build-dev pushes one image per (architecture, CUDA version) by digest and
# uploads each digest as an artifact. create-manifests joins the x86 and arm64
# digests of each variant under a dev tag, and for untagged nightly runs also
# stamps a dated tag and prunes old dated tags on Docker Hub.
on:
  workflow_dispatch:
    inputs:
      pr_number:
        description: "PR number to build from (leave empty to use current branch)"
        required: false
        default: ""
      tag:
        description: "Custom tag suffix (overrides pr_number in tag). E.g. 'my-test' → dev-my-test, dev-cu13-my-test, etc."
        required: false
        default: ""
  schedule:
    - cron: "0 0 * * *"
concurrency:
  group: release-docker-dev-${{ inputs.tag || inputs.pr_number || 'nightly' }}
  cancel-in-progress: true
jobs:
  build-dev:
    if: ${{ github.repository == 'sgl-project/sglang' }}
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        include:
          - runner: x64-docker-build-node
            platform: linux/amd64
            build_type: all
            grace_blackwell: 0
            arch_tag: x86
            version: 12.9.1
          - runner: arm-docker-build-node
            platform: linux/arm64
            build_type: all
            grace_blackwell: 1
            arch_tag: arm64
            version: 12.9.1
          - runner: x64-docker-build-node
            platform: linux/amd64
            build_type: all
            grace_blackwell: 0
            arch_tag: x86-cu13
            version: 13.0.1
          - runner: arm-docker-build-node
            platform: linux/arm64
            build_type: all
            grace_blackwell: 1
            arch_tag: arm64-cu13
            version: 13.0.1
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # A PR number selects that PR's head ref; otherwise the triggering ref.
          ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || github.ref }}
      - name: Free disk space
        # Pinned to the same commit the NPU workflows use, instead of the
        # moving `main` branch.
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: true
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: true
      - name: Prune Docker to reclaim disk space
        run: |
          docker buildx prune --filter "until=72h" -f
          docker system prune -af --filter "until=72h"
          docker volume prune -af
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and Push Dev Image
        run: |
          # Nightly (schedule) installs latest release; manual dispatch builds from checked-out source
          if [ "${{ github.event_name }}" = "schedule" ]; then
            SOURCE_ARG="--build-arg USE_LATEST_SGLANG=1"
          else
            SOURCE_ARG="--build-arg BRANCH_TYPE=local"
          fi
          # SOURCE_ARG is left unquoted below on purpose: it must split into
          # two words (flag and value).
          docker buildx build \
            --platform ${{ matrix.platform }} \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            --target framework \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.version }} \
            --build-arg BUILD_TYPE=${{ matrix.build_type }} \
            --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) \
            --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} \
            ${SOURCE_ARG} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --metadata-file /tmp/metadata.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest.txt
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
          name: digest-${{ matrix.arch_tag }}
          path: /tmp/digest.txt
          retention-days: 1
  create-manifests:
    runs-on: ubuntu-22.04
    needs: [build-dev]
    if: ${{ github.repository == 'sgl-project/sglang' }}
    strategy:
      matrix:
        variant:
          - base: dev
            x86: x86
            arm64: arm64
          - base: dev-cu13
            x86: x86-cu13
            arm64: arm64-cu13
    steps:
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Download x86 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-${{ matrix.variant.x86 }}
          path: /tmp/digests/x86
      - name: Download arm64 digest
        uses: actions/download-artifact@v4
        with:
          name: digest-${{ matrix.variant.arm64 }}
          path: /tmp/digests/arm64
      - name: Create multi-arch manifest
        env:
          # Free-form dispatch inputs are passed through the environment so
          # their content is never parsed as part of the script.
          CUSTOM_TAG: ${{ inputs.tag }}
          PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          X86_DIGEST=$(cat /tmp/digests/x86/digest.txt)
          ARM64_DIGEST=$(cat /tmp/digests/arm64/digest.txt)
          # A custom tag takes precedence over the PR number.
          SUFFIX=""
          if [ -n "${CUSTOM_TAG}" ]; then
            SUFFIX="-${CUSTOM_TAG}"
          elif [ -n "${PR_NUMBER}" ]; then
            SUFFIX="-pr-${PR_NUMBER}"
          fi
          TAG="${{ matrix.variant.base }}${SUFFIX}"
          # For nightly (no suffix), also stamp a dated tag
          EXTRA_TAG_ARGS=()
          if [ -z "${SUFFIX}" ]; then
            EXTRA_TAG_ARGS=(-t "lmsysorg/sglang:nightly-${TAG}-$(date +%Y%m%d)-${GITHUB_SHA:0:8}")
          fi
          docker buildx imagetools create \
            -t "lmsysorg/sglang:${TAG}" \
            "${EXTRA_TAG_ARGS[@]}" \
            "lmsysorg/sglang@${X86_DIGEST}" \
            "lmsysorg/sglang@${ARM64_DIGEST}"
          echo "✓ Published lmsysorg/sglang:${TAG}"
      - name: Cleanup Old Nightly Builds
        if: ${{ !inputs.tag && !inputs.pr_number }}
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
          VARIANT_BASE: ${{ matrix.variant.base }}
        run: |
          # Build the login body with jq so the credentials are JSON-escaped
          # rather than pasted into a string literal.
          LOGIN_PAYLOAD=$(jq -n --arg u "$DOCKERHUB_USERNAME" --arg p "$DOCKERHUB_TOKEN" \
            '{username: $u, password: $p}')
          TOKEN=$(curl -s -H "Content-Type: application/json" \
            -X POST -d "$LOGIN_PAYLOAD" \
            https://hub.docker.com/v2/users/login/ | jq -r .token)
          # Cleanup is best effort: without a token, warn and stop instead of
          # issuing deletes that cannot be authorized.
          if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then
            echo "::warning::Docker Hub login returned no token; skipping nightly cleanup"
            exit 0
          fi
          # Only the first page (up to 100 tags) of the repository is inspected.
          TAGS_RESPONSE=$(curl -s -H "Authorization: JWT $TOKEN" \
            "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/?page_size=100")
          # Newest first, reduced to the tag name.
          TAGS=$(echo "$TAGS_RESPONSE" | jq -r --arg base "$VARIANT_BASE" \
            '.results[] | select(.name | test("^nightly-" + $base + "-[0-9]")) | "\(.last_updated)|\(.name)"' \
            | sort -r | cut -d'|' -f2)
          # Count non-empty lines so that "no matching tags" is reported as 0.
          TAG_COUNT=$(printf '%s\n' "$TAGS" | grep -c . || true)
          if [ "$TAG_COUNT" -gt 14 ]; then
            echo "Found $TAG_COUNT nightly builds, keeping only the 14 most recent"
            TAGS_TO_DELETE=$(echo "$TAGS" | tail -n +15)
            for tag in $TAGS_TO_DELETE; do
              echo "Deleting tag: $tag"
              curl -X DELETE -H "Authorization: JWT $TOKEN" \
                "https://hub.docker.com/v2/repositories/lmsysorg/sglang/tags/$tag/"
            done
          else
            echo "Only $TAG_COUNT nightly builds found, no cleanup needed"
          fi
release-docker-gateway .github/workflows/release-docker-gateway.yml
View raw YAML
name: Release SGLang Model Gateway Docker Image
# Builds docker/gateway.Dockerfile for amd64 and arm64 and pushes it to
# lmsysorg/sgl-model-gateway under both v<version> and latest.
on:
  push:
    branches:
      - main
    paths:
      - sgl-model-gateway/bindings/python/pyproject.toml
  workflow_dispatch:
jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and Push
        run: |
          # The version is the text between the first pair of double quotes in
          # version.py.
          version=$(cut -d'"' -f2 sgl-model-gateway/bindings/python/src/sglang_router/version.py)
          # Stop before publishing a bare "v" tag if nothing was extracted.
          if [ -z "${version}" ]; then
            echo "::error::Could not read version from sglang_router/version.py"
            exit 1
          fi
          tag="v${version}"
          docker buildx build . -f docker/gateway.Dockerfile \
            --platform linux/amd64,linux/arm64 \
            -t "lmsysorg/sgl-model-gateway:${tag}" \
            -t lmsysorg/sgl-model-gateway:latest \
            --push
release-docker-npu matrix .github/workflows/release-docker-npu.yml
View raw YAML
name: Release Docker Images (NPU)
# Builds docker/npu.Dockerfile for every (CANN version, device type) pair in
# the matrix. The image is pushed only from sgl-project/sglang and never for
# pull_request events.
on:
  push:
    tags:
      - 'v[0-9]+.*'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
        required: true
jobs:
  build:
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        cann_version: ["8.5.0"]
        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false
      # push with tag
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            lmsysorg/sglang
          tags: |
            type=ref,event=pr
          flavor: |
            latest=false
      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Get version from tag
        id: version
        env:
          # The dispatch input is passed through the environment so that its
          # content is never parsed as part of the script.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="${REQUESTED_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          # Two outputs: the v-prefixed version (used as SGLANG_TAG) and the
          # full image reference used when the meta step yields no tags.
          echo "version=v${VERSION}" >> "$GITHUB_OUTPUT"
          echo "TAG=lmsysorg/sglang:v${VERSION}-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> "$GITHUB_OUTPUT"
      # Enable Docker multi-architecture build environment
      # Emulate non-native architectures
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      # Required for building and pushing multi-arch Docker images
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/npu.Dockerfile
          platforms: linux/arm64,linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          # Falls back to the computed TAG when the meta step produced none.
          tags: ${{ steps.meta.outputs.tags || steps.version.outputs.TAG }}
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=2026.03.10.rc1
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
            SGLANG_TAG=${{ steps.version.outputs.version }}
release-docker-npu-nightly matrix .github/workflows/release-docker-npu-nightly.yml
View raw YAML
name: Release Docker Images Nightly (NPU)
# Builds docker/npu.Dockerfile for every (CANN version, device type) pair in
# the matrix. Pull requests that touch this workflow or the Dockerfile only
# build; scheduled and manual runs in sgl-project/sglang also push.
on:
  pull_request:
    branches:
      - 'main'
    paths:
      - '.github/workflows/release-docker-npu-nightly.yml'
      - 'docker/npu.Dockerfile'
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *"
concurrency:
  group: ${{ github.workflow }}-${{ github.sha }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        cann_version: ["8.5.0"]
        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            lmsysorg/sglang
          # push with schedule event
          # push with workflow_dispatch event
          tags: |
            type=ref,event=pr
            type=ref,event=branch
            type=schedule,pattern=main
          flavor: |
            latest=false
            suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }},onlatest=true
      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into docker hub
        uses: docker/login-action@v3
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      # Enable Docker multi-architecture build environment
      # Emulate non-native architectures
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      # Required for building and pushing multi-arch Docker images.
      # This is the job's only Buildx setup; it runs after QEMU is installed.
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/npu.Dockerfile
          platforms: linux/arm64,linux/amd64
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.meta.outputs.tags }}
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=2026.03.10.rc1
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
release-docker-runtime matrix .github/workflows/release-docker-runtime.yml
View raw YAML
name: Release Docker Runtime Images
#
# This workflow builds and publishes runtime Docker images (production-optimized, ~50% smaller):
#   - lmsysorg/sglang:v{version}-runtime, lmsysorg/sglang:latest-runtime
#   - lmsysorg/sglang:v{version}-cu130-runtime, lmsysorg/sglang:latest-cu130-runtime
#
# Flow: publish-x86 and publish-arm64 each push a CUDA 12.9 and a CUDA 13 image
# by digest and upload the digests; create-manifests joins the per-architecture
# digests under the tags listed above.
#
on:
  push:
    tags:
      - "v[0-9]+.*"
  workflow_dispatch:
    inputs:
      version:
        description: "Version to build (without v prefix, e.g., 0.5.7)"
        required: true
jobs:
  publish-x86:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 0
    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Free disk space
        # Pinned to the same commit the NPU workflows use, instead of the
        # moving `main` branch.
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Get version from tag
        id: version
        env:
          # The dispatch input is passed through the environment so that its
          # content is never parsed as part of the script.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="${REQUESTED_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
      - name: Build and Push AMD64 Runtime
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg SGL_VERSION="${VERSION}" \
            --metadata-file /tmp/metadata-cu129-runtime.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu129-amd64-runtime.txt
      - name: Build and Push AMD64 Runtime (CUDA 13)
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/amd64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=0 \
            --build-arg SGL_VERSION="${VERSION}" \
            --metadata-file /tmp/metadata-cu130-runtime.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-amd64-runtime.txt
      - name: Upload digests
        uses: actions/upload-artifact@v4
        with:
          name: digests-amd64
          path: /tmp/digest-*.txt
          retention-days: 1
  publish-arm64:
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    strategy:
      matrix:
        variant:
          - cuda_version: "12.9.1"
            build_type: "all"
            grace_blackwell: 1
    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Get version from tag
        id: version
        env:
          # See publish-x86: the input is passed through the environment.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="${REQUESTED_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
      - name: Build and Push ARM64 Runtime
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          docker buildx build \
            --target runtime \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg SGL_VERSION="${VERSION}" \
            --metadata-file /tmp/metadata-cu129-runtime.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu129-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu129-arm64-runtime.txt
      - name: Build and Push ARM64 Runtime (CUDA 13)
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          # INSTALL_FLASHINFER_JIT_CACHE=1 is passed here as in the other three
          # runtime builds of this workflow; this step previously omitted it.
          # NOTE(review): confirm the omission was not deliberate.
          docker buildx build \
            --target runtime \
            --platform linux/arm64 \
            --output type=image,name=lmsysorg/sglang,push-by-digest=true,name-canonical=true,push=true \
            -f docker/Dockerfile \
            --build-arg CUDA_VERSION=13.0.1 \
            --build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
            --build-arg INSTALL_FLASHINFER_JIT_CACHE=1 \
            --build-arg GRACE_BLACKWELL=1 \
            --build-arg SGL_VERSION="${VERSION}" \
            --metadata-file /tmp/metadata-cu130-runtime.json \
            --no-cache \
            .
          DIGEST=$(python3 -c "import json; print(json.load(open('/tmp/metadata-cu130-runtime.json'))['containerimage.digest'])")
          echo "Pushed digest: ${DIGEST}"
          echo "${DIGEST}" > /tmp/digest-cu130-arm64-runtime.txt
      - name: Upload digests
        uses: actions/upload-artifact@v4
        with:
          name: digests-arm64
          path: /tmp/digest-*.txt
          retention-days: 1
  create-manifests:
    runs-on: ubuntu-22.04
    needs: [publish-x86, publish-arm64]
    if: github.repository == 'sgl-project/sglang'
    environment: "prod"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Get version from tag
        id: version
        env:
          # See publish-x86: the input is passed through the environment.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="${REQUESTED_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
      - name: Download amd64 digests
        uses: actions/download-artifact@v4
        with:
          name: digests-amd64
          path: /tmp/digests/amd64
      - name: Download arm64 digests
        uses: actions/download-artifact@v4
        with:
          name: digests-arm64
          path: /tmp/digests/arm64
      - name: Create multi-arch manifests
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          CU129_AMD64_RT=$(cat /tmp/digests/amd64/digest-cu129-amd64-runtime.txt)
          CU130_AMD64_RT=$(cat /tmp/digests/amd64/digest-cu130-amd64-runtime.txt)
          CU129_ARM64_RT=$(cat /tmp/digests/arm64/digest-cu129-arm64-runtime.txt)
          CU130_ARM64_RT=$(cat /tmp/digests/arm64/digest-cu130-arm64-runtime.txt)
          # Create versioned runtime manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${VERSION}-runtime" \
            "lmsysorg/sglang@${CU129_AMD64_RT}" \
            "lmsysorg/sglang@${CU129_ARM64_RT}"
          # Create latest runtime manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-runtime \
            "lmsysorg/sglang@${CU129_AMD64_RT}" \
            "lmsysorg/sglang@${CU129_ARM64_RT}"
          # Create versioned CUDA 13 runtime manifest
          docker buildx imagetools create \
            -t "lmsysorg/sglang:v${VERSION}-cu130-runtime" \
            "lmsysorg/sglang@${CU130_AMD64_RT}" \
            "lmsysorg/sglang@${CU130_ARM64_RT}"
          # Create latest CUDA 13 runtime manifest
          docker buildx imagetools create \
            -t lmsysorg/sglang:latest-cu130-runtime \
            "lmsysorg/sglang@${CU130_AMD64_RT}" \
            "lmsysorg/sglang@${CU130_ARM64_RT}"
release-docker-xeon matrix .github/workflows/release-docker-xeon.yml
View raw YAML
name: Release Docker Xeon Images
# Builds docker/xeon.Dockerfile and pushes it as lmsysorg/sglang:v<version>-xeon,
# either for a pushed version tag or for a manually supplied version.
on:
  push:
    tags:
      - 'v[0-9]+.*'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (without v prefix, e.g., 0.5.7)'
        required: true
jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    environment: 'prod'
    strategy:
      matrix:
        build_type:
          - 'all'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Get version from tag
        id: version
        env:
          # The dispatch input is passed through the environment so that its
          # content is never parsed as part of the script.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            VERSION="${REQUESTED_VERSION}"
          else
            # Extract version from tag (e.g., v0.5.7 -> 0.5.7)
            VERSION="${GITHUB_REF_NAME#v}"
          fi
          # Validate version format
          if [ -z "$VERSION" ]; then
            echo "::error::Version is empty"
            exit 1
          fi
          if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z)"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
      - name: Build and Push
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          tag="v${VERSION}-xeon"
          docker build . -f docker/xeon.Dockerfile \
            --build-arg VER_SGLANG="v${VERSION}" \
            -t "lmsysorg/sglang:${tag}" \
            --no-cache
          docker push "lmsysorg/sglang:${tag}"
release-docs .github/workflows/release-docs.yml
View raw YAML
name: Release Documentation
# Executes the documentation notebooks, builds the HTML and Markdown output,
# and pushes the HTML into the sgl-project/sgl-project.github.io repository.
# On release events it also generates and publishes the release lookup tool.
on:
  release:
    types: [published]
  push:
    branches:
      - main
    paths:
      - "docs/**"
      - "python/sglang/version.py"
      - "python/sglang/**"
  workflow_dispatch:
concurrency:
  group: release-docs-${{ github.ref }}
  cancel-in-progress: true
env:
  SGLANG_IS_IN_CI: true
jobs:
  execute-and-deploy:
    runs-on: 1-gpu-h100
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Fetch full git history for release index
        if: github.event_name == 'release'
        run: |
          git fetch --prune --unshallow || git fetch --prune --depth=0
      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh
          pip install -r docs/requirements.txt
          apt-get update && apt-get install -y pandoc parallel retry
          ln -sf "$(which python3)" /usr/bin/python
      - name: Setup Jupyter Kernel
        run: |
          python -m ipykernel install --user --name python3 --display-name "Python 3"
      - name: Execute notebooks
        timeout-minutes: 40
        run: |
          cd docs
          make clean
          make compile
      - name: Push HTML to sgl-project.github.io
        timeout-minutes: 30
        env:
          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_DOCUMENTATION }}
        run: |
          cd docs
          make html
          make markdown
          python3 wrap_run_llm.py
          if [[ "${{ github.event_name }}" == "release" ]]; then
            python3 release_lookup/generate_index.py --output release_lookup/release_index.json
            # Copy release lookup tool for official docs on published releases.
            mkdir -p _build/html/release_lookup
            cp release_lookup/index.html _build/html/release_lookup/
            cp release_lookup/release_index.json _build/html/release_lookup/
          fi
          cd _build/html
          git clone https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git ../sgl-project.github.io --depth 1
          # Empty the clone except for git metadata, CNAME and the Jekyll
          # marker files. Non-release runs also keep the published
          # release_lookup directory, because they do not regenerate it.
          if [[ "${{ github.event_name }}" == "release" ]]; then
            find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete
          else
            find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -path "../sgl-project.github.io/release_lookup*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete
          fi
          cp -r * ../sgl-project.github.io
          cp ../../README.md ../sgl-project.github.io/README.md
          cd ../sgl-project.github.io
          git config user.name "sglang-bot"
          git config user.email "sglangbot@gmail.com"
          git add .
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the job even though the published site is already current.
          if git diff --cached --quiet; then
            echo "No documentation changes to publish"
          else
            git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')"
            git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main
          fi
          cd ..
          rm -rf sgl-project.github.io
release-pypi .github/workflows/release-pypi.yml
View raw YAML
# Builds the sglang distributions from python/ and uploads them to PyPI when a
# version tag is pushed (or on manual dispatch).
name: Release PyPI

on:
  push:
    tags:
      - 'v[0-9]+.*'
  workflow_dispatch:

jobs:
  publish:
    # Never publish from forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: prod
    steps:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Required for setuptools-scm to determine version from tags

      - name: Upload to pypi
        env:
          # twine reads its credentials from these variables, so the token is
          # never spliced into the script text or the process command line.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          cd python
          cp ../README.md ../LICENSE .
          pip install build wheel setuptools setuptools-scm twine
          python3 -m build
          python3 -m twine upload dist/*
release-pypi-gateway matrix .github/workflows/release-pypi-gateway.yml
View raw YAML
# Publishes the model gateway Python bindings. Runs when the bindings'
# pyproject.toml changes on main, or on manual dispatch.
name: Release SGLang Model Gateway to PyPI

on:
  push:
    branches:
      - main
    paths:
      - sgl-model-gateway/bindings/python/pyproject.toml
  workflow_dispatch:

jobs:
# Builds binary wheels with maturin for every OS / target / manylinux
# combination in the matrix and uploads each result as its own artifact.
build:
  name: build on ${{ matrix.platform || matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }})
  runs-on: ${{ matrix.os }}-latest
  strategy:
    fail-fast: false
    matrix:
      os:
        - ubuntu
        - macos
        - windows
      target:
        - x86_64
        - aarch64
      manylinux:
        - auto
      include:
        - os: ubuntu
          platform: linux
        - os: windows
          ls: dir
          target: x86_64
          python-architecture: x64
          interpreter: '3.9 3.10 3.11 3.12 3.13'
        - os: macos
          target: aarch64
          interpreter: '3.9 3.10 3.11 3.12 3.13'
        - os: ubuntu
          platform: linux
          target: aarch64
        # musllinux
        - os: ubuntu
          platform: linux
          target: x86_64
          manylinux: musllinux_1_1
        - os: ubuntu
          platform: linux
          target: aarch64
          manylinux: musllinux_1_1
      exclude:
        - os: windows
          target: aarch64
  steps:
    - uses: actions/checkout@v4
      with:
        path: sglang-repo

    # Hoist the gateway sources to the workspace root and drop the rest of
    # the checkout.
    - name: Move sgl-model-gateway folder to root and delete sglang-repo
      run: |
        mv sglang-repo/sgl-model-gateway/* .
        rm -rf sglang-repo
        ls -alt
      shell: bash

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'
        architecture: ${{ matrix.python-architecture || 'x64' }}

    - name: Install twine
      run: pip install -U twine

    - name: Install protoc (macOS)
      if: matrix.os == 'macos'
      run: brew install protobuf

    - name: Install protoc (Windows)
      if: matrix.os == 'windows'
      run: choco install protoc -y

    - name: Build wheels
      uses: PyO3/maturin-action@v1
      with:
        working-directory: bindings/python
        target: ${{ matrix.target }}
        manylinux: ${{ matrix.manylinux || 'auto' }}
        # Matrix entries without an explicit interpreter list use the default below.
        args: --release --out dist --features vendored-openssl --interpreter ${{ matrix.interpreter || '3.9 3.10 3.11 3.12 3.13 3.14' }}
        rust-toolchain: stable
        docker-options: -e CI -e CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc -e CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++
        before-script-linux: |
          # Install build dependencies (perl/make for vendored OpenSSL, protoc for gRPC)
          if command -v yum &> /dev/null; then
            yum update -y && yum install -y wget unzip gcc gcc-c++ perl-core make
            # Install cross-compilation toolchain for aarch64 if needed
            if [ "${{ matrix.target }}" = "aarch64" ]; then
              yum install -y gcc-aarch64-linux-gnu gcc-c++-aarch64-linux-gnu || true
            fi
          elif command -v apt-get &> /dev/null; then
            apt-get update && apt-get install -y wget unzip gcc g++ perl make
            # Install cross-compilation toolchain for aarch64 if needed
            if [ "${{ matrix.target }}" = "aarch64" ]; then
              apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu || true
            fi
          fi
          (cd /tmp && \
          wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip && \
          unzip protoc-32.0-linux-x86_64.zip -d /usr/local && \
          rm protoc-32.0-linux-x86_64.zip)
          protoc --version

    # The Windows matrix entry overrides the listing command via `matrix.ls`.
    - name: List built packages
      run: ${{ matrix.ls || 'ls -lh' }} bindings/python/dist/

    - name: Check packages
      run: twine check --strict bindings/python/dist/*

    - uses: actions/upload-artifact@v4
      with:
        name: packages-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux || 'auto' }}
        path: bindings/python/dist/
# Produces the source distribution (.tar.gz) of the gateway bindings and
# uploads it as the "sdist" artifact.
build-sdist:
  name: Build SDist
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        path: sglang-repo

    # Hoist the gateway sources to the workspace root and drop the rest of
    # the checkout.
    - name: Move sgl-model-gateway folder to root and delete sglang-repo
      run: |
        mv sglang-repo/sgl-model-gateway/* .
        rm -rf sglang-repo
        ls -alt

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'

    - name: Build SDist
      uses: PyO3/maturin-action@v1
      with:
        working-directory: bindings/python
        command: sdist
        args: --out dist
        rust-toolchain: stable

    - uses: actions/upload-artifact@v4
      with:
        name: sdist
        path: bindings/python/dist/*.tar.gz
# Gathers every artifact produced by the build jobs into dist/ and uploads
# the lot to PyPI with twine.
upload:
  name: Upload to PyPI
  # Ensure this job only runs for the sgl-project/sglang repository
  if: github.repository == 'sgl-project/sglang'
  needs:
    - build
    - build-sdist
  runs-on: ubuntu-latest
  steps:
    - uses: actions/download-artifact@v4
      with:
        path: dist
        # Flatten all downloaded artifacts into the single dist/ directory.
        merge-multiple: true

    - name: Upload to PyPI
      env:
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_ROUTER }}
      run: |
        pip install twine
        twine upload dist/* --verbose
release-pypi-nightly .github/workflows/release-pypi-nightly.yml
View raw YAML
# Builds a nightly sglang wheel and publishes it to the sgl-project/whl
# repository.
name: Release PyPI Nightly Wheels

on:
  # Run daily at 2 AM UTC
  schedule:
    - cron: '0 2 * * *'
  # Triggered by nightly Docker workflow to use same commit
  repository_dispatch:
    types:
      - nightly-release
  # Manual trigger for testing
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Specific commit SHA to build (leave empty for latest)'
        required: false
        type: string
      cuda_version:
        description: 'CUDA version (e.g., 129 or 130)'
        required: false
        default: '129'
        type: string

# One run per ref; a newer run cancels the one still in progress.
concurrency:
  group: release-pypi-nightly-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Builds the nightly wheel with a forced version of the form
# <next patch version>.dev<YYYYMMDD>+g<short sha>, and exposes the version,
# short commit hash and build date as job outputs.
build-nightly-wheel:
  # Never run on forks.
  if: github.repository == 'sgl-project/sglang'
  runs-on: ubuntu-latest
  outputs:
    nightly_version: ${{ steps.build.outputs.nightly_version }}
    commit_hash: ${{ steps.build.outputs.commit_hash }}
    build_date: ${{ steps.build.outputs.build_date }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Use commit from: 1) Docker workflow, 2) manual input, 3) latest main
        ref: ${{ github.event.client_payload.commit_sha || inputs.commit_sha || github.sha }}
        fetch-depth: 0  # Need full history for setuptools-scm

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install build dependencies
      run: |
        pip install build wheel setuptools setuptools-scm

    - name: Build wheel
      id: build
      run: |
        cd python
        cp ../README.md ../LICENSE .
        # Parse git describe output to get latest tag
        # Use same command as pyproject.toml to ensure version consistency
        DESC=$(git tag --list --sort=-version:refname 'v*.*.*' | head -1 | xargs git describe --tags --long 2>/dev/null || echo 'v0.0.0-0-g0000000')
        TAG=$(echo "$DESC" | cut -d- -f1)
        HASH="g$(git rev-parse --short HEAD)"
        BUILD_DATE=$(date -u +%Y%m%d)
        # Increment patch version for nightlies (e.g., v0.5.8 -> 0.5.9)
        VERSION=${TAG#v}  # Remove 'v' prefix
        MAJOR=$(echo "$VERSION" | cut -d. -f1)
        MINOR=$(echo "$VERSION" | cut -d. -f2)
        PATCH=$(echo "$VERSION" | cut -d. -f3)
        # Keep only the leading digits of the patch component. A tag such as
        # v0.5.0rc2 yields "0rc2", which bash arithmetic rejects and would
        # abort the step.
        PATCH=${PATCH%%[!0-9]*}
        NEXT_PATCH=$((PATCH + 1))
        NEXT_VERSION="${MAJOR}.${MINOR}.${NEXT_PATCH}"
        # Use date-based dev number for correct chronological sorting
        # e.g., 0.5.9.dev20260215+g4cf4f0859 > 0.5.9.dev20260214+g45a4697d4
        FORCE_VERSION="${NEXT_VERSION}.dev${BUILD_DATE}+${HASH}"
        echo "Forcing nightly version to: $FORCE_VERSION"
        export SETUPTOOLS_SCM_PRETEND_VERSION="$FORCE_VERSION"
        # Build wheel
        python3 -m build --wheel
        # Extract version from built wheel filename
        WHEEL_FILE=$(ls dist/*.whl)
        NIGHTLY_VERSION=$(echo "$WHEEL_FILE" | sed 's/.*sglang-\(.*\)-py3.*/\1/')
        # Get commit info
        COMMIT_HASH=$(git rev-parse --short HEAD)
        # The published build date uses the dashed form (YYYY-MM-DD), unlike
        # the compact form embedded in the version above.
        BUILD_DATE=$(date -u +%Y-%m-%d)
        echo "Built wheel: $WHEEL_FILE"
        echo "Nightly version: ${NIGHTLY_VERSION}"
        echo "Commit: ${COMMIT_HASH}"
        echo "Build date: ${BUILD_DATE}"
        echo "nightly_version=${NIGHTLY_VERSION}" >> $GITHUB_OUTPUT
        echo "commit_hash=${COMMIT_HASH}" >> $GITHUB_OUTPUT
        echo "build_date=${BUILD_DATE}" >> $GITHUB_OUTPUT

    - name: Upload wheel artifact
      uses: actions/upload-artifact@v4
      with:
        name: nightly-wheel
        path: python/dist/*.whl
        retention-days: 7
# Publishes the nightly wheel as a prerelease in sgl-project/whl and updates
# the wheel index stored in that repository.
release-nightly:
  needs: build-nightly-wheel
  runs-on: ubuntu-latest
  environment: prod
  steps:
    - uses: actions/checkout@v4

    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: nightly-wheel
        path: dist/

    - name: List downloaded wheels
      run: |
        echo "Downloaded wheel:"
        ls -lh dist/

    - name: Create GitHub Release for nightly wheel
      uses: softprops/action-gh-release@v2
      with:
        tag_name: nightly-${{ needs.build-nightly-wheel.outputs.build_date }}-${{ needs.build-nightly-wheel.outputs.commit_hash }}
        name: Nightly Build ${{ needs.build-nightly-wheel.outputs.build_date }} (${{ needs.build-nightly-wheel.outputs.commit_hash }})
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        prerelease: true
        # Report the commit the wheel was built from. `github.sha` is the
        # commit that triggered this run, which differs whenever the build
        # job checked out client_payload.commit_sha or inputs.commit_sha.
        body: |
          Nightly build from commit ${{ needs.build-nightly-wheel.outputs.commit_hash }}
          Build date: ${{ needs.build-nightly-wheel.outputs.build_date }}
          Version: ${{ needs.build-nightly-wheel.outputs.nightly_version }}
        files: |
          dist/*.whl

    - name: Clone wheel index repository
      run: |
        git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Update wheel index
      run: |
        python3 scripts/update_nightly_whl_index.py \
          --commit-hash ${{ needs.build-nightly-wheel.outputs.commit_hash }} \
          --nightly-version ${{ needs.build-nightly-wheel.outputs.nightly_version }} \
          --cuda-version ${{ inputs.cuda_version || '129' }} \
          --build-date ${{ needs.build-nightly-wheel.outputs.build_date }}

    - name: Push wheel index
      run: |
        cd sgl-whl
        git add -A
        # Commit only when the index changed.
        git diff --staged --quiet || git commit -m "Update nightly wheel index for commit ${{ needs.build-nightly-wheel.outputs.commit_hash }}"
        git push
release-pypi-pr .github/workflows/release-pypi-pr.yml
View raw YAML
# Manually triggered build of an installable wheel for a single pull request.
name: Release PyPI PR Wheels

on:
  workflow_dispatch:
    inputs:
      pr_number:
        description: 'PR number to build wheel for (works with both internal and fork PRs)'
        required: true
        type: string

# One run per PR number; a newer run cancels the one still in progress.
concurrency:
  group: build-pr-wheel-${{ github.event.inputs.pr_number }}
  cancel-in-progress: true

jobs:
# Checks out the PR head, pins a static PR-specific version into
# python/pyproject.toml, builds the wheel and uploads it as an artifact.
build-pr-wheel:
  # Never run on forks.
  if: github.repository == 'sgl-project/sglang'
  runs-on: ubuntu-latest
  outputs:
    wheel_version: ${{ steps.gen_version.outputs.wheel_version }}
    commit_hash: ${{ steps.gen_version.outputs.commit_hash }}
    build_date: ${{ steps.gen_version.outputs.build_date }}
  steps:
    - uses: actions/checkout@v4
      with:
        ref: refs/pull/${{ inputs.pr_number }}/head
        fetch-depth: 0  # Need full history for version generation

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Generate PR wheel version
      id: gen_version
      run: |
        # Get base version from the latest v*.*.* git tag directly
        # Note: We cannot use setuptools_scm here because the [tool.setuptools_scm]
        # config (with custom git_describe_command) lives in python/pyproject.toml,
        # not at the repo root. Without that config, setuptools_scm falls back to
        # default git describe which finds gateway-* tags instead of v*.*.* release tags.
        LATEST_TAG=$(git tag --list --sort=-version:refname 'v*.*.*' | head -1)
        BASE_VERSION=${LATEST_TAG#v}
        echo "Latest release tag: ${LATEST_TAG}"

        # Get commit info
        COMMIT_HASH=$(git rev-parse --short HEAD)
        COMMIT_COUNT=$(git rev-list --count HEAD)

        # Get current date in YYYY-MM-DD format
        BUILD_DATE=$(date -u +%Y-%m-%d)

        # Always use pr-{number} format for suffix
        SUFFIX="pr-${{ inputs.pr_number }}"

        # Generate PR wheel version following PEP 440
        # Format: {base_version}.dev{commit_count}+pr-{number}.g{commit_hash}
        WHEEL_VERSION="${BASE_VERSION}.dev${COMMIT_COUNT}+${SUFFIX}.g${COMMIT_HASH}"

        echo "Base version: ${BASE_VERSION}"
        echo "PR wheel version: ${WHEEL_VERSION}"
        echo "Commit: ${COMMIT_HASH}"
        echo "Build date: ${BUILD_DATE}"

        # Publish the values as step outputs.
        {
          echo "wheel_version=${WHEEL_VERSION}"
          echo "commit_hash=${COMMIT_HASH}"
          echo "base_version=${BASE_VERSION}"
          echo "build_date=${BUILD_DATE}"
        } >> $GITHUB_OUTPUT

    - name: Update pyproject.toml with PR wheel version
      run: |
        cd python
        WHEEL_VERSION="${{ steps.gen_version.outputs.wheel_version }}"
        # Update pyproject.toml to use static version instead of dynamic
        # Remove 'version' from dynamic list and add static version
        sed -i 's/dynamic = \["version"\]/dynamic = []/' pyproject.toml
        sed -i "/^name = \"sglang\"/a version = \"${WHEEL_VERSION}\"" pyproject.toml
        # Verify update
        echo "Updated version in pyproject.toml:"
        grep "^version" pyproject.toml
        grep "^dynamic" pyproject.toml

    - name: Install build dependencies
      run: |
        cd python
        pip install build wheel setuptools

    - name: Build wheel
      run: |
        cd python
        cp ../README.md ../LICENSE .
        python3 -m build --wheel
        # List built wheels
        echo "Built wheel:"
        ls -lh dist/

    - name: Upload wheel artifact
      uses: actions/upload-artifact@v4
      with:
        name: pr-wheel-${{ inputs.pr_number }}
        path: python/dist/*.whl
        retention-days: 30
# Publishes the PR wheel as a prerelease in sgl-project/whl and updates the
# PR wheel index stored in that repository.
release-pr-wheel:
  needs: build-pr-wheel
  runs-on: ubuntu-latest
  environment: prod
  steps:
    - uses: actions/checkout@v4

    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: pr-wheel-${{ inputs.pr_number }}
        path: dist/

    - name: List downloaded wheels
      run: |
        echo "Downloaded wheel:"
        ls -lh dist/

    - name: Create GitHub Release for PR wheel
      uses: softprops/action-gh-release@v2
      with:
        tag_name: pr-${{ inputs.pr_number }}-${{ needs.build-pr-wheel.outputs.build_date }}-${{ needs.build-pr-wheel.outputs.commit_hash }}
        name: "PR #${{ inputs.pr_number }} Build (${{ needs.build-pr-wheel.outputs.commit_hash }})"
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        prerelease: true
        # "Commit" is the PR head the wheel was built from. `github.sha`
        # would be the head of the branch this workflow was dispatched on,
        # which is not the code inside the wheel.
        body: |
          PR wheel build from PR #${{ inputs.pr_number }}
          Commit: ${{ needs.build-pr-wheel.outputs.commit_hash }}
          Build date: ${{ needs.build-pr-wheel.outputs.build_date }}
          Version: ${{ needs.build-pr-wheel.outputs.wheel_version }}
          **Installation via index (pip):**
          ```bash
          pip install sglang==${{ needs.build-pr-wheel.outputs.wheel_version }} --index-url https://sgl-project.github.io/whl/pr/
          ```
          **Installation via index (uv):**
          ```bash
          uv pip install sglang==${{ needs.build-pr-wheel.outputs.wheel_version }} --index-url https://sgl-project.github.io/whl/pr/ --extra-index-url https://pypi.org/simple --index-strategy unsafe-best-match
          ```
          **Direct installation:**
          ```bash
          pip install https://github.com/sgl-project/whl/releases/download/pr-${{ inputs.pr_number }}-${{ needs.build-pr-wheel.outputs.build_date }}-${{ needs.build-pr-wheel.outputs.commit_hash }}/sglang-${{ needs.build-pr-wheel.outputs.wheel_version }}-py3-none-any.whl
          ```
        files: |
          dist/*.whl

    - name: Clone wheel index repository
      run: |
        git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Update wheel index
      run: |
        python3 scripts/update_pr_whl_index.py \
          --pr-number ${{ inputs.pr_number }} \
          --commit-hash ${{ needs.build-pr-wheel.outputs.commit_hash }} \
          --wheel-version ${{ needs.build-pr-wheel.outputs.wheel_version }} \
          --build-date ${{ needs.build-pr-wheel.outputs.build_date }}

    - name: Push wheel index
      run: |
        cd sgl-whl
        git add -A
        # Commit only when the index changed.
        git diff --staged --quiet || git commit -m "Update PR wheel index for PR #${{ inputs.pr_number }} (commit ${{ needs.build-pr-wheel.outputs.commit_hash }})"
        git push
release-tag perms .github/workflows/release-tag.yml
View raw YAML
name: Release Tag

# Creates a git tag to trigger release workflows (PyPI, Docker)
# Use this after testing on a release branch is complete
on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to tag (without v prefix, e.g., 0.5.7)'
        required: true
        type: string
      ref:
        description: 'Branch or commit to tag (e.g., release/v0.5.7, main, or commit SHA)'
        required: false
        default: 'main'
        type: string

permissions:
  contents: write

jobs:
  # Validates the requested version, checks out the target ref, and pushes an
  # annotated tag v<version> pointing at it.
  create-tag:
    # Never run on forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: prod
    env:
      # Inputs reach the scripts as environment variables rather than being
      # spliced into the script text, so their content is never parsed as shell.
      VERSION: ${{ github.event.inputs.version }}
      TARGET_REF: ${{ github.event.inputs.ref }}
    steps:
      - name: Validate version format
        run: |
          if [ -z "$VERSION" ]; then
            echo "::error::Version is required"
            exit 1
          fi
          # Anchored at both ends: X.Y.Z, optionally followed by an
          # alphanumeric/dot suffix (e.g. .post1, rc2). Whitespace and shell
          # metacharacters are rejected.
          version_re='^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z.]*$'
          if [[ ! "$VERSION" =~ $version_re ]]; then
            echo "::error::Invalid version format: $VERSION (expected: X.Y.Z or X.Y.Z.postN)"
            exit 1
          fi
          echo "Version validated: v$VERSION"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.ref }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check if tag already exists
        run: |
          TAG="v${VERSION}"
          # Look the name up under refs/tags only, so a branch or other ref
          # with the same name does not count as an existing tag.
          if git rev-parse -q --verify "refs/tags/${TAG}" >/dev/null 2>&1; then
            echo "::error::Tag $TAG already exists"
            exit 1
          fi
          echo "Tag $TAG does not exist, proceeding..."

      - name: Create and push tag
        run: |
          TAG="v${VERSION}"
          git config user.name "sglang-bot"
          git config user.email "sglang-bot@users.noreply.github.com"
          echo "Creating tag $TAG on ref $TARGET_REF (commit: $(git rev-parse HEAD))"
          git tag -a "$TAG" -m "Release $TAG"
          # NOTE(review): the checkout above authenticates with GITHUB_TOKEN.
          # GitHub does not start new workflow runs for pushes made with
          # GITHUB_TOKEN, so the tag-triggered release workflows may not fire.
          # Confirm, and switch to a PAT or app token if that is the case.
          git push origin "$TAG"
          echo "::notice::Successfully created and pushed tag $TAG"
          echo "This will trigger the release workflows (PyPI, Docker)"
release-whl-kernel matrix .github/workflows/release-whl-kernel.yml
View raw YAML
# Builds and releases the sgl-kernel wheels. Runs when the kernel version
# file changes on main, or manually for a selected build target.
name: Release SGLang Kernels

on:
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py
  workflow_dispatch:
    inputs:
      target:
        type: choice
        description: 'Build target'
        required: false
        default: 'all'
        options:
          - 'all'
          - 'cu129'
          - 'cu130'
          - 'rocm700'
          - 'rocm720'
          - 'musa43'
      tag_name:
        type: string
        required: false
      pr_number:
        description: 'PR number to build from (e.g. 12345)'
        type: string
        required: false

# One run per ref; a newer run cancels the one still in progress.
concurrency:
  group: release-sglang-kernels-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Builds the CUDA 12.9 kernel wheels for x86_64 and aarch64, uploads them to
# PyPI, and stores them as workflow artifacts.
build-cu129-matrix:
  # `github.event.inputs` is empty on the push trigger, so push runs must be
  # admitted explicitly; otherwise every comparison below is false and the
  # job is skipped whenever version.py changes.
  if: |
    github.repository == 'sgl-project/sglang' &&
    (github.event_name == 'push' || github.event.inputs.target == 'all' || github.event.inputs.target == 'cu129')
  strategy:
    matrix:
      python-version: ['3.10']
      cuda-version: ['12.9']
      arch: [x86_64, aarch64]
      # Pick the self-hosted build node matching each architecture.
      include:
        - arch: x86_64
          runner: x64-kernel-build-node
        - arch: aarch64
          runner: arm-kernel-build-node
  runs-on: ${{ matrix.runner }}
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
        # Build from the PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheels
      run: |
        cd sgl-kernel
        chmod +x ./build.sh
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ${{ matrix.arch == 'aarch64' && 'aarch64' || '' }}
      env:
        BUILD_JOBS: 64
        NVCC_THREADS: 8

    - name: Upload to PyPI
      working-directory: sgl-kernel
      env:
        # twine reads its credentials from these variables, so the token is
        # never spliced into the script text or the process command line.
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_SGLANG_KERNEL }}
      run: |
        pip install twine
        python3 -m twine upload --skip-existing dist/*

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}${{ matrix.arch == 'aarch64' && '-aarch64' || '' }}
        path: sgl-kernel/dist/*
# Publishes the CUDA 12.9 wheels as a release in sgl-project/whl and
# refreshes the wheel index stored in that repository.
release-cu129:
  needs: build-cu129-matrix
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        # PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        # Only the CUDA 12.9 wheels. A bare `wheel-*` also matches artifacts
        # that other build jobs of the same run have already uploaded.
        pattern: wheel-*-cuda12.9*

    - name: Set tag name
      id: set_tag_name
      run: |
        # Default to v<version> read from version.py unless a tag was supplied.
        if [ -z "${{ inputs.tag_name }}" ]; then
          TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
          echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
        else
          echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
        fi

    - name: Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        files: |
          sgl-kernel/dist/*

    - name: Clone wheel index
      run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Update wheel index
      run: python3 scripts/update_kernel_whl_index.py --cuda 129

    - name: Push wheel index
      run: |
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
        git add -A
        # Commit only when the index changed; an unconditional `git commit`
        # exits non-zero on an empty index (e.g. on a re-run) and fails the job.
        git diff --staged --quiet || git commit -m "update whl index"
        git push
# Builds the CUDA 13.0 kernel wheels for x86_64 and aarch64 and stores them
# as workflow artifacts.
# For now we do not release CUDA 13.0 wheels to PyPI.
build-cu130-matrix:
  # `github.event.inputs` is empty on the push trigger, so push runs must be
  # admitted explicitly; otherwise every comparison below is false and the
  # job is skipped whenever version.py changes.
  if: |
    github.repository == 'sgl-project/sglang' &&
    (github.event_name == 'push' || github.event.inputs.target == 'all' || github.event.inputs.target == 'cu130')
  strategy:
    matrix:
      python-version: ['3.10']
      cuda-version: ['13.0']
      arch: [x86_64, aarch64]
      # Pick the self-hosted build node matching each architecture.
      include:
        - arch: x86_64
          runner: x64-kernel-build-node
        - arch: aarch64
          runner: arm-kernel-build-node
  runs-on: ${{ matrix.runner }}
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
        # Build from the PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheels
      run: |
        cd sgl-kernel
        chmod +x ./build.sh
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" ${{ matrix.arch == 'aarch64' && 'aarch64' || '' }}
      env:
        BUILD_JOBS: 64
        NVCC_THREADS: 8

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}${{ matrix.arch == 'aarch64' && '-aarch64' || '' }}
        path: sgl-kernel/dist/*
# Publishes the CUDA 13.0 wheels as a release in sgl-project/whl and
# refreshes the wheel index stored in that repository.
release-cu130:
  needs: build-cu130-matrix
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        # PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        # Only the CUDA 13.0 wheels. A bare `wheel-*` also matches artifacts
        # that other build jobs of the same run have already uploaded.
        pattern: wheel-*-cuda13.0*

    - name: Set tag name
      id: set_tag_name
      run: |
        # Default to v<version> read from version.py unless a tag was supplied.
        if [ -z "${{ inputs.tag_name }}" ]; then
          TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
          echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
        else
          echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
        fi

    - name: Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        files: |
          sgl-kernel/dist/*

    - name: Clone wheel index
      run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Update wheel index
      run: python3 scripts/update_kernel_whl_index.py --cuda 130

    - name: Push wheel index
      run: |
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
        git add -A
        # Commit only when the index changed; an unconditional `git commit`
        # exits non-zero on an empty index (e.g. on a re-run) and fails the job.
        git diff --staged --quiet || git commit -m "update whl index"
        git push
# Builds the ROCm kernel wheels (7.0.0 and 7.2.0 variants) and stores them as
# workflow artifacts.
build-rocm-matrix:
  # `github.event.inputs` is empty on the push trigger, so push runs must be
  # admitted explicitly; otherwise every comparison below is false and the
  # job is skipped whenever version.py changes.
  # NOTE(review): the matrix builds both ROCm versions even when a single
  # rocm target is selected - confirm this is intended.
  if: |
    github.repository == 'sgl-project/sglang' &&
    (github.event_name == 'push' || github.event.inputs.target == 'all' || github.event.inputs.target == 'rocm700' || github.event.inputs.target == 'rocm720')
  runs-on: amd-docker-scale
  strategy:
    matrix:
      python-version: ['3.10']
      rocm-version: ['700', '720']
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
        # Build from the PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheels
      run: |
        # Copy the AMD-specific build files into sgl-kernel/ before building.
        cp 3rdparty/amd/wheel/sgl-kernel/* sgl-kernel/
        cd sgl-kernel
        chmod +x ./build_rocm.sh
        ./build_rocm.sh "${{ matrix.rocm-version }}"

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-rocm${{ matrix.rocm-version }}
        path: sgl-kernel/dist/*
# Publishes the ROCm 700 wheels as a release in sgl-project/whl and refreshes
# the wheel index stored in that repository.
release-rocm700:
  needs: build-rocm-matrix
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        # PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        # Only the artifacts whose name ends in -rocm700.
        pattern: wheel-*-rocm700

    - name: Set tag name
      id: set_tag_name
      run: |
        # Default to v<version> read from version.py unless a tag was supplied.
        if [ -z "${{ inputs.tag_name }}" ]; then
          TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
          echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
        else
          echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
        fi

    - name: Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        files: |
          sgl-kernel/dist/*

    - name: Clone wheel index
      run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Update wheel index
      run: python3 scripts/update_kernel_whl_index.py --rocm 700

    - name: Push wheel index
      run: |
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
        git add -A
        # Commit only when the index changed; an unconditional `git commit`
        # exits non-zero on an empty index (e.g. on a re-run) and fails the job.
        git diff --staged --quiet || git commit -m "update whl index"
        git push
# Publishes the ROCm 720 wheels as a release in sgl-project/whl and refreshes
# the wheel index stored in that repository.
release-rocm720:
  needs: build-rocm-matrix
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        # PR head when pr_number is given, else the default ref.
        ref: ${{ inputs.pr_number && format('refs/pull/{0}/head', inputs.pr_number) || '' }}

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        # Only the artifacts whose name ends in -rocm720.
        pattern: wheel-*-rocm720

    - name: Set tag name
      id: set_tag_name
      run: |
        # Default to v<version> read from version.py unless a tag was supplied.
        if [ -z "${{ inputs.tag_name }}" ]; then
          TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
          echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
        else
          echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
        fi

    - name: Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        files: |
          sgl-kernel/dist/*

    - name: Clone wheel index
      run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Update wheel index
      run: python3 scripts/update_kernel_whl_index.py --rocm 720

    - name: Push wheel index
      run: |
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
        git add -A
        # Commit only when the index changed; an unconditional `git commit`
        # exits non-zero on an empty index (e.g. on a re-run) and fails the job.
        git diff --staged --quiet || git commit -m "update whl index"
        git push
# Builds the MUSA 4.3 kernel wheel, renames it with the MUSA version, and
# stores it as a workflow artifact.
build-musa43:
  # `github.event.inputs` is empty on the push trigger, so push runs must be
  # admitted explicitly; otherwise every comparison below is false and the
  # job is skipped whenever version.py changes.
  if: |
    github.repository == 'sgl-project/sglang' &&
    (github.event_name == 'push' || github.event.inputs.target == 'all' || github.event.inputs.target == 'musa43')
  runs-on: kernel-build-node-musa
  strategy:
    matrix:
      python-version: ['3.10']
      musa-version: ['43']
  steps:
    # NOTE(review): unlike the CUDA/ROCm build jobs, this checkout does not
    # honour the pr_number input - confirm whether that is deliberate.
    - uses: actions/checkout@v4
      with:
        submodules: recursive

    - name: Build wheels
      run: |
        cd sgl-kernel
        # The MUSA build uses its own project file and setup script.
        mv pyproject_musa.toml pyproject.toml
        python setup_musa.py sdist bdist_wheel

    - name: Rename MUSA wheels
      run: |
        bash scripts/ci/musa/rename_wheels_musa.sh ${{ matrix.musa-version }} sgl-kernel/dist

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-musa${{ matrix.musa-version }}
        path: sgl-kernel/dist/*
# Publishes the MUSA 4.3 wheels as a release in sgl-project/whl and refreshes
# the wheel index stored in that repository.
release-musa43:
  needs: build-musa43
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        # Only the MUSA 4.3 wheels. A bare `wheel-*` also matches artifacts
        # that other build jobs of the same run have already uploaded.
        pattern: wheel-*-musa43

    - name: Set tag name
      id: set_tag_name
      run: |
        # Default to v<version> read from version.py unless a tag was supplied.
        if [ -z "${{ inputs.tag_name }}" ]; then
          TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
          echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
        else
          echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
        fi

    - name: Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
        repository: sgl-project/whl
        token: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}
        files: |
          sgl-kernel/dist/*

    - name: Clone wheel index
      run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
      env:
        WHL_TOKEN: ${{ secrets.GH_PAT_FOR_WHL_RELEASE }}

    - name: Update wheel index
      run: python3 scripts/update_kernel_whl_index.py --musa 43

    - name: Push wheel index
      run: |
        cd sgl-whl
        git config --local user.name "sglang-bot"
        git config --local user.email "sglangbot@gmail.com"
        git add -A
        # Commit only when the index changed; an unconditional `git commit`
        # exits non-zero on an empty index (e.g. on a re-run) and fails the job.
        git diff --staged --quiet || git commit -m "update whl index"
        git push
rerun-ut perms .github/workflows/rerun-ut.yml
View raw YAML
# Manually dispatched workflow that reruns a single test command on a chosen
# GPU runner.
name: Rerun UT

# Show the test command (and the PR head SHA, when given) in the run title.
run-name: ${{ inputs.pr_head_sha && format('[rerun-ut] {0} {1}', inputs.test_command, inputs.pr_head_sha) || format('[rerun-ut] {0}', inputs.test_command) }}

on:
  workflow_dispatch:
    inputs:
      test_command:
        description: "Test command to run (e.g. 'registered/core/test_srt_endpoint.py TestSRTEndpoint.test_simple_decode')"
        required: true
        type: string
      runner_label:
        description: 'Runner label'
        required: true
        type: choice
        options:
          - 1-gpu-h100
          - 1-gpu-5090
          - 2-gpu-h100
          - 4-gpu-h100
          - 4-gpu-a10
          - 4-gpu-b200
          - 8-gpu-h200
          - 8-gpu-h20
          - 8-gpu-b200
      pr_head_sha:
        description: 'PR head SHA to checkout (for /rerun-ut on fork PRs)'
        required: false
        type: string
        default: ''
      use_deepep:
        description: 'Use ci_install_deepep.sh instead of ci_install_dependency.sh'
        required: false
        type: string
        default: 'false'

env:
  SGLANG_IS_IN_CI: true
  SGLANG_CUDA_COREDUMP: '1'
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true

permissions:
  actions: write
  contents: read
  issues: read

jobs:
# Checks out the requested commit, installs dependencies, and runs the
# requested test command on the selected runner.
rerun-ut-cuda:
  runs-on: ${{ inputs.runner_label }}
  timeout-minutes: 120
  env:
    RUNNER_LABELS: ${{ inputs.runner_label }}
    # RDMA device list is only set for the 8-gpu-h20 runner.
    SGLANG_CI_RDMA_ALL_DEVICES: ${{ inputs.runner_label == '8-gpu-h20' && 'mlx5_1,mlx5_2,mlx5_3,mlx5_4' || '' }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || github.sha }}

    - uses: ./.github/actions/check-maintenance

    - name: Install dependencies
      timeout-minutes: 20
      env:
        # Inputs reach the script as environment variables rather than being
        # spliced into the script text, so their content is never parsed as shell.
        USE_DEEPEP: ${{ inputs.use_deepep }}
      run: |
        if [[ "$RUNNER_LABELS" == "1-gpu-5090" ]]; then
          source /etc/profile.d/sglang-ci.sh
        fi
        if [[ "$USE_DEEPEP" == "true" ]]; then
          bash scripts/ci/cuda/ci_install_deepep.sh
        else
          bash scripts/ci/cuda/ci_install_dependency.sh
        fi

    - name: Run test
      timeout-minutes: 60
      env:
        TEST_COMMAND: ${{ inputs.test_command }}
      run: |
        if [[ "$RUNNER_LABELS" == "1-gpu-5090" ]]; then
          source /etc/profile.d/sglang-ci.sh
        fi
        cd test/
        # Split the command on whitespace into an argument list. Shell
        # metacharacters in the input (;, |, $(...), quotes) are passed to
        # python3 literally and are not interpreted by the shell.
        read -r -a test_args <<< "$TEST_COMMAND"
        python3 "${test_args[@]}"

    # Always runs, even when an earlier step failed.
    - uses: ./.github/actions/upload-cuda-coredumps
      if: always()
retag-docker .github/workflows/retag-docker.yml
View raw YAML
# Applies an additional tag to an existing lmsysorg/sglang image on Docker Hub.
name: Retag Docker Image

on:
  workflow_dispatch:
    inputs:
      source_tag:
        description: 'Existing image tag (e.g., v0.4.7-cu129-amd64)'
        required: true
      target_tag:
        description: 'New tag to apply (e.g., latest)'
        required: true

jobs:
  retag:
    # Never run on forks.
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-22.04
    environment: prod
    steps:
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Retag image
        env:
          # Inputs reach the script as environment variables rather than being
          # spliced into the script text, so their content is never parsed as shell.
          SOURCE_TAG: ${{ inputs.source_tag }}
          TARGET_TAG: ${{ inputs.target_tag }}
        run: |
          echo "Retagging lmsysorg/sglang:${SOURCE_TAG} -> lmsysorg/sglang:${TARGET_TAG}"
          docker buildx imagetools create \
            -t "lmsysorg/sglang:${TARGET_TAG}" \
            "lmsysorg/sglang:${SOURCE_TAG}"
runner-utilization .github/workflows/runner-utilization.yml
View raw YAML
# Generates a runner utilization report: daily on a schedule, on pull requests
# that touch the workflow or the report script, and on manual dispatch.
name: Runner Utilization Report

on:
  schedule:
    - cron: '0 8 * * *'  # Daily at 8 AM UTC
  pull_request:
    paths:
      - '.github/workflows/runner-utilization.yml'
      - 'scripts/ci/utils/runner_utilization_report.py'
  workflow_dispatch:
    inputs:
      hours:
        description: 'Time window in hours'
        required: false
        default: '24'
        type: string
      filter:
        description: 'Filter runner labels (e.g., 5090, h200)'
        required: false
        type: string

jobs:
  report:
    name: Generate Report
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Generate Utilization Report
        timeout-minutes: 30
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Inputs reach the script as environment variables rather than being
          # spliced into the script text. Both are empty on scheduled and
          # pull_request runs, so the window falls back to 24 hours.
          REPORT_HOURS: ${{ inputs.hours || '24' }}
          REPORT_FILTER: ${{ inputs.filter }}
        run: |
          # GITHUB_REPOSITORY is the owner/name of the repository running this workflow.
          report_args=(--repo "$GITHUB_REPOSITORY" --hours "$REPORT_HOURS")
          # Pass --filter only when a filter was supplied.
          if [[ -n "$REPORT_FILTER" ]]; then
            report_args+=(--filter "$REPORT_FILTER")
          fi
          python scripts/ci/utils/runner_utilization_report.py "${report_args[@]}"
slash-command-handler perms .github/workflows/slash-command-handler.yml
View raw YAML
# Reacts to slash commands posted as comments on pull requests and hands the
# comment to scripts/ci/utils/slash_command_handler.py.
name: Slash Command Handler

on:
  issue_comment:
    types:
      - created
      - edited

permissions:
  contents: read
  pull-requests: write  # Required to add labels and reactions
  actions: write        # Required to rerun workflows
  issues: write         # Required for comment reactions in some contexts

jobs:
  slash_command:
    # Run only for comments on a PR that mention one of the known commands.
    # contains() is used because startsWith() cannot cope with leading
    # whitespace or newlines in the comment body.
    if: >
      github.event.issue.pull_request &&
      (contains(github.event.comment.body, '/tag-run-ci-label') ||
      contains(github.event.comment.body, '/rerun-failed-ci') ||
      contains(github.event.comment.body, '/tag-and-rerun-ci') ||
      contains(github.event.comment.body, '/rerun-stage') ||
      contains(github.event.comment.body, '/rerun-ut'))
    runs-on: ubuntu-latest
    steps:
      # SECURITY: the issue_comment trigger runs with elevated permissions
      # (pull-requests: write, actions: write). Same-repository PRs may check
      # out their own branch so handler changes can be tested; fork PRs MUST
      # stay on the default ref so untrusted code never runs with these
      # permissions.
      - name: Get PR details
        id: pr
        shell: bash
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          if ! PR_DATA=$(gh pr view ${{ github.event.issue.number }} --repo ${{ github.repository }} --json headRefName,headRepositoryOwner); then
            echo "::error::Failed to fetch PR data"
            exit 1
          fi

          # '// empty' turns a null/missing owner (e.g. a deleted fork) into "".
          HEAD_OWNER=$(jq -r '.headRepositoryOwner.login // empty' <<< "$PR_DATA")
          REPO_OWNER="${{ github.repository_owner }}"

          # Fail-safe: assume a fork unless the head owner is known and matches.
          IS_FORK="true"
          if [[ -n "$HEAD_OWNER" && "$HEAD_OWNER" == "$REPO_OWNER" ]]; then
            IS_FORK="false"
          fi

          {
            echo "is_fork=$IS_FORK"
            echo "ref=$(jq -r '.headRefName' <<< "$PR_DATA")"
          } >> "$GITHUB_OUTPUT"
          echo "PR owner: $HEAD_OWNER, Repo owner: $REPO_OWNER, Is fork: $IS_FORK"

      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Same-repository PR: check out the PR branch. Fork PR: the empty
          # ref keeps the checkout on the default ref for this event.
          ref: ${{ steps.pr.outputs.is_fork == 'false' && steps.pr.outputs.ref || '' }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install PyGithub

      - name: Handle Slash Command
        # Event data reaches the handler through environment variables.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO_FULL_NAME: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.issue.number }}
          COMMENT_ID: ${{ github.event.comment.id }}
          COMMENT_BODY: ${{ github.event.comment.body }}
          USER_LOGIN: ${{ github.event.comment.user.login }}
        run: |
          python scripts/ci/utils/slash_command_handler.py
stress-test .github/workflows/stress-test.yml
View raw YAML
# Manually dispatched stress run of the "stress" CUDA test suite on an
# 8-GPU H200 runner.
name: Stress Test

on:
  workflow_dispatch:
    inputs:
      num_prompts:
        description: 'Number of prompts per model'
        type: string
        required: true
        default: '50000'
      duration_minutes:
        description: 'Timeout per model in minutes'
        type: string
        required: true
        default: '45'

jobs:
  stress-test:
    # Only the upstream repository runs this job.
    if: github.repository == 'sgl-project/sglang'
    runs-on: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run stress tests
        timeout-minutes: 210
        # Both dispatch inputs are exported to the suite as environment variables.
        env:
          NUM_PROMPTS: ${{ inputs.num_prompts }}
          DURATION_MINUTES: ${{ inputs.duration_minutes }}
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite stress

      # Uploads result files even when the stress run failed.
      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: stress-test-results
          path: |
            stress_test_*.jsonl
weekly-test-nvidia .github/workflows/weekly-test-nvidia.yml
View raw YAML
# Weekly Nvidia test run: scheduled every Sunday, or dispatched manually with
# an optional job filter.
name: Weekly Test (Nvidia)

on:
  schedule:
    - cron: '0 0 * * 0'  # Run every Sunday at midnight UTC
  workflow_dispatch:
    inputs:
      job_filter:
        description: 'Select which job to run (leave empty or "all" to run all jobs)'
        type: choice
        required: false
        default: 'all'
        options:
          - 'all'
          - 'weekly-test-8-gpu-h200'

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: weekly-test-nvidia-${{ github.ref }}
  cancel-in-progress: true

env:
  SGLANG_IS_IN_CI: "true"
  HF_HUB_DOWNLOAD_TIMEOUT: "300"
  HF_HUB_ETAG_TIMEOUT: "300"

jobs:
  # Weekly tests - 8 GPU H200
  weekly-test-8-gpu-h200:
    # Upstream repository only; runs when the filter is empty, 'all', or
    # names this job.
    if: >-
      github.repository == 'sgl-project/sglang' &&
      (inputs.job_filter == '' ||
      inputs.job_filter == 'all' ||
      inputs.job_filter == 'weekly-test-8-gpu-h200')
    runs-on: 8-gpu-h200
    timeout-minutes: 120
    env:
      RUNNER_LABELS: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Run weekly 8-GPU H200 tests
        timeout-minutes: 120
        env:
          GPU_CONFIG: "8-gpu-h200"
          IS_H200: "1"
        run: |
          cd test
          python3 run_suite.py --hw cuda --suite weekly-8-gpu-h200 --nightly --continue-on-error --timeout-per-file 7200