InternLM/lmdeploy

20 workflows · maturity 50% · 8 patterns · GitHub ↗

Security 5/100

Practices

✓ Matrix · ✓ Permissions · ○ Security scan · ○ AI review · ○ Cache · ✓ Concurrency · ○ Reusable workflows

Detected patterns

cross-version-compat flaky-test-retry hardware-matrix least-privilege-permissions multi-channel-release multi-stage-release per-sample-ci performance-tracking

Security dimensions

permissions

security scan

supply chain

secret handling

harden runner

Workflows (20)

api_eval matrix .github/workflows/api_eval.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest, self-hosted, linux-a100, self-hosted, linux-a100

Jobs

linux-build, download_pkgs, test_evaluation

Matrix

backend, gpu_num, pyver, transformers→ , ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}, gpu_num_1, gpu_num_2, gpu_num_4, gpu_num_8, legacy, py310

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
chmod -R 777 ${{env.TEST_CODE_PATH}} mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
cp -r ${{env.TEST_CODE_PATH}}/. . mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt

View raw YAML

# Manually dispatched workflow that builds (or reuses) an lmdeploy wheel and
# runs autotest/evaluate/test_api_evaluate.py on self-hosted A100 runners.
name: api_eval

on:
  workflow_dispatch:
    inputs:
      # Repository (owner/name) that the build and download jobs check out.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      # Git ref to test; it is also embedded in the per-run report/code paths.
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # List of backends; parsed with fromJSON() to build the test matrix.
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      # Selects which pytest phases (infer / eval) the evaluation step runs.
      execution_mode:
        required: false
        description: 'Select execution mode: infer, eval, or both. Default is "both"'
        type: choice
        options:
          - both
          - infer
          - eval
        default: 'both'
      # NOTE(review): this input is not referenced by any job in this workflow
      # (paths use github.run_id directly) — confirm it is still needed.
      run_id:
        required: false
        description: 'Set custom run ID. If not provided, github.run_id will be used'
        type: string
        default: ''
      # When true, linux-build is skipped and code plus wheel are copied from
      # OFFLINE_CODE_PATH instead of being checked out and built.
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false

# Workflow-wide environment shared by every job.
env:
  # NOTE(review): the HOST_* values are not referenced by name in this
  # workflow's job definitions (volume mounts spell the paths out) — confirm
  # they are still consumed somewhere.
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Per-run directories, keyed by the tested ref and the workflow run id.
  REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
  # Pre-staged code and wheel that are copied when offline_mode is enabled.
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache
  # Presumably keeps the Hugging Face libraries on local caches only — confirm.
  HF_DATASETS_OFFLINE: 1
  HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
  HF_HUB_OFFLINE: 1
  HF_EVALUATE_OFFLINE: 1
  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}

jobs:
  # Builds the manylinux wheel on a GitHub-hosted runner and uploads it as a
  # one-day artifact for download_pkgs. Skipped when offline_mode is enabled.
  linux-build:
    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
    steps:
      - name: Free disk space
        # Pinned to a release tag: tracking the moving `main` branch would let
        # any upstream push change what runs in this job.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          # Log the build parameters for easier debugging.
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          echo "${GITHUB_RUN_ID}"
          # remove -it (there is no TTY on CI runners)
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


  # Stages the code under test and its wheel into TEST_CODE_PATH on the shared
  # /nvme volume, from a fresh checkout plus the build artifact or, in offline
  # mode, from OFFLINE_CODE_PATH. Runs even if linux-build was skipped.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # The paths below come from the workflow-level env and embed the
      # user-supplied ref. They are read as quoted shell variables rather than
      # pasted into the script text, and `mkdir -p` is used so that refs
      # containing "/" (e.g. feature/x) do not fail on a missing parent.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf "$TEST_CODE_PATH" && mkdir -p "$TEST_CODE_PATH" && chmod 777 "$TEST_CODE_PATH" && cp -r . "$TEST_CODE_PATH"
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf "$TEST_CODE_PATH" && mkdir -p "$TEST_CODE_PATH" && chmod 777 "$TEST_CODE_PATH" && cp -r "$OFFLINE_CODE_PATH"/. "$TEST_CODE_PATH"
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -f "$TEST_CODE_PATH"/lmdeploy-*.whl && cp lmdeploy-*.whl "$TEST_CODE_PATH"
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm -f "$TEST_CODE_PATH"/lmdeploy-*.whl && cp "$OFFLINE_CODE_PATH"/lmdeploy-*.whl "$TEST_CODE_PATH"
      - name: Mark as start
        run: |
          chmod -R 777 "$TEST_CODE_PATH"
          mkdir -p "$REPORT_DIR"
          # Record the start time of the run in the shared status file.
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"

  # Runs the API evaluation tests for every backend x gpu_num x transformers
  # combination inside the lmdeploy CUDA 12.8 container.
  test_evaluation:
    needs: download_pkgs
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 7200
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
        # "" keeps the installed transformers; "legacy" downgrades it below.
        transformers: ["", "legacy"]
    env:
      TEST_ENV: ${{ matrix.transformers }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          # Pull the staged code and wheel into the job workspace.
          cp -r "$TEST_CODE_PATH"/. .
          mkdir -p "$REPORT_DIR"
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          git clone https://github.com/open-compass/opencompass.git --depth 1
          cd opencompass
          python3 -m pip install .
          python3 -m pip install langdetect
      - name: Downgrade transformers
        if: ${{matrix.transformers == 'legacy'}}
        run: |
          # Use the same interpreter as every other install step, so the
          # downgrade cannot land in a different Python environment.
          python3 -m pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          mkdir -p "$REPORT_DIR"
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"
      - name: Setup paths for evaluation
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        run: |
          # Run both phases even if the first fails, then report the failure.
          overall_exit=0
          ln -s /mnt/104/opencompass-data/data ./data
          ln -s /nvme/qa_test_models/resource/nltk_data /usr/share/nltk_data
          execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
          ulimit -n 65535
          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir="$REPORT_DIR" || overall_exit=$?
          fi
          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir="$REPORT_DIR" || overall_exit=$?
          fi
          exit $overall_exit
      - name: Clear workspace
        if: always()
        run: |
          echo "status=done" >> "$REPORT_DIR/status.txt"
          chmod -R 777 "$REPORT_DIR"
          # Empty the workspace including hidden entries: the copy step above
          # brings in dotfiles that a plain `rm -rf *` would leave behind.
          find "$(pwd)" -mindepth 1 -maxdepth 1 -exec rm -rf {} +

benchmark matrix .github/workflows/benchmark.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest, self-hosted, linux-a100, self-hosted, linux-a100

Jobs

linux-build, download_pkgs, benchmark

Matrix

benchmark_type, gpu_num, include, include.gpu_num, include.n, pyver, transformers→ , ${{fromJSON(github.event.inputs.benchmark_type)}}, 1, 2, 4, 8, gpu_num_1, gpu_num_2, gpu_num_4, gpu_num_8, legacy, py310

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
chmod -R 777 ${{env.TEST_CODE_PATH}} mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
cp -r ${{env.TEST_CODE_PATH}}/. . mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt

View raw YAML

# Manually dispatched workflow that builds (or reuses) an lmdeploy wheel and
# runs the autotest/benchmark performance suites on self-hosted A100 runners.
name: benchmark_test

on:
  workflow_dispatch:
    inputs:
      # Repository (owner/name) that the build and download jobs check out.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      # Git ref to test; it is also embedded in the per-run report/code paths.
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # List of benchmark suites; each entry selects
      # autotest/benchmark/test_<type>_performance.py in the benchmark job.
      benchmark_type:
        required: true
        description: 'Set benchmark type. Default is "["apiserver", "mllm_apiserver", "throughput", "longtext", "prefixcache"]"'
        type: string
        default: "['apiserver', 'mllm_apiserver', 'throughput', 'longtext', 'prefixcache']"
      # List of backends; decides which pytest marker filter the job applies.
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      # When true, linux-build is skipped and code plus wheel are copied from
      # OFFLINE_CODE_PATH instead of being checked out and built.
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false

# Workflow-wide environment shared by every job.
env:
  # NOTE(review): the HOST_* values are not referenced by name in this
  # workflow's job definitions (volume mounts spell the paths out) — confirm
  # they are still consumed somewhere.
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  # Wheel output directory name handed to build_wheel.sh and the upload step.
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  # Per-run directories, keyed by the tested ref and the workflow run id.
  REPORT_DIR: /nvme/qa_test_models/benchmark_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  ALLURE_REPORT_DIR: /nvme/qa_test_models/benchmark_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
  # Pre-staged code and wheel that are copied when offline_mode is enabled.
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}

jobs:
  # Builds the manylinux wheel on a GitHub-hosted runner and uploads it as a
  # one-day artifact for download_pkgs. Skipped when offline_mode is enabled.
  # OUTPUT_FOLDER comes from the workflow-level env.
  linux-build:
    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
    steps:
      - name: Free disk space
        # Pinned to a release tag: tracking the moving `main` branch would let
        # any upstream push change what runs in this job.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          # Log the build parameters for easier debugging.
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          echo "${GITHUB_RUN_ID}"
          # remove -it (there is no TTY on CI runners)
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Stages the code under test and its wheel into TEST_CODE_PATH on the shared
  # /nvme volume, from a fresh checkout plus the build artifact or, in offline
  # mode, from OFFLINE_CODE_PATH. Runs even if linux-build was skipped.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # The paths below come from the workflow-level env and embed the
      # user-supplied ref. They are read as quoted shell variables rather than
      # pasted into the script text, and `mkdir -p` is used so that refs
      # containing "/" (e.g. feature/x) do not fail on a missing parent.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf "$TEST_CODE_PATH" && mkdir -p "$TEST_CODE_PATH" && chmod 777 "$TEST_CODE_PATH" && cp -r . "$TEST_CODE_PATH"
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf "$TEST_CODE_PATH" && mkdir -p "$TEST_CODE_PATH" && chmod 777 "$TEST_CODE_PATH" && cp -r "$OFFLINE_CODE_PATH"/. "$TEST_CODE_PATH"
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -f "$TEST_CODE_PATH"/lmdeploy-*.whl && cp lmdeploy-*.whl "$TEST_CODE_PATH"
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm -f "$TEST_CODE_PATH"/lmdeploy-*.whl && cp "$OFFLINE_CODE_PATH"/lmdeploy-*.whl "$TEST_CODE_PATH"
      - name: Mark as start
        run: |
          chmod -R 777 "$TEST_CODE_PATH"
          mkdir -p "$REPORT_DIR"
          # Record the start time of the run in the shared status file.
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"

  # Runs each requested benchmark suite for every gpu_num x transformers
  # combination. The `include` entries attach the pytest-xdist worker count
  # `n` to each gpu_num (fewer workers as more GPUs are used per test).
  benchmark:
    needs: download_pkgs
    if: ${{github.event_name == 'schedule' || !cancelled()}}
    runs-on: [self-hosted, linux-a100]
    strategy:
      fail-fast: false
      matrix:
        benchmark_type: ${{fromJSON(github.event.inputs.benchmark_type)}}
        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
        # "" keeps the installed transformers; "legacy" downgrades it below.
        transformers: ["", "legacy"]
        include:
          - n: 8
            gpu_num: gpu_num_1
          - n: 4
            gpu_num: gpu_num_2
          - n: 2
            gpu_num: gpu_num_4
          - n: 1
            gpu_num: gpu_num_8
    env:
      TEST_ENV: ${{ matrix.transformers }}
    timeout-minutes: 480
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          # Pull the staged code and wheel into the job workspace.
          cp -r "$TEST_CODE_PATH"/. .
          mkdir -p "$REPORT_DIR"
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Downgrade transformers
        if: ${{matrix.transformers == 'legacy'}}
        run: |
          # Use the same interpreter as every other install step, so the
          # downgrade cannot land in a different Python environment.
          python3 -m pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      # Exactly one of the next three steps runs, depending on which backends
      # the `backend` input lists; only the pytest marker expression differs.
      - name: Run other benchmark - all
        if: contains(fromJson(github.event.inputs.backend), 'turbomind') && contains(fromJson(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function' --alluredir="$ALLURE_REPORT_DIR"
      - name: Run other benchmark - turbomind
        if: contains(fromJson(github.event.inputs.backend), 'turbomind') && !contains(fromJson(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and turbomind' --alluredir="$ALLURE_REPORT_DIR"
      - name: Run other benchmark - pytorch
        if: contains(fromJson(github.event.inputs.backend), 'pytorch') && !contains(fromJson(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and pytorch' --alluredir="$ALLURE_REPORT_DIR"
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> "$REPORT_DIR/status.txt"
          chmod -R 777 "$REPORT_DIR"
          # Recreate the workspace from scratch with open permissions so that
          # files written by root in the container are not left behind.
          workdir=$(pwd)
          cd ..
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

cuda12.8_whl_release matrix perms .github/workflows/cuda12.8_whl_release.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-latest, windows-latest, ubuntu-latest

Jobs

linux-build, windows-build, publish

Matrix

pyver→ 3.10, 3.11, 3.12, 3.13, py310, py311, py312, py313

Actions

jlumbroso/free-disk-space, softprops/action-gh-release

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
git config --global core.longpaths true
pip install build change-wheel-version
./builder/windows/setup_cuda.ps1
python -m build --wheel -o build/wheel Get-ChildItem -Path "build" -Filter "*.whl" | ForEach-Object { change_wheel_version $_.FullName --local-version cu128 --delete-old-wheel }
ver=$(cat lmdeploy/version.py | grep '__version__ =' | cut -d\' -f2) cuver=$ver+cu128 ls -lh cd artifact for file in *; do mv "$file" "`echo $file | sed "s/$ver/$cuver/g"`"; done
ls artifact/ -lh

View raw YAML

# Builds CUDA 12.8 wheels for Linux and Windows and attaches them to the
# GitHub release when the run was triggered by a tag.
name: cuda12.8-whl-release

on:
  # Any tag push triggers a build; manual runs are also allowed.
  push:
    tags:
      - '*'
  workflow_dispatch:

# NOTE(review): write access is granted to every job, although only the
# publish job creates a release — consider scoping it to that job.
permissions:
  contents: write

jobs:
  # Builds one manylinux wheel per Python version and uploads each as a
  # one-day artifact named linux-<pyver> for the publish job.
  linux-build:
    strategy:
      matrix:
        pyver: [py310, py311, py312, py313]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist
      # Quoted so the version is kept as a string, not parsed as a float.
      CUDA_VER: '12.8'
    steps:
      - name: Free disk space
        # Pinned to a release tag: this workflow holds `contents: write`, so
        # tracking the moving `main` branch of a third-party action is risky.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Build
        run: |
          # Log the build parameters for easier debugging.
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          # remove -it (there is no TTY on CI runners)
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}/*
          retention-days: 1
          name: linux-${{ matrix.pyver }}

  # Builds one Windows wheel per Python version and uploads each as a
  # one-day artifact named windows-<pyver> for the publish job.
  windows-build:
    strategy:
      matrix:
        pyver: ['3.10', '3.11', '3.12', '3.13']
    runs-on: windows-latest
    steps:
      # Enabled before checkout so long paths in the repository can be created.
      - name: Set git for windows
        run: |
          git config --global core.longpaths true
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Set up python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}
      - name: Install python packages
        run: |
          pip install build change-wheel-version
      - name: Setup CUDA Toolkit
        id: cuda-toolkit
        shell: pwsh
        run: ./builder/windows/setup_cuda.ps1
        env:
            INPUT_CUDA_VERSION: '12.8.1'
      # NOTE(review): the wheel is written to build/wheel, but Get-ChildItem
      # below lists only the direct children of build (no -Recurse), so the
      # change_wheel_version call may never match a file — confirm. The publish
      # job also appends +cu128 to file names, so fix both together if needed.
      - name: Build wheel
        run: |
          python -m build --wheel -o build/wheel
          Get-ChildItem -Path "build" -Filter "*.whl" | ForEach-Object { change_wheel_version $_.FullName --local-version cu128 --delete-old-wheel }
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: build/wheel/*
          retention-days: 1
          name: windows-${{ matrix.pyver }}

  # Collects every wheel artifact, tags the file names with the +cu128 local
  # version, and attaches them to the GitHub release (tag-triggered runs only).
  publish:
    runs-on: ubuntu-latest
    environment: 'prod'
    needs:
      - linux-build
      - windows-build
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifact
          merge-multiple: true
      - name: Add cuda version to package name
        run: |
          ver=$(grep '__version__ =' lmdeploy/version.py | cut -d\' -f2)
          # Fail early instead of renaming with an empty pattern.
          if [ -z "$ver" ]; then
            echo "could not read __version__ from lmdeploy/version.py" >&2
            exit 1
          fi
          cuver="${ver}+cu128"
          ls -lh
          cd artifact
          for file in *; do
            # Already carries the CUDA suffix: leave it alone so the suffix is
            # never appended twice.
            case "$file" in
              *"$cuver"*) continue ;;
            esac
            # Quoting $ver makes the match literal (dots are not wildcards).
            renamed="${file//"$ver"/$cuver}"
            # `mv` onto the same name is an error, so skip unchanged names.
            if [ "$renamed" != "$file" ]; then
              mv -- "$file" "$renamed"
            fi
          done
      - name: Display artifacts
        run: ls artifact/ -lh
      - name: Publish
        uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: artifact/*

daily_ete_test matrix .github/workflows/daily_ete_test.yml

Triggers

workflow_dispatch, schedule

Runs on

ubuntu-latest, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100, self-hosted, linux-a100

Jobs

linux-build, download_pkgs, test_quantization, test_tools, test_restful, test_pipeline, test_benchmark, test_restful_legacy, test_pipeline_legacy, get_coverage_report

Matrix

backend, exclude, exclude.backend, exclude.function, exclude.model, function, include, include.backend, include.case_info, include.extra, include.function, include.generate_type, include.model, include.model_path, include.tp, model, model_path, pyver, transformers→ , ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}, ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}, ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}, --logprobs-mode raw_logprobs, --logprobs-mode raw_logprobs --enable-return-routed-experts, 1, 2, 8, Intern-S1, InternVL3-38B, InternVL3_5-30B-A3B, OpenGVLab/InternVL3-38B, OpenGVLab/InternVL3_5-30B-A3B, Qwen/Qwen3-30B-A3B, Qwen/Qwen3-32B, Qwen/Qwen3-8B-Base, Qwen/Qwen3-VL-30B-A3B-Instruct, Qwen/Qwen3-VL-8B-Instruct, Qwen3-30B-A3B, Qwen3-32B, Qwen3-8B-Base, Qwen3-VL-30B-A3B-Instruct, Qwen3-VL-8B-Instruct, all, base, chat, chat_completions_v1, completions_v1, generate, internlm/Intern-S1, legacy, llm, logprob, mllm, other, py310, pytorch, turbomind

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
chmod -R 777 ${{env.TEST_CODE_PATH}} mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
cp -r ${{env.TEST_CODE_PATH}}/. . mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
python3 -m pip install auto_gptq matplotlib attrdict python3 -m pip install -r requirements/lite.txt

View raw YAML

# End-to-end regression workflow; runs on a weekday schedule and can also be
# dispatched manually with filters for backend, model type and function.
name: daily_ete_test

on:
  workflow_dispatch:
    inputs:
      # Repository (owner/name) that the build and download jobs check out.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      # Git ref to test; it is also embedded in the per-run report/code paths.
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The three list inputs below are parsed with fromJSON() in job matrices.
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: "['llm','mllm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      # When true, linux-build is skipped and a pre-staged code tree and wheel
      # are expected to be supplied by the operator.
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['quant', 'tools','restful','pipeline','benchmark','evaluation']"
  schedule:
    # 14:00 UTC, Sunday through Thursday.
    - cron: '00 14 * * 0-4'

# Workflow-wide environment shared by every job. `inputs.repo_ref` is empty on
# scheduled runs, hence the `|| 'main'` fallbacks below.
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  # Wheel output directory name handed to build_wheel.sh and the upload step.
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  ROOT_DIR: /nvme/qa_test_models
  # Per-run directories, keyed by the tested ref and the workflow run id.
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  # Pre-staged offline resources on the shared volume.
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
  RUN_ID: ${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}

jobs:
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
    steps:
      - name: Free disk space
        # Pinned to a release tag: `@main` is a mutable branch, so every run could
        # silently pick up new, unreviewed action code.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        # v3 targets the deprecated Node 16 runtime; v4 takes the same inputs and
        # matches the v4 artifact action already used by this job.
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # Echoes the build parameters, then builds the wheel with the repository's
      # manywheel script. The sed call strips `-it` from the script's
      # `docker run` first - presumably because the runner has no interactive TTY.
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        # Recreates the per-run code directory from the fresh checkout.
        # `mkdir -p` because TEST_CODE_PATH embeds inputs.repo_ref: a ref such as
        # `feature/x` adds a directory level whose parent does not exist yet.
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        # Recreates the per-run code directory from the pre-staged offline copy.
        # `mkdir -p` because TEST_CODE_PATH embeds inputs.repo_ref: a ref such as
        # `feature/x` adds a directory level whose parent does not exist yet.
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Mark as start
        run: |
          chmod -R 777 ${{env.TEST_CODE_PATH}}
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt

  test_quantization:
    needs: download_pkgs
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 150
    strategy:
      matrix:
        transformers: ["", "legacy"]
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
      TEST_ENV: ${{ matrix.transformers }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install auto_gptq matplotlib attrdict
          python3 -m pip install -r requirements/lite.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          rm -rf ${{env.DEEPSEEK_VL}}/build
      # Pins transformers, prints the environment, clears old allure results and
      # redirects autotest/.pytest_cache to a directory under REPORT_DIR.
      # NOTE(review): transformers is pinned here on every matrix leg, including
      # the non-legacy one (matrix.transformers == ""), whereas test_tools only
      # downgrades when matrix.transformers == 'legacy' - confirm this is intended.
      - name: Check env
        run: |
          pip install transformers==4.57.6
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  test_tools:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
        transformers: ["", "legacy"]
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
        exclude:
          - backend: turbomind
            model: mllm
            function: chat
          - backend: pytorch
            model: mllm
            function: chat
        include:
          - backend: turbomind
            model: llm
            function: other
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
      TEST_ENV: ${{ matrix.transformers }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          rm -rf ${{env.DEEPSEEK_VL}}/build
      - name: Downgrade transformers
        if: ${{matrix.transformers == 'legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          cp -r /nvme/qa_test_models/offline_pkg/lora .
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Runs the chat command suite once per GPU tier (1, 2, 4, 8 GPUs) and
      # archives the coverage file after each run. The intermediate coverage
      # moves are best-effort: the script runs under `-e`, so without `|| true`
      # a tier that leaves no .coverage file would abort the remaining tiers.
      - name: Test lmdeploy - chat
        continue-on-error: true
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Runs the pipeline suite for the current backend/model once per GPU tier
      # (1, 2, 4, 8 GPUs) and archives the coverage file after each run. The
      # intermediate coverage moves are best-effort: the script runs under `-e`,
      # so without `|| true` a tier that leaves no .coverage file would abort
      # the remaining tiers.
      - name: Test lmdeploy - pipeline
        continue-on-error: true
        if: matrix.function == 'pipeline'
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Runs the restful suite for the current backend/model once per GPU tier
      # (1, 2, 4, 8 GPUs) and archives the coverage file after each run. The
      # intermediate coverage moves are best-effort: the script runs under `-e`,
      # so without `|| true` a tier that leaves no .coverage file would abort
      # the remaining tiers.
      - name: Test lmdeploy - restful
        continue-on-error: true
        if: matrix.function == 'restful'
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - local testcase
        if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'other'
        run: |
          pytest autotest/toolchain --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  test_restful:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        # Base combinations: every selected backend x every model_path.
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        model_path: ['Qwen/Qwen3-8B-Base', 'Qwen/Qwen3-30B-A3B', 'Qwen/Qwen3-32B', 'OpenGVLab/InternVL3_5-30B-A3B', 'OpenGVLab/InternVL3-38B', 'Qwen/Qwen3-VL-8B-Instruct', 'Qwen/Qwen3-VL-30B-A3B-Instruct']
        # Each include entry attaches tp / model / case_info / generate_type (and
        # optional `extra` server flags) to the base combination(s) sharing its
        # model_path. Entries that also name a backend apply to that backend
        # only; entries without one apply to every backend.
        # NOTE(review): per GitHub's include rules, an entry that matches no base
        # combination becomes a new job, so entries naming a backend that was
        # filtered out via inputs.backend would presumably still run - confirm.
        include:
          - tp: 2
            model: Qwen3-8B-Base
            model_path: Qwen/Qwen3-8B-Base
            case_info: ['completions_v1']
            generate_type: base
          - tp: 2
            model: Qwen3-30B-A3B
            model_path: Qwen/Qwen3-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: all
            extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
            backend: pytorch
          - tp: 2
            model: Qwen3-30B-A3B
            model_path: Qwen/Qwen3-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
            backend: turbomind
          - tp: 2
            model: InternVL3_5-30B-A3B
            model_path: OpenGVLab/InternVL3_5-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
            backend: pytorch
          - tp: 2
            model: InternVL3_5-30B-A3B
            model_path: OpenGVLab/InternVL3_5-30B-A3B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
            backend: turbomind
          - tp: 2
            model: Qwen3-VL-30B-A3B-Instruct
            model_path: Qwen/Qwen3-VL-30B-A3B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
            backend: pytorch
          - tp: 2
            model: Qwen3-VL-30B-A3B-Instruct
            model_path: Qwen/Qwen3-VL-30B-A3B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
            backend: turbomind
          - tp: 2
            model: Qwen3-32B
            model_path: Qwen/Qwen3-32B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 1
            model: Qwen3-VL-8B-Instruct
            model_path: Qwen/Qwen3-VL-8B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
            backend: pytorch
          - tp: 1
            model: Qwen3-VL-8B-Instruct
            model_path: Qwen/Qwen3-VL-8B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
            backend: turbomind
          - tp: 2
            model: InternVL3-38B
            model_path: OpenGVLab/InternVL3-38B
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Starts the API server in the background for the matrix model/backend,
      # with its output redirected to a log file under REPORT_DIR, then polls
      # /health every 5 seconds for up to 240 tries (about 20 minutes).
      # On the first successful check the step ends with success; if none
      # succeeds it calls /terminate on the server and fails the step.
      - name: Start restful api
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} --allow-terminate-by-client > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 240)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              echo "health check success"
              exit 0
            fi
          done

          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          exit 1
      - name: Test lmdeploy - chat_completions_v1
        if:  matrix.model != 'internlm2_5-20b-chat' && matrix.model != 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1
        if: matrix.model == 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1 - internlm2_5-20b-chat
        if:  matrix.model == 'internlm2_5-20b-chat' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - internlm2_5-20b
        if: matrix.model == 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - other
        if: matrix.model != 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - base
        if:  matrix.generate_type == 'base' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not logprob and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - logprob
        if:  matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - all
        if:  matrix.generate_type == 'all' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Kill api server
        if: always()
        run: |
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  test_pipeline:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 240
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          rm -rf ${{env.DEEPSEEK_VL}}/build
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Runs the pipeline interface tests, then the long-text tests once per GPU
      # tier (1, 2, 4, 8 GPUs), archiving the coverage file after each run.
      # Every coverage move that is followed by another pytest run is
      # best-effort: the script runs under `-e`, so without `|| true` a missing
      # .coverage file would skip the remaining runs.
      - name: Test lmdeploy - interface pipeline case
        run: |
          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_8 and not pr_test' -n 1 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir


  test_benchmark:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 120
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          rm -rf ${{env.DEEPSEEK_VL}}/build
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test benchmark script
        run: |
          pytest autotest/benchmark -n 4 -m function --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir


  test_restful_legacy:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        model_path: ['internlm/Intern-S1']
        include:
          - tp: 8
            model: Intern-S1
            model_path: internlm/Intern-S1
            case_info: ['chat_completions_v1', 'generate']
            generate_type: base
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          pip install transformers==4.57.6
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Start restful api
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} --allow-terminate-by-client > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          for i in $(seq 1 240)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              echo "health check success"
              exit 0
            fi
          done

          echo "health check fail"
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
          exit 1
      - name: Test lmdeploy - chat_completions_v1
        if:  matrix.model != 'internlm2_5-20b-chat' && matrix.model != 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1
        if: matrix.model == 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat_completions_v1 - internlm2_5-20b-chat
        if:  matrix.model == 'internlm2_5-20b-chat' && contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - internlm2_5-20b
        if: matrix.model == 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - other
        if: matrix.model != 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - base
        if:  matrix.generate_type == 'base' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not logprob and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - logprob
        if:  matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - all
        if:  matrix.generate_type == 'all' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Kill api server
        if: always()
        run: |
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Interface-level pipeline regression suite. Runs inside the lmdeploy cu12.8
  # container on a self-hosted A100 runner once test_quantization has finished.
  test_pipeline_legacy:
    # Runs on the schedule trigger, or on manual dispatch when the
    # regression_func input lists 'pipeline'.
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 240
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      # --pull never: the image must already be present on the runner host.
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Pull the staged source tree (and wheel) into the workspace and
      # (re)start the status file for this run's report directory.
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      # Replace any preinstalled lmdeploy with the staged wheel, then install
      # the package referenced by env.DEEPSEEK_VL (defined outside this job).
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
          rm -rf ${{env.DEEPSEEK_VL}}/build
      # Pin transformers, log the resulting environment, and point
      # autotest/.pytest_cache at the report directory via a symlink.
      - name: Check env
        run: |
          pip install transformers==4.57.6
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Each pytest call is best-effort (|| true). Every coverage move is
      # best-effort too, so a run that leaves no .coverage file cannot abort
      # the script before the remaining GPU-count suites have executed.
      - name: Test lmdeploy - interface pipeline case
        run: |
          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_8 and not pr_test' -n 1 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      # Always runs: mark the run as done and recreate an empty,
      # world-writable workspace directory for the next job on this runner.
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Final job: merges the per-step .coverage.* files collected in the report
  # directory into a single coverage report.
  get_coverage_report:
    needs:
      - test_tools
      - test_restful
      - test_pipeline
      - test_benchmark
    # Runs even when upstream jobs failed, as long as the run was not cancelled.
    if: ${{!cancelled()}}
    runs-on:
      - self-hosted
      - linux-a100
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Bring the staged source tree and wheel into the workspace.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      # Appends the "done" marker to the status file, then reinstalls lmdeploy
      # from the staged wheel together with the test requirements.
      - name: Install lmdeploy
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Combine the coverage data found in the report directory, emit XML and
      # a terminal report, and store the merged data file next to them.
      - name: Get coverage report
        run: |
          pip install coverage
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage
      # Always runs: open up permissions and reset the workspace directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 ${{env.ROOT_DIR}}
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

daily_ete_test_3090 matrix .github/workflows/daily_ete_test_3090.yml

Triggers

workflow_dispatch, schedule

Runs on

ubuntu-latest, self-hosted, 3090-r1, self-hosted, 3090-r1, self-hosted, 3090-r1, self-hosted, 3090-r1, self-hosted, 3090-r1

Jobs

linux-build, download_pkgs, test_quantization, test_tools, test_restful, get_coverage_report

Matrix

backend, exclude, exclude.backend, exclude.function, exclude.model, function, include, include.case_info, include.extra, include.generate_type, include.model, include.model_path, include.tp, model, model_path, pyver, transformers→ ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}, ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}, ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}, --logprobs-mode raw_logprobs, 1, 3090, 3090_legacy, Qwen/Qwen3-8B, Qwen3-8B, base, chat, chat_completions_v1, completions_v1, generate, internlm/internlm3-8b-instruct, internlm3-8b-instruct, logprob, mllm, py310, pytorch, turbomind

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
mkdir ${{env.REPORT_DIR}} -p; echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
cp -r ${{env.TEST_CODE_PATH}}/. .; mkdir ${{env.REPORT_DIR}} -p; echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
python3 -m pip install auto_gptq matplotlib; python3 -m pip install -r requirements/lite.txt

View raw YAML

# Daily end-to-end regression workflow for the 3090 self-hosted runners.
# Can be started manually (with filters) or by the cron schedule below.
name: daily_ete_test_3090

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository in "owner/name" form. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The list-valued inputs below are parsed with fromJSON() by the jobs,
      # so their defaults are written as strict JSON (double-quoted strings).
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: '["turbomind", "pytorch"]'
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: '["llm", "mllm"]'
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: '["quant", "tools", "restful"]'
  schedule:
    # GitHub evaluates cron in UTC: 14:00, Sunday through Thursday.
    - cron: '00 14 * * 0-4'

# Least privilege: the jobs only need to read repository contents (checkout).
permissions:
  contents: read

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Per-run directories: "<ref>_<run id>", falling back to "main" when the
  # repo_ref input is empty (e.g. on scheduled runs).
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  # Scheduled re-run attempts use '--lf --lfnf none'; everything else '--lf'.
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  RUN_ID: ${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}

jobs:
  # Builds the lmdeploy wheel on a GitHub-hosted runner and uploads it as a
  # short-lived artifact. Skipped for manual runs with offline_mode enabled.
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.4
    steps:
      - name: Free disk space
        # Pinned to a release tag instead of the mutable main branch so that
        # upstream changes cannot silently alter this job.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # Logs the build parameters, strips the interactive "-it" flags from the
      # build script (no TTY in CI), then builds the wheel.
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      # The artifact name is consumed by download_pkgs; fail loudly when the
      # build produced no files.
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


  # Stages the source tree and the lmdeploy wheel under TEST_CODE_PATH on the
  # self-hosted runner, either from this run's build or from the offline copy.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, 3090-r1]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /data1:/data1
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # NOTE(review): checkout@v2 is an old major version; presumably kept for
      # compatibility with this container image - confirm before upgrading.
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # mkdir -p: TEST_CODE_PATH embeds repo_ref, which may contain '/'
      # (e.g. "feature/x"), and the parent directory may not exist yet.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      # Artifact name must match the one uploaded by linux-build for py310.
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      # Create the report directory and record the run's start timestamp.
      - name: Mark as start
        run: |
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt

  # Quantization regression tests (w4a16 / w8a8) on the 3090 runner.
  test_quantization:
    needs: download_pkgs
    # Requires a successful download_pkgs; runs on schedule, or on manual
    # dispatch when regression_func lists 'quant'.
    if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
    runs-on: [self-hosted, 3090-r1]
    timeout-minutes: 150
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
      TEST_ENV: 3090_legacy
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /data1:/data1
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install auto_gptq matplotlib
          python3 -m pip install -r requirements/lite.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Pin transformers first so the package list logged below reflects the
      # environment the tests actually run in.
      - name: Check env
        run: |
          pip install transformers==4.57.6
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Test steps are best-effort: pytest failures are swallowed (|| true)
      # and the step itself is allowed to fail (continue-on-error).
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Always runs: mark the job as done and reset the workspace directory.
      # Paths are quoted so an unexpected space cannot split the rm/chmod
      # arguments.
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 "$REPORT_DIR"
          export workdir=$(pwd)
          cd ..
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

  # Tool-level regression matrix for the 3090 runner: one job per
  # backend x transformers variant x model kind x function combination.
  test_tools:
    needs: test_quantization
    # Skipped when test_quantization failed; otherwise runs on schedule, or
    # on manual dispatch when regression_func lists 'tools'.
    if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
    runs-on:
      - self-hosted
      - 3090-r1
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        transformers:
          - '3090'
          - '3090_legacy'
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
        # The chat function is not run for mllm on either backend.
        exclude:
          - backend: turbomind
            model: mllm
            function: chat
          - backend: pytorch
            model: mllm
            function: chat
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
      TEST_ENV: ${{matrix.transformers}}
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /data1:/data1
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Fetch the staged sources and wheel; restart the status file.
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Only the "_legacy" matrix variant pins the older transformers release.
      - name: Downgrade transformers
        if: ${{matrix.transformers == '3090_legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Exactly one of the three test steps below matches a given matrix
      # entry's function; each is best-effort and never fails the job.
      - name: Test lmdeploy - chat
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
        continue-on-error: true
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test lmdeploy - pipeline
        if: matrix.function == 'pipeline'
        continue-on-error: true
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test lmdeploy - restful
        if: matrix.function == 'restful'
        continue-on-error: true
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      # Always runs: mark the job as done and reset the workspace directory.
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # RESTful API regression tests: starts an lmdeploy api_server per matrix
  # entry and runs the interface test suites against it.
  test_restful:
    # Skipped when test_quantization failed; otherwise runs on schedule, or
    # on manual dispatch when regression_func lists 'restful'.
    if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
    runs-on: [self-hosted, 3090-r1]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        transformers: ["3090", "3090_legacy"]
        model_path: ['internlm/internlm3-8b-instruct', 'Qwen/Qwen3-8B']
        # Each include entry matches on model_path and attaches the per-model
        # settings (tp, test cases, generate mode, optional extra server flags).
        include:
          - tp: 1
            model: internlm3-8b-instruct
            model_path: internlm/internlm3-8b-instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 1
            model: Qwen3-8B
            model_path: Qwen/Qwen3-8B
            case_info: ['completions_v1']
            generate_type: base
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    env:
      TEST_ENV: ${{matrix.transformers}}
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Only the "_legacy" matrix variant pins the older transformers release.
      - name: Downgrade transformers
        if: ${{matrix.transformers == '3090_legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Launches the server in the background, then polls /health every 5 s
      # for up to 180 attempts; on timeout the server is terminated and the
      # step fails.
      - name: Start restful api
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
          # Keep the PID in a shell variable: values appended to GITHUB_ENV
          # only become visible in later steps, not in the rest of this script.
          restful_pid=$!
          echo "restful_pid=$restful_pid" >> "$GITHUB_ENV"
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              echo "health check success"
              exit 0
            fi
          done

          echo "health check fail"
          kill -15 "$restful_pid" 2>/dev/null || true
          exit 1
      - name: Test lmdeploy - chat_completions_v1
        if:  contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - completions_v1 - other
        if: contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test generate - logprob
        if:  matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # restful_pid comes from GITHUB_ENV (set by "Start restful api"). It is
      # unset when an earlier step failed before the server was launched.
      - name: Kill api server
        if: always()
        run: |
          if [ -z "$restful_pid" ]; then
            echo "api server was never started; nothing to kill"
            exit 0
          fi
          kill -15 "$restful_pid"
      # Always runs: mark the job as done and reset the workspace directory.
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Final job: merges the coverage data files gathered in the report
  # directory by the test jobs into one report.
  get_coverage_report:
    needs:
      - test_tools
      - test_restful
    # Runs even when upstream jobs failed, as long as the run was not cancelled.
    if: ${{!cancelled()}}
    runs-on:
      - self-hosted
      - 3090-r1
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Bring the staged source tree and wheel into the workspace.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      # Appends the "done" marker to the status file, then reinstalls lmdeploy
      # from the staged wheel together with the test requirements.
      - name: Install lmdeploy
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Combine the coverage data found in the report directory, emit XML and
      # a terminal report, and store the merged data file next to them.
      - name: Get coverage report
        run: |
          pip install coverage
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage
      # Always runs: open up permissions and reset the workspace directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

daily_ete_test_5080 matrix .github/workflows/daily_ete_test_5080.yml

Triggers

workflow_dispatch, schedule

Runs on

ubuntu-latest, self-hosted, 5080-r1, self-hosted, 5080-r1, self-hosted, 5080-r1, self-hosted, 5080-r1, self-hosted, 5080-r1

Jobs

linux-build, download_pkgs, test_quantization, test_tools, test_restful, get_coverage_report

Matrix

backend, exclude, exclude.backend, exclude.function, exclude.model, function, include, include.case_info, include.extra, include.generate_type, include.model, include.model_path, include.tp, model, model_path, pyver, transformers→ ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}, ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}, ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}, --logprobs-mode raw_logprobs, 1, 5080, 5080_legacy, Llama-3.2-3B-Instruct, Qwen/Qwen3-4B, Qwen3-4B, base, chat, chat_completions_v1, completions_v1, generate, logprob, meta-llama/Llama-3.2-3B-Instruct, mllm, py310, pytorch, turbomind

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
cp -r ${{env.TEST_CODE_PATH}}/. . mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
python3 -m pip install auto_gptq matplotlib python3 -m pip install -r requirements/lite.txt

View raw YAML

name: daily_ete_test_5080

# Triggers: manual dispatch (with test filters) and a scheduled run.
# The list-valued inputs are JSON arrays because the jobs below parse them
# with fromJSON(); keep their defaults strictly valid JSON (double quotes).
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: '["turbomind", "pytorch"]'
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: '["llm", "mllm"]'
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: '["quant", "tools", "restful"]'
  schedule:
    # GitHub evaluates cron in UTC: 14:00 on days 0-4 (Sunday to Thursday).
    - cron:  '00 14 * * 0-4'

# Workflow-wide environment, available to every job and step below.
env:
  # Per-run identifier: the requested ref (or 'main') joined with the run id.
  # Not referenced by the steps of this workflow; presumably read by the test
  # code through the environment -- TODO confirm.
  RUN_ID: ${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}

  # Per-run locations on the shared /nvme/qa_test_models volume.
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}

  # Pre-staged sources and requirements used by the offline-mode copy steps
  # and by the dependency installation steps.
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt

  # Folder (under builder/manywheel) where the wheel build writes its output.
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}

  # pytest options. COV_PARAM is appended to every pytest call below;
  # FAIL_CONFIG is not referenced by the steps of this workflow.
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}

  # Host-side paths; the jobs below spell the same paths out literally in
  # their container volume lists.
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai

  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

jobs:
  # Builds the lmdeploy wheel on a GitHub-hosted runner and uploads it as a
  # one-day artifact for download_pkgs. Runs on schedule, or on manual
  # dispatch when offline_mode is not enabled.
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
    steps:
      - name: Free disk space
        # Pinned to a release tag (same one docker_dev.yml uses) rather than
        # the moving main branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          echo "${GITHUB_RUN_ID}"
          # remove -it (no TTY is available in CI)
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          # download_pkgs fetches this artifact by the same name (py310).
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


  # Stages the source tree and the lmdeploy wheel into TEST_CODE_PATH on the
  # shared volume so the test jobs can copy them from there. In offline mode
  # both come from OFFLINE_CODE_PATH instead of a checkout and the artifact.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, 5080-r1]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      # "--pull never": the image must already be present on the runner.
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/3090:/mnt/3090
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        # mkdir -p: the parent directory may not exist yet, and a ref such as
        # "feature/x" makes TEST_CODE_PATH a nested path.
        run: |
          rm -rf "${{env.TEST_CODE_PATH}}" && mkdir -p "${{env.TEST_CODE_PATH}}" && cp -r . "${{env.TEST_CODE_PATH}}"
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: |
          rm -rf "${{env.TEST_CODE_PATH}}" && mkdir -p "${{env.TEST_CODE_PATH}}" && cp -r "${{env.OFFLINE_CODE_PATH}}/." "${{env.TEST_CODE_PATH}}"
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        # Drop any wheel that came along with the source copy, then add the
        # freshly built one.
        run: |
          rm -f "${{env.TEST_CODE_PATH}}"/lmdeploy-*.whl && cp lmdeploy-*.whl "${{env.TEST_CODE_PATH}}"
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: |
          rm -f "${{env.TEST_CODE_PATH}}"/lmdeploy-*.whl && cp "${{env.OFFLINE_CODE_PATH}}"/lmdeploy-*.whl "${{env.TEST_CODE_PATH}}"
      - name: Mark as start
        run: |
          mkdir -p "${{env.REPORT_DIR}}"
          echo "starttime=$(date +%s)" > "${{env.REPORT_DIR}}/status.txt"

  # Runs the quantization tests (w4a16 for turbomind, w8a8 for pytorch) and
  # stores allure results and coverage data in REPORT_DIR. Runs on schedule,
  # or on manual dispatch when regression_func contains 'quant'.
  test_quantization:
    needs: download_pkgs
    if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
    runs-on: [self-hosted, 5080-r1]
    timeout-minutes: 150
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
      # Quoted so the value is an explicit string, like the matrix values
      # ("5080", "5080_legacy") used for TEST_ENV in the other jobs.
      TEST_ENV: '5080'
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/3090:/mnt/3090
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install auto_gptq matplotlib
          python3 -m pip install -r requirements/lite.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          # Poll up to 10 times (5 s apart) until check_env stops reporting
          # "CUDA available: False". The loop itself never fails the step.
          for i in $(seq 1 10); do
            output=$(lmdeploy check_env 2>&1)
            if echo "$output" | grep -q "CUDA available: False"; then
              echo "CUDA not available (attempt $i/10), retrying in 5 seconds..."
              sleep 5
            else
              echo "CUDA check passed"
              break
            fi
          done
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          # Keep the pytest cache in the report directory via a symlink.
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
          # Tolerate a missing .coverage file, as the test_tools steps do.
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Clear workfile
        if: always()
        run: |
          # Best effort: the report dir may be missing if an early step
          # failed, and that must not stop the workspace reset below.
          chmod -R 777 "$REPORT_DIR" || true
          workdir="$(pwd)"
          cd ..
          # Recreate the workspace empty and world-writable for the next job.
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

  # Runs the chat / pipeline / restful tool tests across a
  # backend x model x transformers x function matrix. Runs on schedule, or
  # on manual dispatch when regression_func contains 'tools'.
  test_tools:
    if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
    runs-on: [self-hosted, 5080-r1]
    needs: test_quantization
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
        transformers: ["5080", "5080_legacy"]
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
        # The chat function is only run for llm models.
        exclude:
          - backend: turbomind
            model: mllm
            function: chat
          - backend: pytorch
            model: mllm
            function: chat
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
      TEST_ENV: ${{ matrix.transformers }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/3090:/mnt/3090
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Downgrade transformers
        # Only the "5080_legacy" matrix leg uses the older transformers pin.
        if: ${{matrix.transformers == '5080_legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          # Poll up to 10 times (5 s apart) until check_env stops reporting
          # "CUDA available: False". The loop itself never fails the step.
          for i in $(seq 1 10); do
            output=$(lmdeploy check_env 2>&1)
            if echo "$output" | grep -q "CUDA available: False"; then
              echo "CUDA not available (attempt $i/10), retrying in 5 seconds..."
              sleep 5
            else
              echo "CUDA check passed"
              break
            fi
          done
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          # Keep the pytest cache in the report directory via a symlink.
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Test lmdeploy - chat
        continue-on-error: true
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test lmdeploy - pipeline
        continue-on-error: true
        if: matrix.function == 'pipeline'
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test lmdeploy - restful
        continue-on-error: true
        if: matrix.function == 'restful'
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Clear workfile
        if: always()
        run: |
          # Best effort: the report dir may be missing if an early step
          # failed, and that must not stop the workspace reset below.
          chmod -R 777 "$REPORT_DIR" || true
          workdir="$(pwd)"
          cd ..
          # Recreate the workspace empty and world-writable for the next job.
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

  # Starts an lmdeploy api_server per matrix leg and runs the RESTful
  # interface tests against it. Runs on schedule, or on manual dispatch when
  # regression_func contains 'restful'.
  test_restful:
    if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
    runs-on: [self-hosted, 5080-r1]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        model_path: ['meta-llama/Llama-3.2-3B-Instruct', 'Qwen/Qwen3-4B']
        transformers: ["5080", "5080_legacy"]
        # Each include entry attaches per-model settings to the combinations
        # whose model_path matches.
        include:
          - tp: 1
            model: Llama-3.2-3B-Instruct
            model_path: meta-llama/Llama-3.2-3B-Instruct
            case_info: ['chat_completions_v1', 'generate']
            generate_type: logprob
            extra: '--logprobs-mode raw_logprobs'
          - tp: 1
            model: Qwen3-4B
            model_path: Qwen/Qwen3-4B
            case_info: ['completions_v1']
            generate_type: base
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/3090:/mnt/3090
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    env:
      TEST_ENV: ${{ matrix.transformers }}
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Downgrade transformers
        # Only the "5080_legacy" matrix leg uses the older transformers pin.
        if: ${{matrix.transformers == '5080_legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          # Poll up to 10 times (5 s apart) until check_env stops reporting
          # "CUDA available: False". The loop itself never fails the step.
          for i in $(seq 1 10); do
            output=$(lmdeploy check_env 2>&1)
            if echo "$output" | grep -q "CUDA available: False"; then
              echo "CUDA not available (attempt $i/10), retrying in 5 seconds..."
              sleep 5
            else
              echo "CUDA check passed"
              break
            fi
          done
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
          # Keep the pytest cache in the report directory via a symlink.
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      - name: Start restful api
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
          # Values written to $GITHUB_ENV only become visible in LATER steps,
          # so keep the PID in a shell variable for the cleanup in this step.
          restful_pid=$!
          echo "restful_pid=${restful_pid}" >> "$GITHUB_ENV"
          # Wait up to 50 x 5 s for the health endpoint to answer.
          for i in $(seq 1 50)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              echo "health check success"
              exit 0
            fi
          done

          echo "health check fail"
          kill -15 "$restful_pid" 2>/dev/null || true
          exit 1
      - name: Test lmdeploy - chat_completions_v1
        if:  contains(matrix.case_info, 'chat_completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          # Tolerate a missing .coverage file, as the test_tools steps do, so
          # the remaining test steps still run.
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test lmdeploy - completions_v1 - other
        if: contains(matrix.case_info, 'completions_v1')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Test generate - logprob
        if:  matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
        timeout-minutes: 60
        run: |
          pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Kill api server
        if: always()
        run: |
          # The server may never have been started (an earlier step failed)
          # or may already have exited; neither case should fail the job.
          if [ -n "${restful_pid:-}" ]; then
            kill -15 "$restful_pid" 2>/dev/null || true
          fi
      - name: Clear workfile
        if: always()
        run: |
          # Best effort: the report dir may be missing if an early step
          # failed, and that must not stop the workspace reset below.
          echo "status=done" >> "${{env.REPORT_DIR}}/status.txt" || true
          chmod -R 777 "$REPORT_DIR" || true
          workdir="$(pwd)"
          cd ..
          # Recreate the workspace empty and world-writable for the next job.
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

  # Combines the per-step .coverage.* files collected in REPORT_DIR into a
  # single report (XML plus console summary) once the test jobs are done.
  get_coverage_report:
    if: ${{!cancelled()}}
    runs-on: [self-hosted, 5080-r1]
    needs: [test_tools, test_restful]
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/3090:/mnt/3090
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Get coverage report
        run: |
          pip install coverage
          # Merge the data files the test jobs moved into the report dir.
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage
      - name: Clear workfile
        if: always()
        run: |
          # Best effort: the report dir may be missing if an early step
          # failed, and that must not stop the workspace reset below.
          chmod -R 777 "$REPORT_DIR" || true
          workdir="$(pwd)"
          cd ..
          # Recreate the workspace empty and world-writable for the next job.
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

docker matrix .github/workflows/docker.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest

Jobs

publish_docker_image, publish_ascend_docker_image

Matrix

cuda_version→ cu12, cu12.8

Actions

jlumbroso/free-disk-space, docker/login-action, jlumbroso/free-disk-space, docker/setup-qemu-action, docker/setup-buildx-action, docker/login-action

Commands

docker info # remove http extraheader git config --local --unset "http.https://github.com/.extraheader"
export TAG=$TAG_PREFIX:${{github.event.inputs.image_tag}}-${CUDA_VERSION} echo $TAG echo "TAG=${TAG}" >> $GITHUB_ENV
echo $TAG docker build . -f docker/Dockerfile -t ${TAG} --build-arg CUDA_VERSION=${CUDA_VERSION} docker push $TAG
export latest_TAG=${TAG_PREFIX}:latest echo $latest_TAG docker tag $TAG $latest_TAG docker push $latest_TAG
export RELEASE_TAG=${TAG_PREFIX}:${{github.ref_name}}-${CUDA_VERSION} echo $RELEASE_TAG docker tag $TAG $RELEASE_TAG docker push $RELEASE_TAG
docker info # remove http extraheader git config --local --unset "http.https://github.com/.extraheader"
export TAG=$TAG_PREFIX:${{github.event.inputs.image_tag}}-ascend echo $TAG echo "TAG=${TAG}" >> $GITHUB_ENV
echo $TAG docker build . -t ${TAG} -f docker/Dockerfile_ascend_a3 --platform linux/arm64 docker push $TAG

View raw YAML

name: publish-docker

# Triggers: pushes to main and v*.*.* tags (ignoring paths that cannot
# affect the image), plus manual dispatch with an explicit ref and tag.
on:
  push:
    paths-ignore:
      - ".github/**"
      - "docs/**"
      - "resources/**"
      - "benchmark/**"
      - "tests/**"
      - "**/*.md"
      - "autotest/**"
      - "builder/**"
      - "k8s/**"
      # Keep this negation AFTER ".github/**": filter patterns are evaluated
      # in order and a later match overrides an earlier one, so listing it
      # first lets ".github/**" cancel it.
      # NOTE(review): confirm "!" is honored inside paths-ignore.
      - "!.github/workflows/docker.yml"

    branches:
      - main
    tags:
      - "v*.*.*"
  workflow_dispatch:
    inputs:
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      image_tag:
        required: true
        description: 'Set docker image tag. Default is "latest"'
        type: string
        default: latest

jobs:
  # Builds and pushes the CUDA images, one per matrix cuda_version. The
  # default tag is "latest-<cuda_version>"; a manual dispatch overrides it
  # with the image_tag input, and tag pushes add a release-named tag.
  publish_docker_image:
    runs-on: ubuntu-latest
    environment: 'prod'
    strategy:
      fail-fast: false
      matrix:
        cuda_version: ['cu12.8', 'cu12']
    env:
      CUDA_VERSION: ${{ matrix.cuda_version }}
      TAG_PREFIX: "openmmlab/lmdeploy"
      TAG: "openmmlab/lmdeploy:latest-${{matrix.cuda_version}}"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{github.event.inputs.repo_ref}}
      - name: Free disk space
        # Pinned to a release tag (same one docker_dev.yml uses) rather than
        # the moving main branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader (tolerate missing key, as the nightly workflow does)
          git config --local --unset "http.https://github.com/.extraheader" || true
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Update docker TAG from workflow input
        if: github.event_name == 'workflow_dispatch'
        env:
          # Handed over through the environment instead of being expanded
          # into the script text, so the input cannot inject shell syntax.
          IMAGE_TAG: ${{ github.event.inputs.image_tag }}
        run: |
          TAG="${TAG_PREFIX}:${IMAGE_TAG}-${CUDA_VERSION}"
          echo "$TAG"
          # Makes the new TAG visible to the following steps.
          echo "TAG=${TAG}" >> "$GITHUB_ENV"
      - name: Build and push Docker image
        run: |
          echo "$TAG"
          docker build . -f docker/Dockerfile -t "${TAG}" --build-arg CUDA_VERSION="${CUDA_VERSION}"
          docker push "$TAG"
      - name: Push Docker image as latest
        # Only the "latest-cu12" image is additionally published as ":latest".
        if: endsWith(env.TAG, 'latest-cu12') == true
        run: |
          latest_TAG="${TAG_PREFIX}:latest"
          echo "$latest_TAG"
          docker tag "$TAG" "$latest_TAG"
          docker push "$latest_TAG"
      - name: Push docker image with released tag
        if: startsWith(github.ref, 'refs/tags/') == true
        run: |
          # GITHUB_REF_NAME is the runner-provided equivalent of github.ref_name.
          RELEASE_TAG="${TAG_PREFIX}:${GITHUB_REF_NAME}-${CUDA_VERSION}"
          echo "$RELEASE_TAG"
          docker tag "$TAG" "$RELEASE_TAG"
          docker push "$RELEASE_TAG"

  # Builds the Ascend image for linux/arm64 (QEMU + Buildx) and pushes it.
  # The default tag is ":ascend"; a manual dispatch overrides it with
  # "<image_tag>-ascend".
  publish_ascend_docker_image:
    runs-on: ubuntu-latest
    environment: 'prod'
    env:
      TAG_PREFIX: "openmmlab/lmdeploy"
      TAG: "openmmlab/lmdeploy:ascend"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{github.event.inputs.repo_ref}}
      - name: Free disk space
        # Pinned to a release tag (same one docker_dev.yml uses) rather than
        # the moving main branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader (tolerate missing key, as the nightly workflow does)
          git config --local --unset "http.https://github.com/.extraheader" || true
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Update docker TAG from workflow input
        if: github.event_name == 'workflow_dispatch'
        env:
          # Handed over through the environment instead of being expanded
          # into the script text, so the input cannot inject shell syntax.
          IMAGE_TAG: ${{ github.event.inputs.image_tag }}
        run: |
          TAG="${TAG_PREFIX}:${IMAGE_TAG}-ascend"
          echo "$TAG"
          # Makes the new TAG visible to the following steps.
          echo "TAG=${TAG}" >> "$GITHUB_ENV"
      - name: Build and push Docker image
        run: |
          echo "$TAG"
          docker build . -t "${TAG}" -f docker/Dockerfile_ascend_a3 --platform linux/arm64
          docker push "$TAG"

docker-nightly .github/workflows/docker-nightly.yml

Triggers

workflow_dispatch, schedule

Runs on

ubuntu-latest, image-sync-inner

Jobs

publish_docker_image, publish_inner_docker_image

Actions

jlumbroso/free-disk-space, docker/login-action

Commands

docker info # remove http extraheader (tolerate missing key) git config --local --unset "http.https://github.com/.extraheader" || true
echo ${{ env.TAG }} docker build . -f docker/Dockerfile -t ${{ env.TAG }} --build-arg CUDA_VERSION=cu12.8 docker push ${{ env.TAG }} mkdir -p docker cat > docker/Dockerfile.nightly-extended << 'EOF' ARG BASE_IMAGE=${{ env.TAG }} FROM ${BASE_IMAGE} COPY requirements /tmp/requirements RUN apt-get update -y && \ apt-get install -y --no-install-recommends libgl1 libglib2.0-0 && \ apt-get clean -y && \ rm -rf /var/lib/apt/lists/* RUN python3 -m pip install --no-cache-dir -r /tmp/requirements/lite.txt && \ python3 -m pip install --no-cache-dir -r /tmp/requirements/test.txt && \ pip install --no-cache-dir ${{ env.OFFLINE_REQUIREMENTS }} EOF docker build . -f docker/Dockerfile.nightly-extended \ --build-arg BASE_IMAGE=${{ env.TAG }} \ -t ${{ env.DEV_TAG }} docker push ${{ env.DEV_TAG }}
docker pull ${{ env.DEV_TAG }} docker login ${{ env.INNER_REGISTRY }} -p ${{ secrets.CLUSTER_DOCKERHUB_TOKEN }} -u ${{ secrets.CLUSTER_DOCKERHUB_USERNAME }} docker tag ${{ env.DEV_TAG }} ${{ env.INNER_TAG }} docker push ${{ env.INNER_TAG }}

View raw YAML

name: publish-docker-nightly

# Triggers: a scheduled build, or a manual run against a chosen ref.
on:
  schedule:
    # GitHub evaluates cron in UTC: 08:00 on days 0-4 (Sunday to Thursday).
    - cron: "00 8 * * 0-4"
  workflow_dispatch:
    inputs:
      repo_ref:
        type: string
        description: 'Set branch or tag or commit id. Default is "main"'
        default: main
        required: false

# Image names shared by both jobs below.
env:
  # Extended image (base image plus test requirements) that is synced to the
  # inner registry.
  DEV_TAG: 'openmmlab/lmdeploy:nightly-test-cu12.8'
  # Base nightly image built from docker/Dockerfile.
  TAG: 'openmmlab/lmdeploy:nightly-cu12.8'

jobs:
  # Builds and pushes the nightly base image (TAG), then generates a small
  # Dockerfile on top of it that installs the test requirements and pushes
  # the result as DEV_TAG.
  publish_docker_image:
    runs-on: ubuntu-latest
    env:
      # Repository variable; expanded into the generated Dockerfile below as
      # the arguments of a "pip install" command.
      OFFLINE_REQUIREMENTS: ${{ vars.OFFLINE_REQUIREMENTS }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Free disk space
        # Pinned to a release tag (same one docker_dev.yml uses) rather than
        # the moving main branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader (tolerate missing key)
          git config --local --unset "http.https://github.com/.extraheader" || true
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and push Docker image
        run: |
          echo ${{ env.TAG }}
          docker build . -f docker/Dockerfile -t ${{ env.TAG }} --build-arg CUDA_VERSION=cu12.8
          docker push ${{ env.TAG }}

          mkdir -p docker
          # The heredoc delimiter is quoted, so the shell expands nothing in
          # the body; the ${{ ... }} expressions are substituted by GitHub
          # Actions before the script runs, and ${BASE_IMAGE} is left for
          # Docker to resolve.
          cat > docker/Dockerfile.nightly-extended << 'EOF'
          ARG BASE_IMAGE=${{ env.TAG }}
          FROM ${BASE_IMAGE}
          COPY requirements /tmp/requirements
          RUN apt-get update -y && \
              apt-get install -y --no-install-recommends libgl1 libglib2.0-0 && \
              apt-get clean -y && \
              rm -rf /var/lib/apt/lists/*
          RUN python3 -m pip install --no-cache-dir -r /tmp/requirements/lite.txt && \
              python3 -m pip install --no-cache-dir -r /tmp/requirements/test.txt && \
              pip install --no-cache-dir ${{ env.OFFLINE_REQUIREMENTS }}
          EOF
          docker build . -f docker/Dockerfile.nightly-extended \
            --build-arg BASE_IMAGE=${{ env.TAG }} \
            -t ${{ env.DEV_TAG }}
          docker push ${{ env.DEV_TAG }}

  # Mirrors the extended nightly image (DEV_TAG) into the inner registry
  # under INNER_TAG.
  publish_inner_docker_image:
    runs-on: image-sync-inner
    needs: publish_docker_image
    env:
      INNER_REGISTRY: ${{ secrets.INNER_DOCKER_REGISTRY }}
      INNER_TAG: "${{ secrets.INNER_DOCKER_REGISTRY }}/ailab-puyu-puyu_gpu/lmdeploy:nightly-cu12.8"
    steps:
      - name: Pull and push to inner
        env:
          # Credentials are passed through the environment so they never
          # appear in the script text or on a command line.
          REGISTRY_USERNAME: ${{ secrets.CLUSTER_DOCKERHUB_USERNAME }}
          REGISTRY_TOKEN: ${{ secrets.CLUSTER_DOCKERHUB_TOKEN }}
        run: |
          docker pull "$DEV_TAG"
          # --password-stdin keeps the token out of the process list, unlike -p.
          printf '%s' "$REGISTRY_TOKEN" | docker login "$INNER_REGISTRY" -u "$REGISTRY_USERNAME" --password-stdin
          docker tag "$DEV_TAG" "$INNER_TAG"
          docker push "$INNER_TAG"

docker_dev .github/workflows/docker_dev.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest

Jobs

publish_dev_docker_image

Actions

jlumbroso/free-disk-space, docker/login-action

Commands

docker info # remove http extraheader git config --local --unset "http.https://github.com/.extraheader"
echo $TAG docker build . -f docker/Dockerfile_dev -t ${TAG} docker push $TAG

View raw YAML

name: publish-dev-docker

# Manual trigger only; the caller may choose which ref to build.
on:
  workflow_dispatch:
    inputs:
      repo_ref:
        type: string
        description: 'Set branch or tag or commit id. Default is "main"'
        default: main
        required: false

jobs:
  # Builds docker/Dockerfile_dev from the requested ref and pushes it as the
  # dev image named by TAG.
  publish_dev_docker_image:
    runs-on: ubuntu-latest
    environment: 'prod'
    env:
      TAG: "openmmlab/lmdeploy:dev-cu12.8"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.repo_ref }}

      - name: Free disk space
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Get docker info
        run: |
          docker info
          # remove http extraheader (tolerate missing key, as the nightly workflow does)
          git config --local --unset "http.https://github.com/.extraheader" || true

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and push Docker image
        run: |
          echo "$TAG"
          docker build . -f docker/Dockerfile_dev -t "${TAG}"
          docker push "$TAG"

evaluate matrix .github/workflows/evaluate.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest, self-hosted, linux-a100

Jobs

linux-build, evaluate

Matrix

evaluate_type, pyver→ base, py310

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')" echo "TIME_STAMP=$TIME_STAMP" >> $GITHUB_ENV
cp -r /root/models/offline_pkg/lmdeploy/. .
python3 -m pip install -r /root/models/offline_pkg/requirements.txt
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps python3 -m pip install -r requirements/test.txt
python3 -m pip install /root/models/offline_pkg/py310/lmdeploy-*.whl --no-deps python3 -m pip install -r requirements/test.txt
git clone https://github.com/${{ github.event.inputs.oc_repo_org}}.git cd opencompass git checkout ${{ github.event.inputs.oc_repo_ref}} python3 -m pip install . echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV
python3 -m pip list lmdeploy check_env

View raw YAML

name: evaluate

# Manually dispatched evaluation run; every knob below is a workflow_dispatch input.
on:
  workflow_dispatch:
    inputs:
      # Repository and ref of the code under test.
      repo_org:
        description: "Tested repository organization name. Default is InternLM/lmdeploy"
        type: string
        required: false
        default: InternLM/lmdeploy
      repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: main
      # Model list handed to the evaluation script.
      base_models:
        description: "Tested TurboMind models list. eg. [turbomind_qwen2_5_1_5b, turbomind_qwen2_5_7b, turbomind_qwen2_5_32b, turbomind_glm_4_9b, turbomind_llama_3_1_8b, turbomind_llama_3_70b, turbomind_qwen3_0_6b_base, turbomind_qwen3_8b_base, turbomind_qwen3_30b_A3B_base, pytorch_qwen2_5_1_5b, pytorch_qwen2_5_7b, pytorch_qwen2_5_32b, pytorch_gemma_2_9b, pytorch_llama_3_70b, pytorch_llama_3_1_8b, pytorch_qwen3_0_6b_base, pytorch_qwen3_8b_base, pytorch_qwen3_30b_A3B_base]"
        type: string
        required: true
        default: "[turbomind_qwen2_5_1_5b, turbomind_qwen2_5_7b, turbomind_qwen2_5_32b, turbomind_glm_4_9b, turbomind_llama_3_1_8b, turbomind_llama_3_70b, turbomind_qwen3_0_6b_base, turbomind_qwen3_8b_base, turbomind_qwen3_30b_A3B_base, pytorch_qwen2_5_1_5b, pytorch_qwen2_5_7b, pytorch_qwen2_5_32b, pytorch_gemma_2_9b, pytorch_llama_3_70b, pytorch_llama_3_1_8b, pytorch_qwen3_0_6b_base, pytorch_qwen3_8b_base, pytorch_qwen3_30b_A3B_base]"
      # The key is spelled "baes_datasets" (sic). It is the public input name
      # and is referenced under that spelling by the evaluate job.
      baes_datasets:
        description: "Tested datasets list. eg. [*mmlu_datasets, *gsm8k_datasets]"
        type: string
        required: true
        default: "[*mmlu_datasets, *gsm8k_datasets, *gpqa_datasets, *winogrande_datasets]"
      # Repository and ref of the opencompass checkout installed by the evaluate job.
      oc_repo_org:
        description: "Tested repository organization name. Default is open-compass/opencompass"
        type: string
        required: false
        default: open-compass/opencompass
      oc_repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: main
      # When true the build job is skipped and the evaluate job uses the
      # pre-staged code and wheel under /root/models/offline_pkg.
      offline_mode:
        description: "Whether start a offline mode, if true, you should prepare code and whl package by yourself"
        type: boolean
        required: true
        default: false

# Environment shared by every job in this workflow.
env:
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache

jobs:
  # Builds the lmdeploy wheel inside the manywheel builder image and uploads it
  # as a short-lived artifact for the evaluate job. Skipped in offline mode.
  linux-build:
    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
    steps:
      - name: Free disk space
        # Pinned to a release tag (same one the dev-docker workflow uses)
        # instead of the mutable `main` branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          echo "${GITHUB_RUN_ID}"
          # remove -it (no TTY is available on the runner)
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          # Fail the job instead of silently uploading nothing.
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          # The evaluate job downloads this artifact by the same name.
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Installs lmdeploy (freshly built wheel, or the pre-staged offline wheel) and
  # opencompass inside the GPU container, then runs the evaluation script.
  evaluate:
    needs: linux-build
    # !cancelled() lets this job run even when linux-build was skipped (offline mode).
    if: ${{github.event_name == 'schedule' || !cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 4320 # 72hours
    strategy:
      fail-fast: false
      matrix:
        evaluate_type: ['base']
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models/evaluation_report:/root/evaluation_report
        - /nvme/qa_test_models:/root/models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Setup systems
        run: |
          export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')"
          echo "TIME_STAMP=$TIME_STAMP" >> "$GITHUB_ENV"
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: cp -r /root/models/offline_pkg/lmdeploy/. .
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r /root/models/offline_pkg/requirements.txt
      - name: Install lmdeploy
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install lmdeploy - offline
        if: ${{inputs.offline_mode}}
        run: |
          python3 -m pip install /root/models/offline_pkg/py310/lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        # Inputs are passed through the environment rather than interpolated
        # into the script text, so their content is never parsed as shell code.
        env:
          OC_REPO_ORG: ${{ github.event.inputs.oc_repo_org }}
          OC_REPO_REF: ${{ github.event.inputs.oc_repo_ref }}
        run: |
          git clone "https://github.com/${OC_REPO_ORG}.git"
          # The clone directory is expected to be named "opencompass".
          cd opencompass
          git checkout "${OC_REPO_REF}"
          python3 -m pip install .
          echo "OPENCOMPASS_DIR=$(pwd)" >> "$GITHUB_ENV"
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      - name: Setup paths for evaluation
        run: |
          ln -s /root/opencompass-data ./data
          python3 .github/scripts/action_tools.py create_model_links /root/models .
      - name: Evaluate base models
        if: matrix.evaluate_type == 'base'
        # The list inputs contain `[`, `*` and `,`; quoting the env expansions
        # keeps the shell from globbing or word-splitting them.
        env:
          BASE_MODELS: ${{ github.event.inputs.base_models }}
          BASE_DATASETS: ${{ github.event.inputs.baes_datasets }}
        run: |
          echo "${BASE_MODELS}"
          echo "${BASE_DATASETS}"
          export LMDEPLOY_DIR=$(pwd)
          python3 .github/scripts/action_tools.py evaluate "${BASE_MODELS}" "${BASE_DATASETS}" /root/evaluation_report/${{ github.run_id }} base
      - name: Clear workspace
        if: always()
        # Recreate the workspace directory empty and world-writable.
        run: |
          export workdir=$(pwd)
          cd ..
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

lint .github/workflows/lint.yml

Triggers

push, pull_request

Runs on

ubuntu-latest

Jobs

lint

Actions

gaurav-nelson/github-action-markdown-link-check

Commands

python -m pip install pre-commit pre-commit install
pre-commit run --all-files
python -m pip install fire python .github/scripts/check_lmdeploy.py check_module_init lmdeploy
python .github/scripts/doc_link_checker.py --target README_zh-CN.md python .github/scripts/doc_link_checker.py --target README.md
python -m pip install interrogate interrogate -v --exclude ./lmdeploy/pytorch_poc/modeling/ --ignore-init-method --ignore-magic --ignore-module --ignore-private --ignore-nested-functions --ignore-nested-classes --fail-under 70 lmdeploy
python -m pip install pylint pylint lmdeploy

View raw YAML

name: lint

on: [push, pull_request]

# Every step only reads the repository, so the token is limited to read access.
permissions:
  contents: read

jobs:
  # Runs pre-commit, link checks, module-init checks, docstring coverage and pylint.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install
      - name: Linting
        run: pre-commit run --all-files
      - name: Check markdown link
        uses: gaurav-nelson/github-action-markdown-link-check@v1
        with:
          use-quiet-mode: 'yes'
          use-verbose-mode: 'yes'
          # check-modified-files-only: 'yes'
          config-file: '.github/md-link-config.json'
          file-path: './README.md, ./LICENSE, ./README_zh-CN.md'
      - name: Check module init files
        run: |
          python -m pip install fire
          python .github/scripts/check_lmdeploy.py check_module_init lmdeploy
      - name: Check doc link
        run: |
          python .github/scripts/doc_link_checker.py --target README_zh-CN.md
          python .github/scripts/doc_link_checker.py --target README.md
      - name: Check docstring coverage
        run: |
          python -m pip install interrogate
          interrogate -v --exclude ./lmdeploy/pytorch_poc/modeling/ --ignore-init-method --ignore-magic --ignore-module --ignore-private --ignore-nested-functions --ignore-nested-classes --fail-under 70 lmdeploy
      - name: Check pylint score
        run: |
          python -m pip install pylint
          pylint lmdeploy

linux_x64_gpu matrix perms .github/workflows/linux_x64_gpu.yml

Triggers

push, pull_request

Runs on

ubuntu-latest

Jobs

build

Matrix

cudaver→ 12.4, 12.8

Actions

jlumbroso/free-disk-space

Commands

docker run --rm \ -v ${{ github.workspace }}:/work \ -w /work \ openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} \ bash -c " git config --global --add safe.directory /work && \ source /opt/conda/bin/activate && \ conda activate py310 && \ pip install build && \ python -m build --wheel "

View raw YAML

name: linux-x64-gpu

# Runs on pushes and pull requests that touch this workflow file or any of the
# paths listed below. Both triggers use the same path filter.
on:
  push:
    paths:
      - ".github/workflows/linux_x64_gpu.yml"
      - "src/**"
      - "CMakeLists.txt"
      - "cmake/**"
      - "examples/**"
      - "3rdparty/**"
      - "tests/csrc/**"
  pull_request:
    paths:
      - ".github/workflows/linux_x64_gpu.yml"
      - "src/**"
      - "CMakeLists.txt"
      - "cmake/**"
      - "examples/**"
      - "3rdparty/**"
      - "tests/csrc/**"

# A newer run on the same ref cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: linux-x64-gpu-${{ github.ref }}

# Token is restricted to read-only repository contents.
permissions:
  contents: read

jobs:
  # Builds the wheel inside the lmdeploy-builder image, once per CUDA version.
  build:
    strategy:
      fail-fast: false
      matrix:
        # Quoted: these are version labels, not numbers. Unquoted, a value such
        # as 12.10 would be parsed as the float 12.1.
        cudaver: ['12.4', '12.8']
    name: cuda-${{ matrix.cudaver }}
    runs-on: ubuntu-latest
    steps:
      - name: Free disk space
        # Pinned to a release tag instead of the mutable `main` branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Build
        # The workspace is bind-mounted at /work; safe.directory is set because
        # the checkout is accessed from inside the container at that path.
        run: |
          docker run --rm \
            -v "${{ github.workspace }}:/work" \
            -w /work \
            openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} \
            bash -c "
              git config --global --add safe.directory /work && \
              source /opt/conda/bin/activate && \
              conda activate py310 && \
              pip install build && \
              python -m build --wheel
            "

mllm_api_eval matrix .github/workflows/mllm_api_eval.yml

Triggers

workflow_dispatch

Runs on

ubuntu-latest, self-hosted, linux-a100, self-hosted, linux-a100

Jobs

linux-build, download_pkgs, test_evaluation

Matrix

backend, gpu_num, pyver, transformers→ , ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}, gpu_num_1, gpu_num_2, gpu_num_4, gpu_num_8, legacy, py310

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
chmod -R 777 ${{env.TEST_CODE_PATH}} mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
cp -r ${{env.TEST_CODE_PATH}}/. . mkdir ${{env.REPORT_DIR}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt

View raw YAML

name: mllm_api_eval

# Manually dispatched multimodal evaluation run.
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # Parsed with fromJSON() to build the test matrix, so the value must be
      # a JSON array of strings (double-quoted elements).
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: '["turbomind", "pytorch"]'
      execution_mode:
        required: false
        description: 'Select execution mode: infer, eval, or both. Default is "both"'
        type: choice
        options:
          - both
          - infer
          - eval
        default: 'both'
      run_id:
        required: false
        description: 'Set custom run ID. If not provided, github.run_id will be used'
        type: string
        default: ''
      # The download_pkgs job branches on inputs.offline_mode; declaring it here
      # makes those "offline" steps selectable. Optional with a false default so
      # existing dispatch calls keep their current behaviour.
      offline_mode:
        required: false
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false


# Environment shared by every job. Paths under /nvme refer to the self-hosted
# runner's storage, which the jobs bind-mount into their containers.
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Per-run output and staging directories, keyed by the tested ref and the run id.
  REPORT_DIR: /nvme/qa_test_models/mllm_evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
  LMUData: /nvme/qa_test_models/LMUData
  LOCAL_LLM: turbomind_Qwen2.5-32B-Instruct_nccl_tp2_0
  OPENAI_API_KEY: sk-empty
  # Offline switches for the Hugging Face libraries.
  HF_DATASETS_OFFLINE: 1
  HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
  HF_HUB_OFFLINE: 1
  HF_EVALUATE_OFFLINE: 1
  # Honour the `run_id` input as its description promises: a custom id replaces
  # github.run_id, and an empty input falls back to github.run_id.
  RUN_ID: ${{ inputs.repo_ref }}_${{ inputs.run_id || github.run_id }}

jobs:
  # Builds the lmdeploy wheel inside the manywheel builder image and uploads it
  # as a short-lived artifact for the download_pkgs job.
  linux-build:
    if: ${{ !cancelled() }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
    steps:
      - name: Free disk space
        # Pinned to a release tag instead of the mutable `main` branch.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          echo "${GITHUB_RUN_ID}"
          # remove -it (no TTY is available on the runner)
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          # Fail the job instead of silently uploading nothing.
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          # download_pkgs fetches this artifact by the same name.
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Stages the code under test and the built wheel in $TEST_CODE_PATH on the
  # shared volume, where the test_evaluation jobs copy them from.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # TEST_CODE_PATH embeds the repo_ref input, which may contain "/" (for
      # example "feature/x"). `mkdir -p` creates the intermediate directories,
      # and the quoted shell expansions keep the path a single argument.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf "$TEST_CODE_PATH" && mkdir -p "$TEST_CODE_PATH" && chmod 777 "$TEST_CODE_PATH" && cp -r . "$TEST_CODE_PATH"
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf "$TEST_CODE_PATH" && mkdir -p "$TEST_CODE_PATH" && chmod 777 "$TEST_CODE_PATH" && cp -r "$OFFLINE_CODE_PATH"/. "$TEST_CODE_PATH"
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      # Replace any wheel that came along with the copied tree by the one for this run.
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -f "$TEST_CODE_PATH"/lmdeploy-*.whl && cp lmdeploy-*.whl "$TEST_CODE_PATH"
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm -f "$TEST_CODE_PATH"/lmdeploy-*.whl && cp "$OFFLINE_CODE_PATH"/lmdeploy-*.whl "$TEST_CODE_PATH"
      - name: Mark as start
        run: |
          chmod -R 777 "$TEST_CODE_PATH"
          mkdir -p "$REPORT_DIR"
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"

  # Runs the multimodal evaluation tests once per backend x GPU-count x
  # transformers-variant combination, inside the GPU container.
  test_evaluation:
    needs: download_pkgs
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 2400
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
        # "" keeps the installed transformers; "legacy" triggers the downgrade step.
        transformers: ["", "legacy"]
    env:
      TEST_ENV: ${{ matrix.transformers }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r "$TEST_CODE_PATH"/. .
          mkdir -p "$REPORT_DIR"
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install vlmeval
        run: |
          python3 -m pip install pandas datasets scikit-learn pylatexenc math_verify
          # apt-get is the script-stable front end; `apt` is meant for interactive use.
          apt-get update && apt-get install -y libgl1 libglib2.0-0
          cp -r /nvme/qa_test_models/offline_pkg/VLMEvalKit .
          cd VLMEvalKit && pip install .
      - name: Downgrade transformers
        if: ${{matrix.transformers == 'legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          mkdir -p "$REPORT_DIR"
          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"
      # Despite its name, this step runs the infer and/or eval pytest passes.
      - name: Setup paths for evaluation
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        run: |
          unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
          cd VLMEvalKit && cp -r ../autotest .
          execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
          ulimit -n 65535
          # Start from success; a failing pytest pass overwrites this with its
          # exit code so the remaining pass still runs before the step fails.
          overall_exit=0
          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
            pytest autotest/evaluate/test_mllm_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir="$REPORT_DIR" || overall_exit=$?
          fi
          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
            pytest autotest/evaluate/test_mllm_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir="$REPORT_DIR" || overall_exit=$?
          fi
          exit "$overall_exit"
      - name: Clear workspace
        if: always()
        run: |
          echo "status=done" >> "$REPORT_DIR/status.txt"
          chmod -R 777 "$REPORT_DIR"
          export workdir=$(pwd)
          rm -rf "$workdir"/*

pr_ete_test .github/workflows/pr_ete_test.yml

Triggers

pull_request, workflow_dispatch

Runs on

self-hosted, linux-a100-pr

Jobs

pr_functions_test

Commands

python3 -m pip install -r requirements/lite.txt python3 -m pip install -r requirements/test.txt python3 -m pip install -e .
python3 -m pip list lmdeploy check_env mkdir ${{env.REPORT_DIR}} -p mkdir ${{env.SERVER_LOG}} -p echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir=${{env.REPORT_DIR}} --clean-alluredir pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir=${{env.REPORT_DIR}}
pip install transformers==4.57.3
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-32B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_Qwen3-32B_start_restful.log 2>&1 & echo "restful_pid=$!" for i in $(seq 1 180) do sleep 5 echo "health check try $i" if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}} curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 exit 0 fi done echo "health check fail" curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 cat ${{env.SERVER_LOG}}/turbomind_Qwen3-32B_start_restful.log exit 1
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 & echo "restful_pid=$!" for i in $(seq 1 180) do sleep 5 echo "health check try $i" if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}} curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 exit 0 fi done echo "health check fail" curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 cat ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log exit 1
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client> ${{env.SERVER_LOG}}/turbomind_Qwen3-30B-A3B_start_restful.log 2>&1 & echo "restful_pid=$!" for i in $(seq 1 180) do sleep 5 echo "health check try $i" if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}} curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 exit 0 fi done echo "health check fail" curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 cat ${{env.SERVER_LOG}}/turbomind_Qwen3-30B-A3B_start_restful.log exit 1
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_Qwen3-30B-A3B_start_restful.log 2>&1 & echo "restful_pid=$!" for i in $(seq 1 180) do sleep 5 echo "health check try $i" if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch' --alluredir=${{env.REPORT_DIR}} curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 exit 0 fi done echo "health check fail" curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 cat ${{env.SERVER_LOG}}/pytorch_Qwen3-30B-A3B_start_restful.log exit 1

View raw YAML

name: pr_ete_test

# Runs for pull requests that touch any of the listed paths, and on manual dispatch.
on:
  pull_request:
    paths:
      - '.github/workflows/pr_ete_test.yml'
      - 'cmake/**'
      - 'src/**'
      - 'autotest/**'
      - '3rdparty/**'
      - 'lmdeploy/**'
      - 'requirements/**'
      - 'requirements_cuda.txt'
      - 'CMakeLists.txt'
      - 'setup.py'
  workflow_dispatch:

# One active run per pull request (or per ref when there is no PR number);
# a newer run cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}


# Environment shared by every job in this workflow.
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA


jobs:
  pr_functions_test:
    runs-on: [self-hosted, linux-a100-pr]
    timeout-minutes: 120
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.head_ref }}_${{ github.run_id }}
      SERVER_LOG: /nvme/qa_test_models/server_log/${{ github.head_ref }}_${{ github.run_id }}
    container:
      image: openmmlab/lmdeploy:dev-cu12.8
      options: --gpus all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never
      volumes:
        - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
        - /nvme/share_data/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
      - name: Install lmdeploy
        run: |
          python3 -m pip install -r requirements/lite.txt
          python3 -m pip install -r requirements/test.txt
          python3 -m pip install -e .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          mkdir ${{env.REPORT_DIR}} -p
          mkdir ${{env.SERVER_LOG}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Test lmdeploy - func
        run: |
          pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir=${{env.REPORT_DIR}} --clean-alluredir
          pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir=${{env.REPORT_DIR}}
      - name: Update transformers
        run: |
          pip install transformers==4.57.3
      # Serve Qwen3-32B with the turbomind backend on GPUs 6 and 7 (tp=2), wait
      # for the /health endpoint, run the restful test suites against it and shut
      # the server down again.
      - name: Test restful server - turbomind Qwen3-32B
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-32B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_Qwen3-32B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          # Poll the health endpoint every 5 seconds, up to 180 attempts.
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              # Collect failures instead of letting errexit abort the script, so
              # both suites run and the server is always asked to terminate.
              status=0
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} || status=$?
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}} || status=$?
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
              exit $status
            fi
          done

          echo "health check fail"
          # The server may never have come up; a failing terminate request must not
          # abort the script before the startup log is printed.
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
          cat ${{env.SERVER_LOG}}/turbomind_Qwen3-32B_start_restful.log
          exit 1
      # Serve InternVL3-38B with the turbomind backend on GPUs 6 and 7 (tp=2),
      # wait for the /health endpoint, run the restful test suites against it and
      # shut the server down again.
      - name: Test restful server - turbomind InternVL3-38B
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          # Poll the health endpoint every 5 seconds, up to 180 attempts.
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              # Collect failures instead of letting errexit abort the script, so
              # both suites run and the server is always asked to terminate.
              status=0
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} || status=$?
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}} || status=$?
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
              exit $status
            fi
          done

          echo "health check fail"
          # The server may never have come up; a failing terminate request must not
          # abort the script before the startup log is printed.
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
          cat ${{env.SERVER_LOG}}/turbomind_InternVL3-38B_start_restful.log
          exit 1
      # Serve Qwen3-30B-A3B with the turbomind backend on GPUs 6 and 7 (tp=2),
      # wait for the /health endpoint, run the restful test suites against it and
      # shut the server down again.
      - name: Test restful server - turbomind Qwen3-30B-A3B
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/turbomind_Qwen3-30B-A3B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          # Poll the health endpoint every 5 seconds, up to 180 attempts.
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              # Collect failures instead of letting errexit abort the script, so
              # both suites run and the server is always asked to terminate.
              status=0
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} || status=$?
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not experts' --alluredir=${{env.REPORT_DIR}} || status=$?
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
              exit $status
            fi
          done

          echo "health check fail"
          # The server may never have come up; a failing terminate request must not
          # abort the script before the startup log is printed.
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
          cat ${{env.SERVER_LOG}}/turbomind_Qwen3-30B-A3B_start_restful.log
          exit 1
      # Serve Qwen3-30B-A3B with the pytorch backend (routed-experts output
      # enabled) on GPUs 6 and 7 (tp=2), wait for the /health endpoint, run the
      # restful test suites against it and shut the server down again.
      - name: Test restful server - pytorch Qwen3-30B-A3B
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_Qwen3-30B-A3B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          # Poll the health endpoint every 5 seconds, up to 180 attempts.
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              # Collect failures instead of letting errexit abort the script, so
              # both suites run and the server is always asked to terminate.
              status=0
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} || status=$?
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch' --alluredir=${{env.REPORT_DIR}} || status=$?
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
              exit $status
            fi
          done

          echo "health check fail"
          # The server may never have come up; a failing terminate request must not
          # abort the script before the startup log is printed.
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
          cat ${{env.SERVER_LOG}}/pytorch_Qwen3-30B-A3B_start_restful.log
          exit 1
      # Serve Qwen3-VL-30B-A3B-Instruct with the pytorch backend on GPUs 6 and 7
      # (tp=2), wait for the /health endpoint, run the restful test suites against
      # it and shut the server down again.
      - name: Test restful server - pytorch Qwen3-VL-30B-A3B-Instruct
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-VL-30B-A3B-Instruct --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          # Poll the health endpoint every 5 seconds, up to 180 attempts.
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              # Collect failures instead of letting errexit abort the script, so
              # both suites run and the server is always asked to terminate.
              status=0
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} || status=$?
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not experts' --alluredir=${{env.REPORT_DIR}} || status=$?
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
              exit $status
            fi
          done

          echo "health check fail"
          # The server may never have come up; a failing terminate request must not
          # abort the script before the startup log is printed.
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
          cat ${{env.SERVER_LOG}}/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log
          exit 1
      # Serve InternVL3_5-30B-A3B with the pytorch backend on GPUs 6 and 7 (tp=2),
      # wait for the /health endpoint, run the restful test suites against it and
      # shut the server down again.
      - name: Test restful server - pytorch InternVL3_5-30B-A3B
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log 2>&1 &
          echo "restful_pid=$!"
          # Poll the health endpoint every 5 seconds, up to 180 attempts.
          for i in $(seq 1 180)
          do
            sleep 5
            echo "health check try $i"
            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
              # Collect failures instead of letting errexit abort the script, so
              # both suites run and the server is always asked to terminate.
              status=0
              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir=${{env.REPORT_DIR}} || status=$?
              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not experts' --alluredir=${{env.REPORT_DIR}} || status=$?
              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
              exit $status
            fi
          done

          echo "health check fail"
          # The server may never have come up; a failing terminate request must not
          # abort the script before the startup log is printed.
          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
          cat ${{env.SERVER_LOG}}/pytorch_InternVL3_5-30B-A3B_start_restful.log
          exit 1
      # Always runs: mark the report as finished, then replace the workspace with
      # an empty directory that has mode 777.
      - name: Clear workfile
        if: always()
        run: |
          # An earlier step may have failed before the report directory existed;
          # a failed status write must not skip the workspace cleanup below.
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt || true
          export workdir=$(pwd)
          cd ..
          # Quote the path so it is never word-split or glob-expanded by rm -rf.
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

pypi matrix .github/workflows/pypi.yml

Triggers

push, workflow_dispatch

Runs on

ubuntu-latest, windows-latest, ubuntu-latest

Jobs

linux-build, windows-build, publish

Matrix

pyver→ 3.10, 3.11, 3.12, 3.13, py310, py311, py312, py313

Actions

jlumbroso/free-disk-space

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
git config --global core.longpaths true
pip install build change-wheel-version
./builder/windows/setup_cuda.ps1
python -m build --wheel -o build/wheel Get-ChildItem -Path "build" -Filter "*.whl" | ForEach-Object { change_wheel_version $_.FullName --local-version cu121 --delete-old-wheel }
ls artifact/ -lh
pip install twine twine upload artifact/* -u __token__ -p ${{ secrets.pypi_password }}

View raw YAML

# Release workflow: builds Linux and Windows wheels and uploads them to PyPI.
name: publish to pypi

# Runs when lmdeploy/version.py changes on main, or when started manually.
on:
  push:
    branches:
      - main
    paths:
      - "lmdeploy/version.py"
  workflow_dispatch:


jobs:
  # Builds one Linux wheel per Python tag in the matrix by running
  # builder/manywheel/build_wheel.sh and uploads the output folder as an artifact.
  linux-build:
    strategy:
      matrix:
        pyver: [py310, py311, py312, py313]
    runs-on: ubuntu-latest
    # Parameters handed to build_wheel.sh as positional arguments in the Build step.
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.4
      OUTPUT_FOLDER: cuda12_dist
    steps:
      # NOTE(review): this third-party action is referenced by a moving branch
      # (@main); consider pinning a release tag or commit SHA in a release workflow.
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
      # Echo the build parameters, drop the interactive `-it` flags from the
      # docker invocation inside the build script, then run the script.
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      # Fails the job when nothing was produced; the artifact is kept for one day.
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}/*
          retention-days: 1
          name: linux-${{ matrix.pyver }}

  # Builds one Windows wheel per Python version in the matrix with
  # `python -m build` and uploads build/wheel as an artifact.
  windows-build:
    strategy:
      matrix:
        pyver: ['3.10', '3.11', '3.12', '3.13']
    runs-on: windows-latest
    steps:
      # Enable long path support in git before the checkout.
      - name: Set git for windows
        run: |
          git config --global core.longpaths true
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Set up python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}
      - name: Install python packages
        run: |
          pip install build change-wheel-version
      # Runs the repository's PowerShell CUDA setup script; INPUT_CUDA_VERSION is
      # presumably the version that script installs — confirm in setup_cuda.ps1.
      - name: Setup CUDA Toolkit
        id: cuda-toolkit
        shell: pwsh
        run: ./builder/windows/setup_cuda.ps1
        env:
            INPUT_CUDA_VERSION: '12.6.2'
      # NOTE(review): the wheel is written to build/wheel, but Get-ChildItem below
      # lists "build" without -Recurse — confirm whether the cu121 local-version
      # rewrite is expected to take effect before changing this.
      - name: Build wheel
        run: |
          python -m build --wheel -o build/wheel
          Get-ChildItem -Path "build" -Filter "*.whl" | ForEach-Object { change_wheel_version $_.FullName --local-version cu121 --delete-old-wheel }
      # Fails the job when nothing was produced; the artifact is kept for one day.
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: build/wheel/*
          retention-days: 1
          name: windows-${{ matrix.pyver }}

  # Collects the wheels produced by both build jobs and uploads them to PyPI.
  # Runs in the 'prod' environment and only after both build jobs have finished.
  publish:
    runs-on: ubuntu-latest
    environment: 'prod'
    needs:
      - linux-build
      - windows-build
    steps:
      # merge-multiple flattens every per-version artifact into one directory.
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifact
          merge-multiple: true
      - name: Display artifacts
        run: ls artifact/ -lh
      - name: Set up python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      # Credentials are passed through twine's environment variables so the token
      # is never expanded into the shell command line.
      - name: Upload to pypi
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.pypi_password }}
        run: |
          pip install twine
          twine upload artifact/*

stable matrix .github/workflows/stable.yml

Triggers

workflow_dispatch, schedule

Runs on

ubuntu-latest, self-hosted, lmdeploy-stable

Jobs

linux-build, benchmark

Matrix

model, pyver→ internlm/internlm2_5-20b-chat, py310

Commands

echo ${PYTHON_VERSION} echo ${PLAT_NAME} echo ${DOCKER_TAG} echo ${OUTPUT_FOLDER} echo ${GITHUB_RUN_ID} # remove -it sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
# manually install flash attn # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
python3 -m pip install lmdeploy-*.whl --no-deps python3 -m pip install -r requirements/test.txt
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps python3 -m pip install -r requirements/test.txt
git clone --depth=1 https://github.com/open-compass/opencompass.git cd opencompass python3 -m pip install -e . cd ..
python3 -m pip list lmdeploy check_env
mkdir ${{env.REPORT_DIR}} -p CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model}} --tp 2 --max-batch-size 256 --cache-max-entry-count 0.9 --server-port 23344 > ${{env.REPORT_DIR}}/restful.log 2>&1 & echo "restful_pid=$!" >> "$GITHUB_ENV" sleep 120s

View raw YAML

# Long-running stability test: builds a wheel, serves a model and runs
# evaluation and benchmark load against the server.
name: stable_test

on:
  workflow_dispatch:
    inputs:
      # NOTE(review): despite its name and description this value is used as the
      # `repository` input of actions/checkout, i.e. an owner/repo slug.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # When true, the build job is skipped and the benchmark job uses the code
      # and wheel under /nvme/qa_test_models/offline_pkg.
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
  # 08:00 every Monday (GitHub evaluates schedules in UTC).
  schedule:
    - cron:  '00 8 * * 1'

# Workflow-wide environment; both the output folder and the report directory
# are made unique per run through github.run_id.
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  REPORT_DIR: /nvme/qa_test_models/stable_reports/${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  COMPASS_DATA_CACHE: /nvme/qa_test_models/dataset

jobs:
  # Builds the py310 Linux wheel from the requested repository/ref and uploads
  # it as an artifact. Skipped for manual runs that enable offline_mode.
  linux-build:
    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda11.8
    steps:
      # Scheduled runs carry no inputs, so the `||` fallbacks select
      # InternLM/lmdeploy at main.
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # Echo the build parameters, drop the interactive `-it` flags from the
      # docker invocation inside the build script, then run the script.
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      # The artifact name is what the benchmark job downloads (run id + pyver).
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


  # Installs lmdeploy inside the openmmlab/lmdeploy:latest-cu11 container on a
  # self-hosted runner, starts an api server for the matrix model and runs the
  # evaluation and benchmark steps against it. The timeout is 10080 minutes (7 days).
  benchmark:
    needs: linux-build
    # The explicit condition replaces the default success() check —
    # presumably so the job still runs when linux-build is skipped in offline mode.
    if: ${{github.event_name == 'schedule' || !cancelled()}}
    runs-on: [self-hosted, lmdeploy-stable]
    timeout-minutes: 10080
    strategy:
      fail-fast: false
      matrix:
        model: ['internlm/internlm2_5-20b-chat']
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 -e NO_PROXY=localhost,127.0.0.1 -e no_proxy=localhost,127.0.0.1 --pull never"
      # Host directories with the pip cache, prebuilt packages, models/datasets
      # and the host time zone are mounted into the container.
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/187:/mnt/187
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Online mode: check out the requested repository/ref.
      - name: Clone repository
        uses: actions/checkout@v3
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # Offline mode: copy the prepared source tree into the workspace instead.
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
      # Online mode: fetch the wheel uploaded by the linux-build job.
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      # Install flash-attn and xformers from local wheels under /root/packages,
      # then the offline requirements list.
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
      # Online mode: install the downloaded wheel without resolving dependencies.
      - name: Install lmdeploy
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Offline mode: install the prepared py310 wheel instead.
      - name: Install lmdeploy - offline
        if: ${{inputs.offline_mode}}
        run: |
          python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # Shallow-clone opencompass and install it in editable mode.
      - name: Install opencompass
        run: |
          git clone --depth=1 https://github.com/open-compass/opencompass.git
          cd opencompass
          python3 -m pip install -e .
          cd ..
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      # Start the api server in the background on port 23344 (GPUs 6 and 7, tp=2),
      # export its PID through GITHUB_ENV for the later kill step, and wait a
      # fixed 120 seconds instead of polling for readiness.
      - name: Start restful api turbomind
        run: |
          mkdir ${{env.REPORT_DIR}} -p
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model}} --tp 2 --max-batch-size 256 --cache-max-entry-count 0.9 --server-port 23344 > ${{env.REPORT_DIR}}/restful.log 2>&1  &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      # Three rounds of the object and subject evaluation configs, each round with
      # its own work directory; a failure here does not fail the job.
      - name: Run OC result
        continue-on-error: true
        run: |
          ln -s /nvme/qa_test_models/dataset/data .
          opencompass .github/scripts/eval_stable_object_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-object-1
          opencompass .github/scripts/eval_stable_subject_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-subject-1
          opencompass .github/scripts/eval_stable_object_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-object-2
          opencompass .github/scripts/eval_stable_subject_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-subject-2
          opencompass .github/scripts/eval_stable_object_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-object-3
          opencompass .github/scripts/eval_stable_subject_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-subject-3
      # Load-test the running api server: one pass of the repository benchmark
      # script, followed by five passes of the internal profiling script. Each
      # pass writes its own stable-internal-<n>.csv/.log pair, which are the files
      # the "Attach result" step summarises.
      - name: Test lmdeploy - restful api
        run: |
          python3 benchmark/profile_restful_api.py --backend lmdeploy --base-url http://localhost:23344 --dataset-path /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10000 --output-file ${{env.REPORT_DIR}}/stable.jsonl > ${{env.REPORT_DIR}}/stable.log
          # The loop index names both the csv and the log, so a later pass can
          # never overwrite the results of an earlier one.
          for round in 1 2 3 4 5
          do
            python3 /nvme/qa_test_models/offline_pkg/profile_restful_api_internal.py localhost:23344 /nvme/qa_test_models/${{matrix.model}} /nvme/qa_test_models/datasets/Mixed.json --stream-output True --num-prompts 100000 --csv ${{env.REPORT_DIR}}/stable-internal-${round}.csv > ${{env.REPORT_DIR}}/stable-internal-${round}.log
          done
      # Always runs: convert the benchmark jsonl into a csv and add each result
      # csv to the summary through .github/scripts/action_tools.py.
      - name: Attach result
        if: always()
        run: |
          python3 .github/scripts/action_tools.py generate_csv_from_profile_result ${{env.REPORT_DIR}}/stable.jsonl ${{env.REPORT_DIR}}/stable.csv
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable.csv
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable-internal-1.csv
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable-internal-2.csv
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable-internal-3.csv
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable-internal-4.csv
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable-internal-5.csv
      # Always runs: send SIGTERM to the server PID that the start step exported
      # through GITHUB_ENV.
      - name: Kill api server
        if: always()
        run: |
          kill -15 "$restful_pid"
      # Always runs: open up the report directory permissions, then replace the
      # workspace with an empty directory that has mode 777.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

stale perms .github/workflows/stale.yml

Triggers: schedule
Runs on: ubuntu-latest
Jobs: stale
Actions: actions/stale

View raw YAML

# Daily housekeeping: marks labelled issues and pull requests as stale after a
# period of inactivity and closes them if they stay inactive.
name: 'Close stale issues and PRs'

on:
  schedule:
    # check issue and pull request once at 01:30 a.m. every day
    - cron: '30 1 * * *'

# Workflow-level default: read-only access to repository contents.
permissions:
  contents: read

jobs:
  stale:
    # The job itself only needs write access to issues and pull requests.
    permissions:
      issues: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v7
        with:
          stale-issue-message: 'This issue is marked as stale because it has been marked as invalid or awaiting response for 7 days without any further response. It will be closed in 5 days if the stale label is not removed or if there is no further response.'
          stale-pr-message: 'This PR is marked as stale because there has been no activity in the past 45 days. It will be closed in 10 days if the stale label is not removed or if there is no further updates.'
          close-issue-message: 'This issue is closed because it has been stale for 5 days. Please open a new issue if you have similar issues or you have any new updates now.'
          close-pr-message: 'This PR is closed because it has been stale for 10 days. Please reopen this PR if you have any updates and want to keep contributing the code.'
          # only issues/PRS with following labels are checked
          any-of-labels: 'invalid, awaiting response, duplicate'
          # The day counts below are the same numbers quoted in the messages above.
          days-before-issue-stale: 7
          days-before-pr-stale: 45
          days-before-issue-close: 5
          days-before-pr-close: 10
          # automatically remove the stale label when the issues or the pull requests are updated or commented
          remove-stale-when-updated: true
          operations-per-run: 50

test_docker matrix .github/workflows/test_docker.yml

Triggers

push, pull_request

Runs on

ubuntu-latest, ubuntu-22.04-arm, ubuntu-22.04-arm

Jobs

test_docker_image, test_ascend_docker_image, test_jetson_docker_image

Matrix

cuda_version, python_version→ 3.10, 3.11, 3.12, 3.13, cu12, cu13

Actions

jlumbroso/free-disk-space, MaxymVlasov/dive-action, jlumbroso/free-disk-space, docker/setup-buildx-action, MaxymVlasov/dive-action, jlumbroso/free-disk-space, docker/setup-buildx-action, MaxymVlasov/dive-action

Commands

docker info # remove http extraheader git config --local --unset "http.https://github.com/.extraheader"
docker build . -t lmdeploy:latest -f docker/Dockerfile --build-arg CUDA_VERSION=${CUDA_VERSION} --build-arg PYTHON_VERSION=${PYTHON_VERSION}
docker images docker run --rm lmdeploy:latest lmdeploy check_env
docker info # remove http extraheader git config --local --unset "http.https://github.com/.extraheader"
docker build . -t lmdeploy:ascend -f docker/Dockerfile_ascend_a3
docker info # remove http extraheader git config --local --unset "http.https://github.com/.extraheader"
docker build . -t lmdeploy:jetson -f docker/Dockerfile.jetson
docker images docker run --rm lmdeploy:jetson lmdeploy check_env

View raw YAML

# Builds the project's Docker images whenever the Dockerfiles or the docker
# workflows change.
name: test-docker

# Triggered only by changes under docker/ or to a workflow file ending in docker.yml.
on:
  push:
    paths:
      - 'docker/**'
      - '.github/workflows/*docker.yml'
  pull_request:
    paths:
      - 'docker/**'
      - '.github/workflows/*docker.yml'

# One active run per pull request (or per ref for pushes); a newer run cancels
# the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Builds docker/Dockerfile for every CUDA/Python combination in the matrix and
  # runs `lmdeploy check_env` inside the resulting image.
  test_docker_image:
    permissions:
      pull-requests: write
    runs-on: ubuntu-latest
    strategy:
      matrix:
        cuda_version: [cu13, cu12]
        python_version: ['3.10', '3.11', '3.12', '3.13']
    # Matrix values are exposed as environment variables and forwarded to
    # `docker build` as build arguments.
    env:
      CUDA_VERSION: ${{ matrix.cuda_version }}
      PYTHON_VERSION: ${{ matrix.python_version }}
    steps:
      # NOTE(review): this workflow declares no workflow_dispatch inputs, so
      # github.event.inputs.repo_ref looks like it is always empty — confirm
      # whether the `ref` override is still needed.
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: ${{github.event.inputs.repo_ref}}
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      # Print the docker daemon info and remove the http extraheader entry from
      # the local git config before the build.
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader
          git config --local --unset "http.https://github.com/.extraheader"
      - name: Build Docker image
        run: |
          docker build . -t lmdeploy:latest -f docker/Dockerfile --build-arg CUDA_VERSION=${CUDA_VERSION} --build-arg PYTHON_VERSION=${PYTHON_VERSION}
      - name: Test image with lmdeploy check_env
        run: |
          docker images
          docker run --rm lmdeploy:latest lmdeploy check_env
      # The dive action runs only for the cu12 matrix entries.
      - name: Dive
        if: ${{ matrix.cuda_version == 'cu12' }}
        uses: MaxymVlasov/dive-action@v1.5.0
        with:
          image: lmdeploy:latest
          github-token: ${{ secrets.GITHUB_TOKEN }}

  # Builds docker/Dockerfile_ascend_a3 on an ARM runner and runs the dive action
  # on the image. The check_env smoke test is commented out for this image.
  test_ascend_docker_image:
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04-arm
    steps:
      # NOTE(review): this workflow declares no workflow_dispatch inputs, so
      # github.event.inputs.repo_ref looks like it is always empty — confirm
      # whether the `ref` override is still needed.
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: ${{github.event.inputs.repo_ref}}
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Print the docker daemon info and remove the http extraheader entry from
      # the local git config before the build.
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader
          git config --local --unset "http.https://github.com/.extraheader"
      - name: Build Docker image
        run: |
          docker build . -t lmdeploy:ascend -f docker/Dockerfile_ascend_a3
#      - name: Test image with lmdeploy check_env
#        run: |
#          docker images
#          docker run --rm lmdeploy:ascend lmdeploy check_env
      - name: Dive
        uses: MaxymVlasov/dive-action@v1.5.0
        with:
          image: lmdeploy:ascend
          github-token: ${{ secrets.GITHUB_TOKEN }}

  # Builds docker/Dockerfile.jetson on an ARM runner, runs `lmdeploy check_env`
  # inside the image and then runs the dive action on it.
  test_jetson_docker_image:
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04-arm
    steps:
      # NOTE(review): this workflow declares no workflow_dispatch inputs, so
      # github.event.inputs.repo_ref looks like it is always empty — confirm
      # whether the `ref` override is still needed.
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          ref: ${{github.event.inputs.repo_ref}}
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Print the docker daemon info and remove the http extraheader entry from
      # the local git config before the build.
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader
          git config --local --unset "http.https://github.com/.extraheader"
      - name: Build Docker image
        run: |
          docker build . -t lmdeploy:jetson -f docker/Dockerfile.jetson
      - name: Test image with lmdeploy check_env
        run: |
          docker images
          docker run --rm lmdeploy:jetson lmdeploy check_env
      - name: Dive
        uses: MaxymVlasov/dive-action@v1.5.0
        with:
          image: lmdeploy:jetson
          github-token: ${{ secrets.GITHUB_TOKEN }}

unit_test .github/workflows/unit_test.yml

Triggers

pull_request, push

Runs on

self-hosted, linux-a100-s2

Jobs

unit_test

Commands

python3 -m pip install -r requirements/test.txt python3 -m pip install -e .
python3 -m pip list lmdeploy check_env
coverage run --branch --source lmdeploy -m pytest -rsE tests coverage xml coverage report -m
export workdir=$(pwd) cd .. rm -rf $workdir mkdir $workdir chmod -R 777 $workdir

View raw YAML

name: unit-test

# Both events share the same path filter: the workflow file itself, build
# inputs (cmake, CMakeLists.txt, setup.py, requirements), sources, tests and
# third-party code. The push event is further limited to the main branch and
# also lists a `v*.*.*` tag pattern.
on:
  push:
    branches:
      - main
    tags:
      - 'v*.*.*'
    paths:
      - '.github/workflows/unit_test.yml'
      - 'cmake/**'
      - 'src/**'
      - 'tests/**'
      - '3rdparty/**'
      - 'lmdeploy/**'
      - 'requirements/**'
      - 'requirements_cuda.txt'
      - 'CMakeLists.txt'
      - 'setup.py'
  pull_request:
    paths:
      - '.github/workflows/unit_test.yml'
      - 'cmake/**'
      - 'src/**'
      - 'tests/**'
      - '3rdparty/**'
      - 'lmdeploy/**'
      - 'requirements/**'
      - 'requirements_cuda.txt'
      - 'CMakeLists.txt'
      - 'setup.py'

jobs:
  # Installs lmdeploy in editable mode inside the dev container on a
  # self-hosted runner, runs the pytest suite under coverage, and finally
  # resets the workspace directory.
  unit_test:
    runs-on: [self-hosted, linux-a100-s2]
    # Least privilege: the steps below only check out code and run local
    # commands, so the token is restricted to read-only repository access.
    permissions:
      contents: read
    timeout-minutes: 4320 # 72hours
    container:
      image: openmmlab/lmdeploy:dev-cu12.8
      # `--pull never` makes the job use the image already present on the host.
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 -e HF_HOME=/root/.cache/huggingface --pull never"
      volumes:
        # Host directories mounted at the paths named by PIP_CACHE_DIR and
        # HF_HOME above, plus a packages directory and the host timezone file.
        - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
        - /nvme/share_data/github-actions/hf_home:/root/.cache/huggingface
        - /nvme/share_data/github-actions/packages:/root/packages
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v5
      - name: Install lmdeploy
        run: |
          python3 -m pip install -r requirements/test.txt
          python3 -m pip install -e .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      - name: Test lmdeploy python UT
        run: |
          coverage run --branch --source lmdeploy -m pytest -rsE tests
          coverage xml
          coverage report -m
      - name: Clear workfile
        # Runs even when earlier steps fail so the workspace is always reset.
        if: always()
        run: |
          # Quote every expansion so a path containing whitespace or glob
          # characters can never make `rm -rf` act on unintended targets.
          export workdir="$(pwd)"
          cd ..
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"

windows_x64_gpu matrix perms .github/workflows/windows_x64_gpu.yml

Triggers

push, pull_request

Runs on

windows-latest

Jobs

build

Matrix

cudaver→ 12.6.2, 12.8.1

Commands

git config --global core.longpaths true
pip install build
./builder/windows/setup_cuda.ps1
python -m build --wheel

View raw YAML

name: windows-x64-gpu

# Triggered by pushes and pull requests that touch the workflow file, the
# native sources, the CMake build files, examples, third-party code or the
# C++ tests.
on:
  pull_request:
    paths:
      - '.github/workflows/windows_x64_gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
  push:
    paths:
      - '.github/workflows/windows_x64_gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'

# The workflow token only gets read access to repository contents.
permissions:
  contents: read

# Runs are grouped per git ref; a newer run cancels the one still in progress.
concurrency:
  group: windows-x64-gpu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Builds a Python wheel on Windows once per CUDA toolkit version in the
  # matrix; `fail-fast: false` lets each version finish independently.
  build:
    strategy:
      fail-fast: false
      matrix:
        # Quoted so the versions stay strings regardless of the YAML parser.
        cudaver: ['12.6.2', '12.8.1']
    name: cuda-${{ matrix.cudaver }}
    runs-on: windows-latest
    steps:
      - name: Set git for windows
        # Enabled before checkout so long paths in the repository can be
        # written on Windows.
        run: |
          git config --global core.longpaths true
      - name: Checkout repository
        # v4 replaces v3, which runs on the deprecated Node 16 runtime.
        uses: actions/checkout@v4
      - name: Set up python
        # v5 replaces v4 for the same Node 16 deprecation reason.
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install python packages
        run: |
          pip install build
      - name: Setup CUDA Toolkit
        id: cuda-toolkit
        shell: pwsh
        run: ./builder/windows/setup_cuda.ps1
        env:
          # Passes the matrix CUDA version to the setup script.
          INPUT_CUDA_VERSION: ${{ matrix.cudaver }}
      - name: Build wheel
        run: |
          python -m build --wheel