InternLM/lmdeploy
20 workflows · maturity 50% · 8 patterns · GitHub ↗
Practices
✓ Matrix · ✓ Permissions · ○ Security scan · ○ AI review · ○ Cache · ✓ Concurrency · ○ Reusable workflows
Detected patterns
Security dimensions
Workflows (20)
api_eval matrix .github/workflows/api_eval.yml
View raw YAML
# api_eval: manually dispatched evaluation workflow.
# Flow: build a wheel (skipped in offline mode) -> stage code + wheel under
# TEST_CODE_PATH on the mounted /nvme/qa_test_models volume -> run
# autotest/evaluate/test_api_evaluate.py for every matrix combination.
name: api_eval

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      execution_mode:
        required: false
        description: 'Select execution mode: infer, eval, or both. Default is "both"'
        type: choice
        options:
          - both
          - infer
          - eval
        default: 'both'
      run_id:
        required: false
        description: 'Set custom run ID. If not provided, github.run_id will be used'
        type: string
        default: ''
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache
  HF_DATASETS_OFFLINE: 1
  HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
  HF_HUB_OFFLINE: 1
  HF_EVALUATE_OFFLINE: 1
  # Honour the `run_id` input as its description promises; an empty input
  # falls back to github.run_id, which is the previous behaviour.
  RUN_ID: ${{ inputs.repo_ref }}_${{ inputs.run_id || github.run_id }}

jobs:
  # Builds the py310 manylinux wheel on a hosted runner; skipped in offline mode.
  linux-build:
    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
    steps:
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Stages the source tree and the wheel into TEST_CODE_PATH so the test job
  # can copy them from the mounted volume.
  download_pkgs:
    needs: linux-build
    # !cancelled() lets this job run even when linux-build was skipped.
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # mkdir -p: TEST_CODE_PATH embeds repo_ref, so a ref containing '/'
      # (e.g. feature/xyz) needs its intermediate directory created too.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Mark as start
        run: |
          chmod -R 777 ${{env.TEST_CODE_PATH}}
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt

  # Runs the infer and/or eval pytest phases for each
  # backend x gpu_num x transformers combination.
  test_evaluation:
    needs: download_pkgs
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 7200
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
        transformers: ["", "legacy"]
    env:
      TEST_ENV: ${{ matrix.transformers }}
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          git clone https://github.com/open-compass/opencompass.git --depth 1
          cd opencompass
          python3 -m pip install .
          python3 -m pip install langdetect
      - name: Downgrade transformers
        if: ${{matrix.transformers == 'legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      # Despite its name this step also runs the tests. overall_exit keeps the
      # failing exit code of either phase so both phases always get a chance to run.
      - name: Setup paths for evaluation
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        run: |
          overall_exit=0
          ln -s /mnt/104/opencompass-data/data ./data
          ln -s /nvme/qa_test_models/resource/nltk_data /usr/share/nltk_data
          execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
          ulimit -n 65535
          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
          fi
          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
          fi
          exit $overall_exit
      - name: Clear workspace
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 ${{env.REPORT_DIR}}
          export workdir=$(pwd)
          rm -rf $workdir/*
benchmark matrix .github/workflows/benchmark.yml
View raw YAML
# benchmark_test: manually dispatched performance benchmark workflow.
# Flow: build a wheel (skipped in offline mode) -> stage code + wheel under
# TEST_CODE_PATH -> run autotest/benchmark/test_<type>_performance.py for each
# benchmark_type x gpu_num x transformers combination.
name: benchmark_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        # Description corrected to match the actual default below.
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      benchmark_type:
        required: true
        # Description corrected to list the same values as the default below.
        description: 'Set benchmark type. Default is "["apiserver", "mllm_apiserver", "throughput", "longtext", "prefixcache"]"'
        type: string
        default: "['apiserver', 'mllm_apiserver', 'throughput', 'longtext', 'prefixcache']"
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  REPORT_DIR: /nvme/qa_test_models/benchmark_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  ALLURE_REPORT_DIR: /nvme/qa_test_models/benchmark_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}

jobs:
  # Builds the py310 manylinux wheel on a hosted runner; skipped in offline mode.
  linux-build:
    if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
    steps:
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Stages the source tree and the wheel into TEST_CODE_PATH for the benchmark job.
  download_pkgs:
    needs: linux-build
    # !cancelled() lets this job run even when linux-build was skipped.
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # mkdir -p: TEST_CODE_PATH embeds repo_ref, so a ref containing '/'
      # (e.g. feature/xyz) needs its intermediate directory created too.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Mark as start
        run: |
          chmod -R 777 ${{env.TEST_CODE_PATH}}
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt

  benchmark:
    needs: download_pkgs
    if: ${{github.event_name == 'schedule' || !cancelled()}}
    runs-on: [self-hosted, linux-a100]
    strategy:
      fail-fast: false
      matrix:
        benchmark_type: ${{fromJSON(github.event.inputs.benchmark_type)}}
        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
        transformers: ["", "legacy"]
        # Attach the pytest-xdist worker count `n` to each gpu_num value
        # (n x gpus-per-test = 8 in every row).
        include:
          - n: 8
            gpu_num: gpu_num_1
          - n: 4
            gpu_num: gpu_num_2
          - n: 2
            gpu_num: gpu_num_4
          - n: 1
            gpu_num: gpu_num_8
    env:
      TEST_ENV: ${{ matrix.transformers }}
    timeout-minutes: 480
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
          mkdir ${{env.REPORT_DIR}} -p
          echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Downgrade transformers
        if: ${{matrix.transformers == 'legacy'}}
        run: |
          pip install transformers==4.57.6
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      # Exactly one of the next three steps runs, chosen by which backends the
      # `backend` input contains; the single-backend variants add a pytest marker.
      - name: Run other benchmark - all
        if: contains(fromJson(github.event.inputs.backend), 'turbomind') && contains(fromJson(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function' --alluredir=${{env.ALLURE_REPORT_DIR}}
      - name: Run other benchmark - turbomind
        if: contains(fromJson(github.event.inputs.backend), 'turbomind') && !contains(fromJson(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and turbomind' --alluredir=${{env.ALLURE_REPORT_DIR}}
      - name: Run other benchmark - pytorch
        if: contains(fromJson(github.event.inputs.backend), 'pytorch') && !contains(fromJson(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and pytorch' --alluredir=${{env.ALLURE_REPORT_DIR}}
      - name: Clear workfile
        if: always()
        run: |
          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
cuda12.8_whl_release matrix perms .github/workflows/cuda12.8_whl_release.yml
View raw YAML
# cuda12.8-whl-release: builds Linux and Windows wheels for CUDA 12.8 on every
# tag push (or manual dispatch) and attaches them to the GitHub release.
name: cuda12.8-whl-release

on:
  push:
    tags:
      - '*'
  workflow_dispatch:

# Least privilege: the build jobs only read the repository. Write access is
# granted on the `publish` job alone, which is the only one creating a release.
permissions:
  contents: read

jobs:
  linux-build:
    strategy:
      matrix:
        pyver: [py310, py311, py312, py313]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist
      # Quoted so the version stays a string instead of a YAML float.
      CUDA_VER: '12.8'
    steps:
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}/*
          retention-days: 1
          name: linux-${{ matrix.pyver }}

  windows-build:
    strategy:
      matrix:
        pyver: ['3.10', '3.11', '3.12', '3.13']
    runs-on: windows-latest
    steps:
      - name: Set git for windows
        run: |
          git config --global core.longpaths true
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Set up python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}
      - name: Install python packages
        run: |
          pip install build change-wheel-version
      - name: Setup CUDA Toolkit
        id: cuda-toolkit
        shell: pwsh
        run: ./builder/windows/setup_cuda.ps1
        env:
          INPUT_CUDA_VERSION: '12.8.1'
      # NOTE(review): the wheel is written to build/wheel, but Get-ChildItem
      # lists "build" without -Recurse, so change_wheel_version may never match
      # a file. Left unchanged because `publish` also appends +cu128 to the
      # file names; confirm the intended behaviour before touching either side.
      - name: Build wheel
        run: |
          python -m build --wheel -o build/wheel
          Get-ChildItem -Path "build" -Filter "*.whl" | ForEach-Object { change_wheel_version $_.FullName --local-version cu128 --delete-old-wheel }
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: build/wheel/*
          retention-days: 1
          name: windows-${{ matrix.pyver }}

  # Collects every wheel artifact, appends +cu128 to the version in each file
  # name and uploads the files to the release for the pushed tag.
  publish:
    runs-on: ubuntu-latest
    environment: 'prod'
    # Release upload needs write access to repository contents.
    permissions:
      contents: write
    needs:
      - linux-build
      - windows-build
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifact
          merge-multiple: true
      - name: Add cuda version to package name
        run: |
          ver=$(cat lmdeploy/version.py | grep '__version__ =' | cut -d\' -f2)
          cuver=$ver+cu128
          ls -lh
          cd artifact
          for file in *; do
            mv "$file" "`echo $file | sed "s/$ver/$cuver/g"`";
          done
      - name: Display artifacts
        run: ls artifact/ -lh
      # Skipped on manual dispatch from a branch: there is no tag to release to.
      - name: Publish
        uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: artifact/*
daily_ete_test matrix .github/workflows/daily_ete_test.yml
View raw YAML
# daily_ete_test: end-to-end regression suite, runnable on demand and on a
# cron schedule. On scheduled runs `inputs.*` is empty, which is why the env
# values below fall back to 'main'.
name: daily_ete_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        # Description corrected to match the actual default below.
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: "['llm','mllm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['quant', 'tools','restful','pipeline','benchmark','evaluation']"
  schedule:
    - cron: '00 14 * * 0-4'

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  ROOT_DIR: /nvme/qa_test_models
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
  RUN_ID: ${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
jobs:
# Builds the py310 manylinux wheel on a hosted runner and uploads it as a
# one-day artifact. Runs on schedule, or on dispatch when offline_mode is off.
linux-build:
  runs-on: ubuntu-latest
  if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
  strategy:
    matrix:
      pyver: [py310]
  env:
    DOCKER_TAG: cuda12.8
    PLAT_NAME: manylinux2014_x86_64
    PYTHON_VERSION: ${{ matrix.pyver }}
  steps:
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    # OUTPUT_FOLDER is not set on this job; it comes from the workflow-level env.
    - name: Build
      run: |
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # remove -it
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        if-no-files-found: error
        retention-days: 1
# Stages the source tree and the wheel into TEST_CODE_PATH on the mounted
# /nvme/qa_test_models volume so later test jobs can copy them from there.
download_pkgs:
  needs: linux-build
  # !cancelled() lets this job run even when linux-build was skipped.
  if: ${{!cancelled()}}
  runs-on: [self-hosted, linux-a100]
  timeout-minutes: 50
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Clone repository
      uses: actions/checkout@v2
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    # mkdir -p: TEST_CODE_PATH embeds repo_ref, so a ref containing '/'
    # (e.g. feature/xyz) needs its intermediate directory created too.
    - name: Copy repository
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
    - name: Copy repository - offline
      if: ${{inputs.offline_mode}}
      run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
    - name: Download Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310
    - name: Copy Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
    - name: Copy Artifacts - offline
      if: ${{inputs.offline_mode}}
      run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
    - name: Mark as start
      run: |
        chmod -R 777 ${{env.TEST_CODE_PATH}}
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
# Runs the AWQ (w4a16) and w8a8 quantization test suites. Runs on schedule, or
# on dispatch when regression_func contains 'quant'.
test_quantization:
  runs-on: [self-hosted, linux-a100]
  needs: download_pkgs
  if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
  timeout-minutes: 150
  strategy:
    matrix:
      transformers: ["", "legacy"]
  env:
    TEST_ENV: ${{ matrix.transformers }}
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /nvme/huggingface_hub:/nvme/huggingface_hub
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install auto_gptq matplotlib attrdict
        python3 -m pip install -r requirements/lite.txt
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{env.DEEPSEEK_VL}} --no-deps
        rm -rf ${{env.DEEPSEEK_VL}}/build
    # NOTE(review): transformers is pinned here for every matrix entry, not
    # only when matrix.transformers == 'legacy' — confirm this is intended.
    - name: Check env
      run: |
        pip install transformers==4.57.6
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # `|| true` keeps pytest failures from failing the step; the coverage file
    # is then moved into the report directory under a timestamped name.
    - name: Test lmdeploy - quantization w4a16
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
      continue-on-error: true
      run: |
        pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - quantization w8a8
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
      continue-on-error: true
      run: |
        pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # Always runs: marks the report as done and recreates an empty workspace.
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 ${{env.ROOT_DIR}}
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Runs the chat / pipeline / restful tool tests for each
# backend x model x transformers x function combination, plus one extra
# "other" entry that runs autotest/toolchain.
test_tools:
  if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  timeout-minutes: 300
  strategy:
    fail-fast: false
    matrix:
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
      model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
      transformers: ["", "legacy"]
      function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
      # No chat tests exist for mllm: the chat step below only runs for llm.
      exclude:
        - backend: turbomind
          model: mllm
          function: chat
        - backend: pytorch
          model: mllm
          function: chat
      # Extra entry consumed by the "local testcase" step.
      include:
        - backend: turbomind
          model: llm
          function: other
  env:
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    TEST_ENV: ${{ matrix.transformers }}
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /nvme/huggingface_hub:/nvme/huggingface_hub
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{env.DEEPSEEK_VL}} --no-deps
        rm -rf ${{env.DEEPSEEK_VL}}/build
    - name: Downgrade transformers
      if: ${{matrix.transformers == 'legacy'}}
      run: |
        pip install transformers==4.57.6
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
        cp -r /nvme/qa_test_models/offline_pkg/lora .
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # In the three multi-run steps below every `mv .coverage` now carries
    # `|| true` (previously only the first one did), so a missing coverage
    # file from one gpu_num run no longer aborts the remaining runs.
    - name: Test lmdeploy - chat
      continue-on-error: true
      if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
      run: |
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - pipeline
      continue-on-error: true
      if: matrix.function == 'pipeline'
      run: |
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - restful
      continue-on-error: true
      if: matrix.function == 'restful'
      run: |
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_8 and not pr_test' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - local testcase
      if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'other'
      run: |
        pytest autotest/toolchain --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # Always runs: marks the report as done and recreates an empty workspace.
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 ${{env.ROOT_DIR}}
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
test_restful:
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
runs-on: [self-hosted, linux-a100]
needs: test_quantization
strategy:
fail-fast: false
matrix:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
model_path: ['Qwen/Qwen3-8B-Base', 'Qwen/Qwen3-30B-A3B', 'Qwen/Qwen3-32B', 'OpenGVLab/InternVL3_5-30B-A3B', 'OpenGVLab/InternVL3-38B', 'Qwen/Qwen3-VL-8B-Instruct', 'Qwen/Qwen3-VL-30B-A3B-Instruct']
include:
- tp: 2
model: Qwen3-8B-Base
model_path: Qwen/Qwen3-8B-Base
case_info: ['completions_v1']
generate_type: base
- tp: 2
model: Qwen3-30B-A3B
model_path: Qwen/Qwen3-30B-A3B
case_info: ['chat_completions_v1', 'generate']
generate_type: all
extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
backend: pytorch
- tp: 2
model: Qwen3-30B-A3B
model_path: Qwen/Qwen3-30B-A3B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
backend: turbomind
- tp: 2
model: InternVL3_5-30B-A3B
model_path: OpenGVLab/InternVL3_5-30B-A3B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
backend: pytorch
- tp: 2
model: InternVL3_5-30B-A3B
model_path: OpenGVLab/InternVL3_5-30B-A3B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
backend: turbomind
- tp: 2
model: Qwen3-VL-30B-A3B-Instruct
model_path: Qwen/Qwen3-VL-30B-A3B-Instruct
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
backend: pytorch
- tp: 2
model: Qwen3-VL-30B-A3B-Instruct
model_path: Qwen/Qwen3-VL-30B-A3B-Instruct
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
backend: turbomind
- tp: 2
model: Qwen3-32B
model_path: Qwen/Qwen3-32B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
- tp: 1
model: Qwen3-VL-8B-Instruct
model_path: Qwen/Qwen3-VL-8B-Instruct
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs --enable-return-routed-experts'
backend: pytorch
- tp: 1
model: Qwen3-VL-8B-Instruct
model_path: Qwen/Qwen3-VL-8B-Instruct
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
backend: turbomind
- tp: 2
model: InternVL3-38B
model_path: OpenGVLab/InternVL3-38B
case_info: ['chat_completions_v1', 'generate']
generate_type: logprob
extra: '--logprobs-mode raw_logprobs'
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /nvme/huggingface_hub:/nvme/huggingface_hub
- /mnt/121:/mnt/121
- /mnt/104:/mnt/104
- /mnt/bigdisk:/mnt/bigdisk
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Copy repository and Artifacts
run: |
cp -r ${{env.TEST_CODE_PATH}}/. .
mkdir ${{env.REPORT_DIR}} -p
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
# remove tmp log in testcase
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
# Launches the api_server in the background for this matrix cell and waits
# (up to 240 polls, 5 s apart) until its /health endpoint answers.
- name: Start restful api
  run: |
    lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} --allow-terminate-by-client > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
    restful_pid=$!
    echo "restful_pid=$restful_pid"
    for i in $(seq 1 240)
    do
      sleep 5
      echo "health check try $i"
      if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
        echo "health check success"
        exit 0
      fi
      # Fail fast: if the server process is already gone there is no point
      # in polling for the rest of the 20-minute window.
      if ! kill -0 "$restful_pid" 2>/dev/null; then
        echo "api server exited before becoming healthy"
        exit 1
      fi
    done
    echo "health check fail"
    # Best-effort shutdown; the server never became healthy, so this request
    # may itself fail and must not pre-empt the explicit failure below.
    curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
    exit 1
- name: Test lmdeploy - chat_completions_v1
if: matrix.model != 'internlm2_5-20b-chat' && matrix.model != 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
# Intern-S1 variant of the chat_completions_v1 run: unlike the generic step it
# does not exclude the interns1 marker. Named distinctly so the two steps can
# be told apart in the job log.
- name: Test lmdeploy - chat_completions_v1 - Intern-S1
  if: matrix.model == 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
  timeout-minutes: 60
  run: |
    pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
    mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - chat_completions_v1 - internlm2_5-20b-chat
if: matrix.model == 'internlm2_5-20b-chat' && contains(matrix.case_info, 'chat_completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - completions_v1 - internlm2_5-20b
if: matrix.model == 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - completions_v1 - other
if: matrix.model != 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test generate - base
if: matrix.generate_type == 'base' && contains(matrix.case_info, 'generate')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not logprob and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test generate - logprob
if: matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test generate - all
if: matrix.generate_type == 'all' && contains(matrix.case_info, 'generate')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Kill api server
if: always()
run: |
curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
- name: Clear workfile
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
# Interface-level pipeline tests on the self-hosted A100 runner. Runs on the
# schedule, or on manual dispatch when regression_func contains 'pipeline',
# after test_quantization.
test_pipeline:
  if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline'))}}
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  timeout-minutes: 240
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /nvme/huggingface_hub:/nvme/huggingface_hub
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged test code and wheel into the workspace and (re)start
    # the status file for this run's report directory.
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{env.DEEPSEEK_VL}} --no-deps
        rm -rf ${{env.DEEPSEEK_VL}}/build
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    - name: Test lmdeploy - interface pipeline case
      run: |
        # Every pytest batch tolerates failures (`|| true`). Archiving the
        # coverage file is best-effort for every batch as well, so a missing
        # .coverage after one batch cannot skip the batches that follow.
        pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_8 and not pr_test' -n 1 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    # Always runs: records completion and recreates an empty, world-writable
    # workspace directory for the next job on this runner.
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 ${{env.ROOT_DIR}}
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Benchmark-script tests on the self-hosted A100 runner. Runs on the schedule,
# or on manual dispatch when regression_func contains 'benchmark', after
# test_quantization.
test_benchmark:
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark'))}}
runs-on: [self-hosted, linux-a100]
needs: test_quantization
timeout-minutes: 120
container:
image: openmmlab/lmdeploy:latest-cu12.8
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /nvme/huggingface_hub:/nvme/huggingface_hub
- /mnt/121:/mnt/121
- /mnt/104:/mnt/104
- /mnt/bigdisk:/mnt/bigdisk
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
# Pull the staged test code and wheel into the workspace and (re)start the
# status file for this run's report directory.
- name: Copy repository and Artifacts
run: |
cp -r ${{env.TEST_CODE_PATH}}/. .
mkdir ${{env.REPORT_DIR}} -p
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
# Replace any preinstalled lmdeploy with the staged wheel (without pulling
# its dependencies), then add the test requirements and the DEEPSEEK_VL package.
- name: Install lmdeploy
run: |
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
pip install ${{env.DEEPSEEK_VL}} --no-deps
rm -rf ${{env.DEEPSEEK_VL}}/build
# Log the environment and point autotest/.pytest_cache at the report directory.
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
# remove tmp log in testcase
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
# Test failures are tolerated (`|| true`); the coverage file is archived
# under a timestamped name in the report directory.
- name: Test benchmark script
run: |
pytest autotest/benchmark -n 4 -m function --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
# Always runs: records completion and recreates an empty workspace directory.
# NOTE(review): ROOT_DIR is not defined in the part of this workflow visible
# here; if it expands to nothing the chmod fails and the cleanup below is
# skipped -- confirm it is set in the workflow-level env.
- name: Clear workfile
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
test_restful_legacy:
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
runs-on: [self-hosted, linux-a100]
needs: test_quantization
strategy:
fail-fast: false
matrix:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
model_path: ['internlm/Intern-S1']
include:
- tp: 8
model: Intern-S1
model_path: internlm/Intern-S1
case_info: ['chat_completions_v1', 'generate']
generate_type: base
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /nvme/huggingface_hub:/nvme/huggingface_hub
- /mnt/121:/mnt/121
- /mnt/104:/mnt/104
- /mnt/bigdisk:/mnt/bigdisk
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Copy repository and Artifacts
run: |
cp -r ${{env.TEST_CODE_PATH}}/. .
mkdir ${{env.REPORT_DIR}} -p
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
pip install transformers==4.57.6
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
# remove tmp log in testcase
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
# Launches the api_server in the background for this matrix cell and waits
# (up to 240 polls, 5 s apart) until its /health endpoint answers.
- name: Start restful api
  run: |
    lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} --allow-terminate-by-client > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
    restful_pid=$!
    echo "restful_pid=$restful_pid"
    for i in $(seq 1 240)
    do
      sleep 5
      echo "health check try $i"
      if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
        echo "health check success"
        exit 0
      fi
      # Fail fast: if the server process is already gone there is no point
      # in polling for the rest of the 20-minute window.
      if ! kill -0 "$restful_pid" 2>/dev/null; then
        echo "api server exited before becoming healthy"
        exit 1
      fi
    done
    echo "health check fail"
    # Best-effort shutdown; the server never became healthy, so this request
    # may itself fail and must not pre-empt the explicit failure below.
    curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
    exit 1
- name: Test lmdeploy - chat_completions_v1
if: matrix.model != 'internlm2_5-20b-chat' && matrix.model != 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
# Intern-S1 variant of the chat_completions_v1 run: unlike the generic step it
# does not exclude the interns1 marker. Named distinctly so the two steps can
# be told apart in the job log.
- name: Test lmdeploy - chat_completions_v1 - Intern-S1
  if: matrix.model == 'Intern-S1' && contains(matrix.case_info, 'chat_completions_v1')
  timeout-minutes: 60
  run: |
    pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
    mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - chat_completions_v1 - internlm2_5-20b-chat
if: matrix.model == 'internlm2_5-20b-chat' && contains(matrix.case_info, 'chat_completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - completions_v1 - internlm2_5-20b
if: matrix.model == 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - completions_v1 - other
if: matrix.model != 'internlm2_5-20b' && contains(matrix.case_info, 'completions_v1')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test generate - base
if: matrix.generate_type == 'base' && contains(matrix.case_info, 'generate')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not logprob and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test generate - logprob
if: matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test generate - all
if: matrix.generate_type == 'all' && contains(matrix.case_info, 'generate')
timeout-minutes: 60
run: |
pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Kill api server
if: always()
run: |
curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
- name: Clear workfile
if: always()
run: |
echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
chmod -R 777 ${{env.ROOT_DIR}}
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
# Interface-level pipeline tests against a pinned transformers release
# (4.57.6). Runs on the schedule, or on manual dispatch when regression_func
# contains 'pipeline', after test_quantization.
test_pipeline_legacy:
  if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline'))}}
  runs-on: [self-hosted, linux-a100]
  needs: test_quantization
  timeout-minutes: 240
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /nvme/huggingface_hub:/nvme/huggingface_hub
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged test code and wheel into the workspace and (re)start
    # the status file for this run's report directory.
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
        pip install ${{env.DEEPSEEK_VL}} --no-deps
        rm -rf ${{env.DEEPSEEK_VL}}/build
    - name: Check env
      run: |
        # Pin transformers first so the package list below shows the
        # environment the tests actually run in.
        pip install transformers==4.57.6
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    - name: Test lmdeploy - interface pipeline case
      run: |
        # Every pytest batch tolerates failures (`|| true`). Archiving the
        # coverage file is best-effort for every batch as well, so a missing
        # .coverage after one batch cannot skip the batches that follow.
        pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
        pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_8 and not pr_test' -n 1 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    # Always runs: records completion and recreates an empty, world-writable
    # workspace directory for the next job on this runner.
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 ${{env.ROOT_DIR}}
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Merges the per-batch coverage files that the test jobs dropped into
# REPORT_DIR and produces an XML and a terminal report.
get_coverage_report:
  if: ${{!cancelled()}}
  runs-on: [self-hosted, linux-a100]
  # Wait for every job that writes .coverage.* files into REPORT_DIR, including
  # the legacy variants; otherwise the combine below can run while they are
  # still producing data. `if: !cancelled()` keeps this job running even when
  # some of these were skipped or failed.
  needs: [test_tools, test_restful, test_pipeline, test_benchmark, test_restful_legacy, test_pipeline_legacy]
  timeout-minutes: 5
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: cp -r ${{env.TEST_CODE_PATH}}/. .
    # Marks the run as done in the status file, then installs the staged wheel
    # and the test requirements.
    - name: Install lmdeploy
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Get coverage report
      run: |
        pip install coverage
        coverage combine ${{env.REPORT_DIR}}
        coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
        coverage report -m
        mv .coverage ${{env.REPORT_DIR}}/.coverage
    # Always runs: recreates an empty, world-writable workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 ${{env.ROOT_DIR}}
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
daily_ete_test_3090 matrix .github/workflows/daily_ete_test_3090.yml
View raw YAML
# End-to-end regression workflow for the 3090 runners. Triggered manually
# (workflow_dispatch, with the filters below) or by the cron schedule.
name: daily_ete_test_3090
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        # Description corrected to state the actual default value below.
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The list-valued inputs below are strings that jobs decode with fromJSON().
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: "['llm','mllm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['quant', 'tools', 'restful']"
  schedule:
    - cron: '00 14 * * 0-4'
# Workflow-wide environment. Scheduled runs have no inputs, so values derived
# from inputs.repo_ref fall back to 'main'.
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  RUN_ID: ${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
jobs:
# Builds the lmdeploy wheel on a GitHub-hosted runner and uploads it as a
# one-day artifact. Runs on the schedule, or on manual dispatch unless
# offline_mode is set.
linux-build:
if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
PLAT_NAME: manylinux2014_x86_64
DOCKER_TAG: cuda12.4
steps:
# NOTE(review): this action is referenced by branch (@main), so its content
# can change between runs -- consider pinning to a tag or commit SHA.
- name: Free disk space
uses: jlumbroso/free-disk-space@main
with:
# This might remove tools that are actually needed, if set to "true" but frees about 6 GB
tool-cache: false
docker-images: false
# All of these default to true, but feel free to set to "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: false
# Checks out the repository/ref under test (inputs on manual dispatch,
# InternLM/lmdeploy@main otherwise).
- name: Checkout repository
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
# Echoes the build parameters, strips the interactive `-it` flags from the
# docker invocation in build_wheel.sh, then runs that script.
- name: Build
run: |
echo ${PYTHON_VERSION}
echo ${PLAT_NAME}
echo ${DOCKER_TAG}
echo ${OUTPUT_FOLDER}
echo ${GITHUB_RUN_ID}
# remove -it
sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
# Fails the step when the build produced no files; the artifact name embeds
# the run id and Python tag.
- name: Upload Artifacts
uses: actions/upload-artifact@v4
with:
if-no-files-found: error
path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
retention-days: 1
name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
# Stages the test code and the lmdeploy wheel into TEST_CODE_PATH on the
# self-hosted 3090 runner. Online runs use a fresh checkout plus the
# linux-build artifact; offline runs copy both from OFFLINE_CODE_PATH.
download_pkgs:
needs: linux-build
if: ${{!cancelled()}}
runs-on: [self-hosted, 3090-r1]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
- /data1:/data1
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
# Online path: check out the repository/ref under test.
- name: Clone repository
uses: actions/checkout@v2
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
# Online path: replace TEST_CODE_PATH with the checked-out tree.
- name: Copy repository
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
# Offline path: replace TEST_CODE_PATH with the pre-staged offline tree.
- name: Copy repository - offline
if: ${{inputs.offline_mode}}
run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
# Online path: fetch the wheel artifact uploaded by linux-build (py310).
- name: Download Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
# Online path: drop any stale wheel in TEST_CODE_PATH and copy the new one in.
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
# Offline path: same, but the wheel comes from OFFLINE_CODE_PATH.
- name: Copy Artifacts - offline
if: ${{inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
# Creates the report directory and writes the start timestamp to status.txt.
- name: Mark as start
run: |
mkdir ${{env.REPORT_DIR}} -p
echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
# Quantization tests on the self-hosted 3090 runner. Requires download_pkgs to
# have succeeded; runs on the schedule, or on manual dispatch when
# regression_func contains 'quant'.
test_quantization:
  needs: download_pkgs
  if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
  runs-on: [self-hosted, 3090-r1]
  timeout-minutes: 150
  env:
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    TEST_ENV: 3090_legacy
  container:
    image: openmmlab/lmdeploy:latest-cu12
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /data1:/data1
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Pull the staged test code and wheel into the workspace and (re)start
    # the status file for this run's report directory.
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install auto_gptq matplotlib
        python3 -m pip install -r requirements/lite.txt
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Check env
      run: |
        # Pin transformers before listing packages so the logged environment
        # is the one the tests actually run in.
        pip install transformers==4.57.6
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # AWQ (w4a16) quantization; runs only when the turbomind backend is selected.
    - name: Test lmdeploy - quantization w4a16
      continue-on-error: true
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
      run: |
        pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # w8a8 quantization; runs only when the pytorch backend is selected.
    - name: Test lmdeploy - quantization w8a8
      continue-on-error: true
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
      run: |
        pytest autotest/tools/quantization/test_quantization_w8a8.py --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # Always runs: records completion, opens up the report directory and
    # recreates an empty, world-writable workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Tool-level regression on the 3090 runner. One matrix cell is created per
# backend / transformers flavour / model kind / function combination, and each
# cell only executes the test step whose condition matches its `function`.
test_tools:
  if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
  needs: test_quantization
  runs-on: [self-hosted, 3090-r1]
  timeout-minutes: 300
  strategy:
    fail-fast: false
    matrix:
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
      transformers:
        - '3090'
        - '3090_legacy'
      model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
      function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
      # The chat function is excluded for mllm on both backends.
      exclude:
        - {backend: turbomind, model: mllm, function: chat}
        - {backend: pytorch, model: mllm, function: chat}
  env:
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    # Mirrors the matrix flavour ("3090" or "3090_legacy").
    TEST_ENV: ${{matrix.transformers}}
  container:
    image: openmmlab/lmdeploy:latest-cu12
    options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /data1:/data1
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # Bring the staged sources and wheel into the workspace and stamp the
    # start time into the report directory.
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    # Only the "legacy" flavour pins the older transformers release.
    - name: Downgrade transformers
      if: matrix.transformers == '3090_legacy'
      run: pip install transformers==4.57.6
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # Each test step below tolerates pytest failures (`|| true`) and moves the
    # coverage data file into REPORT_DIR under a timestamped name.
    - name: Test lmdeploy - chat
      if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
      continue-on-error: true
      run: |
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - pipeline
      if: matrix.function == 'pipeline'
      continue-on-error: true
      run: |
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - restful
      if: matrix.function == 'restful'
      continue-on-error: true
      run: |
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    # Always runs: records completion, opens up report permissions and
    # recreates an empty workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# RESTful interface regression on the 3090 runner. For every backend /
# transformers flavour / model cell an api_server is started in the
# background, the interface test suites listed in `case_info` are run against
# it, and the server is terminated afterwards.
test_restful:
  if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
  runs-on: [self-hosted, 3090-r1]
  needs: test_quantization
  strategy:
    fail-fast: false
    matrix:
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
      transformers: ["3090", "3090_legacy"]
      model_path: ['internlm/internlm3-8b-instruct', 'Qwen/Qwen3-8B']
      # Per-model settings, attached to the cells with a matching model_path.
      include:
        - tp: 1
          model: internlm3-8b-instruct
          model_path: internlm/internlm3-8b-instruct
          case_info: ['chat_completions_v1', 'generate']
          generate_type: logprob
          extra: '--logprobs-mode raw_logprobs'
        - tp: 1
          model: Qwen3-8B
          model_path: Qwen/Qwen3-8B
          case_info: ['completions_v1']
          generate_type: base
  timeout-minutes: 60
  container:
    image: openmmlab/lmdeploy:latest-cu12
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  env:
    TEST_ENV: ${{matrix.transformers}}
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Downgrade transformers
      if: ${{matrix.transformers == '3090_legacy'}}
      run: |
        pip install transformers==4.57.6
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # Launch the server in the background and poll /health every 5 seconds,
    # up to 180 tries, before giving up.
    - name: Start restful api
      run: |
        lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
        # Keep the PID in a shell variable too: values appended to GITHUB_ENV
        # only become visible in later steps, so the failure path below would
        # otherwise call kill with an empty PID and leave the server running.
        restful_pid=$!
        echo "restful_pid=${restful_pid}" >> "$GITHUB_ENV"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            echo "health check success"
            exit 0
          fi
        done
        echo "health check fail"
        kill -15 "$restful_pid" 2>/dev/null || true
        exit 1
    # pytest failures are tolerated (`|| true`); the coverage data file is
    # moved into REPORT_DIR under a timestamped name after each suite.
    - name: Test lmdeploy - chat_completions_v1
      if: contains(matrix.case_info, 'chat_completions_v1')
      timeout-minutes: 60
      run: |
        pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - completions_v1 - other
      if: contains(matrix.case_info, 'completions_v1')
      timeout-minutes: 60
      run: |
        pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test generate - logprob
      if: matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
      timeout-minutes: 60
      run: |
        pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # restful_pid is available here because it was exported through
    # GITHUB_ENV by the "Start restful api" step.
    - name: Kill api server
      if: always()
      run: |
        kill -15 "$restful_pid"
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Runs after the tool and restful jobs (unless the run was cancelled) and
# combines the coverage data files found in REPORT_DIR into a single report.
get_coverage_report:
  if: ${{!cancelled()}}
  needs:
    - test_tools
    - test_restful
  runs-on: [self-hosted, 3090-r1]
  timeout-minutes: 5
  container:
    image: openmmlab/lmdeploy:latest-cu12
    options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
    # Also appends the completion marker to the shared status file.
    - name: Install lmdeploy
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    # Merge the data files, emit XML plus a terminal summary, then store the
    # merged data file next to the report.
    - name: Get coverage report
      run: |
        pip install coverage
        coverage combine ${{env.REPORT_DIR}}
        coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
        coverage report -m
        mv .coverage ${{env.REPORT_DIR}}/.coverage
    # Always runs: opens up report permissions and recreates an empty
    # workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
daily_ete_test_5080 matrix .github/workflows/daily_ete_test_5080.yml
View raw YAML
# End-to-end regression workflow for the 5080 runner. It can be started
# manually with the filters below, or by the cron schedule.
name: daily_ete_test_5080
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        # The description now states the real default (owner/repo form).
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # backend / model / function / regression_func are list-valued strings
      # that the jobs decode with fromJSON().
      backend:
        required: true
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, mllm. Default contains all models'
        type: string
        default: "['llm','mllm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['quant', 'tools', 'restful']"
  schedule:
    - cron: '00 14 * * 0-4'
env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Scheduled runs have no inputs, hence the `|| 'main'` fallbacks below.
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  # Re-run attempts of a scheduled run use '--lf --lfnf none'; everything
  # else uses '--lf'.
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  RUN_ID: ${{ inputs.repo_ref || 'main' }}_${{ github.run_id }}
jobs:
# Builds the lmdeploy wheel on a GitHub-hosted runner and uploads it as a
# short-lived artifact for the self-hosted test jobs. Skipped for manual runs
# in offline mode.
linux-build:
  if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
  strategy:
    matrix:
      pyver: [py310]
  runs-on: ubuntu-latest
  env:
    PYTHON_VERSION: ${{ matrix.pyver }}
    PLAT_NAME: manylinux2014_x86_64
    DOCKER_TAG: cuda12.8
  steps:
    # NOTE(review): this third-party action tracks a moving branch; consider
    # pinning it to a release tag or commit SHA.
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    # checkout@v4 replaces v3, which targets the deprecated Node 16 action
    # runtime; the inputs used here are unchanged between the two versions.
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    - name: Build
      run: |
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # remove -it
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
    # The artifact name is what download_pkgs looks up (run id + pyver).
    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        if-no-files-found: error
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        retention-days: 1
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
# Stages the test sources and the lmdeploy wheel under TEST_CODE_PATH on the
# self-hosted runner. Online mode (schedule, or offline_mode false) checks the
# repository out and downloads the freshly built wheel; offline mode copies
# both from OFFLINE_CODE_PATH instead.
download_pkgs:
  needs: linux-build
  if: ${{!cancelled()}}
  runs-on: [self-hosted, 5080-r1]
  timeout-minutes: 50
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/3090:/mnt/3090
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Clone repository
      uses: actions/checkout@v2
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    # `mkdir -p` is required because TEST_CODE_PATH embeds repo_ref: a ref
    # containing '/' (or a missing parent directory) makes a plain mkdir fail.
    - name: Copy repository
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
    - name: Copy repository - offline
      if: ${{inputs.offline_mode}}
      run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
    - name: Download Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310
    # Replace any wheel already present in the staged tree.
    - name: Copy Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
    - name: Copy Artifacts - offline
      if: ${{inputs.offline_mode}}
      run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
    # Creates the report directory and records the start time in status.txt.
    - name: Mark as start
      run: |
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
# Quantization regression on the 5080 runner: the w4a16 (AWQ) suite runs when
# the turbomind backend is selected and the w8a8 suite when pytorch is
# selected; scheduled runs execute both.
test_quantization:
  if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
  needs: download_pkgs
  runs-on: [self-hosted, 5080-r1]
  timeout-minutes: 150
  env:
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    TEST_ENV: '5080'
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/3090:/mnt/3090
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install auto_gptq matplotlib
        python3 -m pip install -r requirements/lite.txt
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    # Polls `lmdeploy check_env` up to 10 times, 5 seconds apart, while it
    # reports "CUDA available: False"; the step carries on after the loop
    # either way.
    - name: Check env
      run: |
        for i in $(seq 1 10); do
          output=$(lmdeploy check_env 2>&1)
          if echo "$output" | grep -q "CUDA available: False"; then
            echo "CUDA not available (attempt $i/10), retrying in 5 seconds..."
            sleep 5
          else
            echo "CUDA check passed"
            break
          fi
        done
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # The first suite passes --clean-alluredir, the second one does not.
    - name: Test lmdeploy - quantization w4a16
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
      continue-on-error: true
      run: |
        pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - quantization w8a8
      if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
      continue-on-error: true
      run: |
        pytest autotest/tools/quantization/test_quantization_w8a8.py --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # Always runs: opens up report permissions and recreates an empty
    # workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Tool-level regression on the 5080 runner. One matrix cell is created per
# backend / model kind / transformers flavour / function combination, and each
# cell only executes the test step whose condition matches its `function`.
test_tools:
  if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
  needs: test_quantization
  runs-on: [self-hosted, 5080-r1]
  timeout-minutes: 300
  strategy:
    fail-fast: false
    matrix:
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
      model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
      transformers:
        - '5080'
        - '5080_legacy'
      function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
      # The chat function is excluded for mllm on both backends.
      exclude:
        - {backend: turbomind, model: mllm, function: chat}
        - {backend: pytorch, model: mllm, function: chat}
  env:
    PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
    MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
    MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    # Mirrors the matrix flavour ("5080" or "5080_legacy").
    TEST_ENV: ${{ matrix.transformers }}
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/3090:/mnt/3090
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    # Only the "legacy" flavour pins the older transformers release.
    - name: Downgrade transformers
      if: matrix.transformers == '5080_legacy'
      run: pip install transformers==4.57.6
    # Polls `lmdeploy check_env` up to 10 times, 5 seconds apart, while it
    # reports "CUDA available: False"; the step carries on after the loop
    # either way.
    - name: Check env
      run: |
        for i in $(seq 1 10); do
          output=$(lmdeploy check_env 2>&1)
          if echo "$output" | grep -q "CUDA available: False"; then
            echo "CUDA not available (attempt $i/10), retrying in 5 seconds..."
            sleep 5
          else
            echo "CUDA check passed"
            break
          fi
        done
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # Each test step below tolerates pytest failures (`|| true`) and moves the
    # coverage data file into REPORT_DIR under a timestamped name.
    - name: Test lmdeploy - chat
      if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
      continue-on-error: true
      run: |
        pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - pipeline
      if: matrix.function == 'pipeline'
      continue-on-error: true
      run: |
        pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    - name: Test lmdeploy - restful
      if: matrix.function == 'restful'
      continue-on-error: true
      run: |
        pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
    # Always runs: opens up report permissions and recreates an empty
    # workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# RESTful interface regression on the 5080 runner. For every backend / model /
# transformers flavour cell an api_server is started in the background, the
# interface test suites listed in `case_info` are run against it, and the
# server is terminated afterwards.
test_restful:
  if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
  runs-on: [self-hosted, 5080-r1]
  needs: test_quantization
  strategy:
    fail-fast: false
    matrix:
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
      model_path: ['meta-llama/Llama-3.2-3B-Instruct', 'Qwen/Qwen3-4B']
      transformers: ["5080", "5080_legacy"]
      # Per-model settings, attached to the cells with a matching model_path.
      include:
        - tp: 1
          model: Llama-3.2-3B-Instruct
          model_path: meta-llama/Llama-3.2-3B-Instruct
          case_info: ['chat_completions_v1', 'generate']
          generate_type: logprob
          extra: '--logprobs-mode raw_logprobs'
        - tp: 1
          model: Qwen3-4B
          model_path: Qwen/Qwen3-4B
          case_info: ['completions_v1']
          generate_type: base
  timeout-minutes: 60
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/3090:/mnt/3090
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  env:
    TEST_ENV: ${{ matrix.transformers }}
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Downgrade transformers
      if: ${{matrix.transformers == '5080_legacy'}}
      run: |
        pip install transformers==4.57.6
    # Polls `lmdeploy check_env` up to 10 times, 5 seconds apart, while it
    # reports "CUDA available: False"; the step carries on after the loop
    # either way.
    - name: Check env
      run: |
        for i in $(seq 1 10); do
          output=$(lmdeploy check_env 2>&1)
          if echo "$output" | grep -q "CUDA available: False"; then
            echo "CUDA not available (attempt $i/10), retrying in 5 seconds..."
            sleep 5
          else
            echo "CUDA check passed"
            break
          fi
        done
        python3 -m pip list
        lmdeploy check_env
        rm -rf allure-results
        # remove tmp log in testcase
        mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
        ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
    # Launch the server in the background and poll /health every 5 seconds,
    # up to 50 tries, before giving up.
    - name: Start restful api
      run: |
        lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model_path}} --tp ${{matrix.tp}} --backend ${{matrix.backend}} ${{matrix.extra}} > ${{env.REPORT_DIR}}/${{matrix.backend}}_${{matrix.model}}_${{matrix.generate_type}}_start_restful.log 2>&1 &
        # Keep the PID in a shell variable too: values appended to GITHUB_ENV
        # only become visible in later steps, so the failure path below would
        # otherwise call kill with an empty PID and leave the server running.
        restful_pid=$!
        echo "restful_pid=${restful_pid}" >> "$GITHUB_ENV"
        for i in $(seq 1 50)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            echo "health check success"
            exit 0
          fi
        done
        echo "health check fail"
        kill -15 "$restful_pid" 2>/dev/null || true
        exit 1
    # pytest failures are tolerated (`|| true`); the coverage data file is
    # moved into REPORT_DIR under a timestamped name after each suite.
    - name: Test lmdeploy - chat_completions_v1
      if: contains(matrix.case_info, 'chat_completions_v1')
      timeout-minutes: 60
      run: |
        pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not internlm2_5 and not interns1' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test lmdeploy - completions_v1 - other
      if: contains(matrix.case_info, 'completions_v1')
      timeout-minutes: 60
      run: |
        pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}} and not internlm2_5' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    - name: Test generate - logprob
      if: matrix.generate_type == 'logprob' && contains(matrix.case_info, 'generate')
      timeout-minutes: 60
      run: |
        pytest autotest/interface/restful/test_restful_generate.py -n 20 -k '${{matrix.model_path}} and ${{matrix.backend}}' -m 'not not_${{matrix.backend}} and not experts' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
        mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
    # restful_pid is available here because it was exported through
    # GITHUB_ENV by the "Start restful api" step.
    - name: Kill api server
      if: always()
      run: |
        kill -15 "$restful_pid"
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
# Runs after the tool and restful jobs (unless the run was cancelled) and
# combines the coverage data files found in REPORT_DIR into a single report.
get_coverage_report:
  if: ${{!cancelled()}}
  needs:
    - test_tools
    - test_restful
  runs-on: [self-hosted, 5080-r1]
  timeout-minutes: 5
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/3090:/mnt/3090
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
    # Also appends the completion marker to the shared status file.
    - name: Install lmdeploy
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    # Merge the data files, emit XML plus a terminal summary, then store the
    # merged data file next to the report.
    - name: Get coverage report
      run: |
        pip install coverage
        coverage combine ${{env.REPORT_DIR}}
        coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
        coverage report -m
        mv .coverage ${{env.REPORT_DIR}}/.coverage
    # Always runs: opens up report permissions and recreates an empty
    # workspace directory.
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
docker matrix .github/workflows/docker.yml
View raw YAML
# Publishes the lmdeploy docker images on pushes to main, on version tags,
# or on manual dispatch.
name: publish-docker
on:
  push:
    # NOTE(review): the negated docker.yml entry precedes ".github/**".
    # GitHub evaluates path patterns in order, so the later ".github/**" may
    # re-ignore this workflow file - confirm the intended trigger.
    paths-ignore:
      - "!.github/workflows/docker.yml"
      - ".github/**"
      - "docs/**"
      - "resources/**"
      - "benchmark/**"
      - "tests/**"
      - "**/*.md"
      - "autotest/**"
      - "builder/**"
      - "k8s/**"
    branches:
      - main
    tags:
      - "v*.*.*"
  workflow_dispatch:
    inputs:
      repo_ref:
        required: false
        # The description now matches the declared default below.
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      image_tag:
        required: true
        description: 'Set docker image tag. Default is "latest"'
        type: string
        default: latest
jobs:
# Builds and pushes the CUDA images, one matrix cell per CUDA version.
# TAG starts as "latest-<cuda_version>" and is overridden from the image_tag
# input on manual dispatch.
publish_docker_image:
  runs-on: ubuntu-latest
  environment: 'prod'
  strategy:
    fail-fast: false
    matrix:
      cuda_version: ['cu12.8', 'cu12']
  env:
    CUDA_VERSION: ${{ matrix.cuda_version }}
    TAG_PREFIX: "openmmlab/lmdeploy"
    TAG: "openmmlab/lmdeploy:latest-${{matrix.cuda_version}}"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        ref: ${{github.event.inputs.repo_ref}}
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Get docker info
      run: |
        docker info
        # remove http extraheader (tolerate missing key)
        git config --local --unset "http.https://github.com/.extraheader" || true
    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}
    # The input is handed over through an environment variable instead of
    # being expanded into the script text, so shell metacharacters in it are
    # never interpreted as code.
    - name: Update docker TAG from workflow input
      if: github.event_name == 'workflow_dispatch'
      env:
        IMAGE_TAG: ${{ github.event.inputs.image_tag }}
      run: |
        export TAG="${TAG_PREFIX}:${IMAGE_TAG}-${CUDA_VERSION}"
        echo "$TAG"
        echo "TAG=${TAG}" >> "$GITHUB_ENV"
    - name: Build and push Docker image
      run: |
        echo $TAG
        docker build . -f docker/Dockerfile -t ${TAG} --build-arg CUDA_VERSION=${CUDA_VERSION}
        docker push $TAG
    # Only the default "latest-cu12" image is additionally tagged "latest".
    - name: Push Docker image as latest
      if: endsWith(env.TAG, 'latest-cu12') == true
      run: |
        export latest_TAG=${TAG_PREFIX}:latest
        echo $latest_TAG
        docker tag $TAG $latest_TAG
        docker push $latest_TAG
    # GITHUB_REF_NAME is the runner-provided equivalent of github.ref_name;
    # reading it from the environment keeps the tag name out of the script
    # text.
    - name: Push docker image with released tag
      if: startsWith(github.ref, 'refs/tags/') == true
      run: |
        export RELEASE_TAG="${TAG_PREFIX}:${GITHUB_REF_NAME}-${CUDA_VERSION}"
        echo "$RELEASE_TAG"
        docker tag "$TAG" "$RELEASE_TAG"
        docker push "$RELEASE_TAG"
# Builds and pushes the Ascend image for linux/arm64 using QEMU and Buildx.
# TAG starts as "openmmlab/lmdeploy:ascend" and is overridden from the
# image_tag input on manual dispatch.
publish_ascend_docker_image:
  runs-on: ubuntu-latest
  environment: 'prod'
  env:
    TAG_PREFIX: "openmmlab/lmdeploy"
    TAG: "openmmlab/lmdeploy:ascend"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        ref: ${{github.event.inputs.repo_ref}}
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Get docker info
      run: |
        docker info
        # remove http extraheader (tolerate missing key)
        git config --local --unset "http.https://github.com/.extraheader" || true
    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}
    # The input is handed over through an environment variable instead of
    # being expanded into the script text, so shell metacharacters in it are
    # never interpreted as code.
    - name: Update docker TAG from workflow input
      if: github.event_name == 'workflow_dispatch'
      env:
        IMAGE_TAG: ${{ github.event.inputs.image_tag }}
      run: |
        export TAG="${TAG_PREFIX}:${IMAGE_TAG}-ascend"
        echo "$TAG"
        echo "TAG=${TAG}" >> "$GITHUB_ENV"
    - name: Build and push Docker image
      run: |
        echo $TAG
        docker build . -t ${TAG} -f docker/Dockerfile_ascend_a3 --platform linux/arm64
        docker push $TAG
docker-nightly .github/workflows/docker-nightly.yml
View raw YAML
# Nightly Docker image publishing; runs on a cron schedule or on manual dispatch.
name: publish-docker-nightly

on:
  workflow_dispatch:
    inputs:
      repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: 'main'
  schedule:
    # minute 00, hour 8, days-of-week 0-4
    - cron: '00 8 * * 0-4'

env:
  # TAG is the base nightly image; DEV_TAG is the extended image built on top of it
  TAG: "openmmlab/lmdeploy:nightly-cu12.8"
  DEV_TAG: "openmmlab/lmdeploy:nightly-test-cu12.8"

jobs:
# Builds and pushes the base nightly image (TAG), then generates a small
# Dockerfile that layers extra apt/pip packages on top and pushes it as DEV_TAG.
publish_docker_image:
  runs-on: ubuntu-latest
  env:
    # Value comes from the `vars` context; it is appended to a `pip install` line below
    OFFLINE_REQUIREMENTS: ${{ vars.OFFLINE_REQUIREMENTS }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v6
      with:
        # Scheduled runs carry no inputs, hence the 'main' fallback
        ref: ${{ github.event.inputs.repo_ref || 'main' }}

    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false

    - name: Get docker info
      run: |
        docker info
        # remove http extraheader (tolerate missing key)
        git config --local --unset "http.https://github.com/.extraheader" || true

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Build and push Docker image
      run: |
        # Stage 1: base nightly image
        echo ${{ env.TAG }}
        docker build . -f docker/Dockerfile -t ${{ env.TAG }} --build-arg CUDA_VERSION=cu12.8
        docker push ${{ env.TAG }}
        # Stage 2: write the extended Dockerfile; the quoted 'EOF' keeps ${BASE_IMAGE} literal for Docker
        mkdir -p docker
        cat > docker/Dockerfile.nightly-extended << 'EOF'
        ARG BASE_IMAGE=${{ env.TAG }}
        FROM ${BASE_IMAGE}
        COPY requirements /tmp/requirements
        RUN apt-get update -y && \
        apt-get install -y --no-install-recommends libgl1 libglib2.0-0 && \
        apt-get clean -y && \
        rm -rf /var/lib/apt/lists/*
        RUN python3 -m pip install --no-cache-dir -r /tmp/requirements/lite.txt && \
        python3 -m pip install --no-cache-dir -r /tmp/requirements/test.txt && \
        pip install --no-cache-dir ${{ env.OFFLINE_REQUIREMENTS }}
        EOF
        # Stage 3: build and push the extended image
        docker build . -f docker/Dockerfile.nightly-extended \
          --build-arg BASE_IMAGE=${{ env.TAG }} \
          -t ${{ env.DEV_TAG }}
        docker push ${{ env.DEV_TAG }}
# Mirrors the extended nightly image (DEV_TAG) into the inner registry once
# publish_docker_image has finished.
publish_inner_docker_image:
  runs-on: image-sync-inner
  needs: publish_docker_image
  env:
    INNER_REGISTRY: ${{ secrets.INNER_DOCKER_REGISTRY }}
    INNER_TAG: "${{ secrets.INNER_DOCKER_REGISTRY }}/ailab-puyu-puyu_gpu/lmdeploy:nightly-cu12.8"
  steps:
    - name: Pull and push to inner
      env:
        # Credentials reach the script through the environment so they are not
        # written into the script text or onto the `docker login` command line
        CLUSTER_DOCKERHUB_USERNAME: ${{ secrets.CLUSTER_DOCKERHUB_USERNAME }}
        CLUSTER_DOCKERHUB_TOKEN: ${{ secrets.CLUSTER_DOCKERHUB_TOKEN }}
      run: |
        docker pull "${DEV_TAG}"
        # --password-stdin keeps the token out of the process argument list
        printf '%s' "${CLUSTER_DOCKERHUB_TOKEN}" | docker login "${INNER_REGISTRY}" -u "${CLUSTER_DOCKERHUB_USERNAME}" --password-stdin
        docker tag "${DEV_TAG}" "${INNER_TAG}"
        docker push "${INNER_TAG}"
docker_dev .github/workflows/docker_dev.yml
View raw YAML
# Manually dispatched workflow that builds docker/Dockerfile_dev and pushes it
# to Docker Hub as openmmlab/lmdeploy:dev-cu12.8.
name: publish-dev-docker

on:
  workflow_dispatch:
    inputs:
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'

jobs:
  publish_dev_docker_image:
    runs-on: ubuntu-latest
    environment: 'prod'
    env:
      TAG: "openmmlab/lmdeploy:dev-cu12.8"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.repo_ref }}
      - name: Free disk space
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Get docker info
        run: |
          docker info
          # remove http extraheader; `git config --unset` exits non-zero when the
          # key is absent, so tolerate that case instead of failing the job
          git config --local --unset "http.https://github.com/.extraheader" || true
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and push Docker image
        run: |
          echo "${TAG}"
          docker build . -f docker/Dockerfile_dev -t "${TAG}"
          docker push "${TAG}"
evaluate matrix .github/workflows/evaluate.yml
View raw YAML
# Manually dispatched evaluation workflow: inputs select the lmdeploy and
# opencompass revisions plus the model and dataset lists to evaluate.
name: evaluate

on:
  workflow_dispatch:
    inputs:
      # --- lmdeploy source under test ---
      repo_org:
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        required: false
        default: 'InternLM/lmdeploy'
      repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: 'main'
      # --- what to evaluate ---
      base_models:
        description: 'Tested TurboMind models list. eg. [turbomind_qwen2_5_1_5b, turbomind_qwen2_5_7b, turbomind_qwen2_5_32b, turbomind_glm_4_9b, turbomind_llama_3_1_8b, turbomind_llama_3_70b, turbomind_qwen3_0_6b_base, turbomind_qwen3_8b_base, turbomind_qwen3_30b_A3B_base, pytorch_qwen2_5_1_5b, pytorch_qwen2_5_7b, pytorch_qwen2_5_32b, pytorch_gemma_2_9b, pytorch_llama_3_70b, pytorch_llama_3_1_8b, pytorch_qwen3_0_6b_base, pytorch_qwen3_8b_base, pytorch_qwen3_30b_A3B_base]'
        type: string
        required: true
        default: '[turbomind_qwen2_5_1_5b, turbomind_qwen2_5_7b, turbomind_qwen2_5_32b, turbomind_glm_4_9b, turbomind_llama_3_1_8b, turbomind_llama_3_70b, turbomind_qwen3_0_6b_base, turbomind_qwen3_8b_base, turbomind_qwen3_30b_A3B_base, pytorch_qwen2_5_1_5b, pytorch_qwen2_5_7b, pytorch_qwen2_5_32b, pytorch_gemma_2_9b, pytorch_llama_3_70b, pytorch_llama_3_1_8b, pytorch_qwen3_0_6b_base, pytorch_qwen3_8b_base, pytorch_qwen3_30b_A3B_base]'
      # NOTE: the key is spelled "baes_datasets"; the evaluate job reads it under this exact name
      baes_datasets:
        description: 'Tested datasets list. eg. [*mmlu_datasets, *gsm8k_datasets]'
        type: string
        required: true
        default: '[*mmlu_datasets, *gsm8k_datasets, *gpqa_datasets, *winogrande_datasets]'
      # --- opencompass source ---
      oc_repo_org:
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        required: false
        default: 'open-compass/opencompass'
      oc_repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: 'main'
      # --- offline switch ---
      offline_mode:
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        required: true
        default: false

env:
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache

jobs:
# Builds the lmdeploy wheel with builder/manywheel/build_wheel.sh and uploads it
# as a one-day artifact; skipped when offline_mode is requested.
linux-build:
  if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
  runs-on: ubuntu-latest
  strategy:
    matrix:
      pyver: [py310]
  env:
    # These four values are the positional arguments of build_wheel.sh
    PYTHON_VERSION: ${{ matrix.pyver }}
    PLAT_NAME: manylinux2014_x86_64
    DOCKER_TAG: cuda12.8
    OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  steps:
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false

    - name: Checkout repository
      uses: actions/checkout@v6
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}

    - name: Build
      run: |
        # Log the build parameters
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # remove -it
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}

    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        if-no-files-found: error
        retention-days: 1
# Installs lmdeploy (freshly built wheel, or the offline package) and
# opencompass inside the GPU container, then runs the base-model evaluation
# through .github/scripts/action_tools.py.
# Free-form dispatch inputs are handed to the shell through `env:` so their
# contents are treated as data, never as script text.
evaluate:
  needs: linux-build
  if: ${{github.event_name == 'schedule' || !cancelled()}}
  runs-on: [self-hosted, linux-a100]
  timeout-minutes: 4320 # 72hours
  strategy:
    fail-fast: false
    matrix:
      evaluate_type: ['base']
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/github-actions/resources:/root/resources
      - /nvme/qa_test_models/evaluation_report:/root/evaluation_report
      - /nvme/qa_test_models:/root/models
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Setup systems
      run: |
        export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')"
        echo "TIME_STAMP=$TIME_STAMP" >> "$GITHUB_ENV"
    - name: Clone repository
      uses: actions/checkout@v2
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    - name: Copy repository - offline
      if: ${{inputs.offline_mode}}
      run: cp -r /root/models/offline_pkg/lmdeploy/. .
    - name: Download Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r /root/models/offline_pkg/requirements.txt
    - name: Install lmdeploy
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Install lmdeploy - offline
      if: ${{inputs.offline_mode}}
      run: |
        python3 -m pip install /root/models/offline_pkg/py310/lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Install opencompass
      env:
        OC_REPO_ORG: ${{ github.event.inputs.oc_repo_org }}
        OC_REPO_REF: ${{ github.event.inputs.oc_repo_ref }}
      run: |
        git clone "https://github.com/${OC_REPO_ORG}.git"
        cd opencompass
        git checkout "${OC_REPO_REF}"
        python3 -m pip install .
        # Export the checkout location for later steps
        echo "OPENCOMPASS_DIR=$(pwd)" >> "$GITHUB_ENV"
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
    - name: Setup paths for evaluation
      run: |
        ln -s /root/opencompass-data ./data
        python3 .github/scripts/action_tools.py create_model_links /root/models .
    - name: Evaluate base models
      if: matrix.evaluate_type == 'base'
      env:
        BASE_MODELS: ${{ github.event.inputs.base_models }}
        # The workflow input key is spelled "baes_datasets"; keep reading it under that name
        BASE_DATASETS: ${{ github.event.inputs.baes_datasets }}
      run: |
        # Quoted so entries such as "*mmlu_datasets" are not subject to globbing
        echo "${BASE_MODELS}"
        echo "${BASE_DATASETS}"
        export LMDEPLOY_DIR=$(pwd)
        python3 .github/scripts/action_tools.py evaluate "${BASE_MODELS}" "${BASE_DATASETS}" /root/evaluation_report/${{ github.run_id }} base
    - name: Clear workspace
      if: always()
      run: |
        # Recreate the working directory empty and world-writable
        export workdir=$(pwd)
        cd ..
        rm -rf "$workdir"
        mkdir "$workdir"
        chmod -R 777 "$workdir"
lint .github/workflows/lint.yml
View raw YAML
# Lint workflow: pre-commit hooks, markdown/doc link checks, module __init__
# check, docstring coverage (interrogate) and pylint.
name: lint

on: [push, pull_request]

# Every step only reads the repository, so the token needs nothing beyond read access
permissions:
  contents: read

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # checkout/setup-python bumped to releases built on a currently supported Node runtime
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read it as the float 3.1
          python-version: '3.10'
      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install
      - name: Linting
        run: pre-commit run --all-files
      - name: Check markdown link
        uses: gaurav-nelson/github-action-markdown-link-check@v1
        with:
          use-quiet-mode: 'yes'
          use-verbose-mode: 'yes'
          # check-modified-files-only: 'yes'
          config-file: '.github/md-link-config.json'
          file-path: './README.md, ./LICENSE, ./README_zh-CN.md'
      - name: Check module init files
        run: |
          python -m pip install fire
          python .github/scripts/check_lmdeploy.py check_module_init lmdeploy
      - name: Check doc link
        run: |
          python .github/scripts/doc_link_checker.py --target README_zh-CN.md
          python .github/scripts/doc_link_checker.py --target README.md
      - name: Check docstring coverage
        run: |
          python -m pip install interrogate
          interrogate -v --exclude ./lmdeploy/pytorch_poc/modeling/ --ignore-init-method --ignore-magic --ignore-module --ignore-private --ignore-nested-functions --ignore-nested-classes --fail-under 70 lmdeploy
      - name: Check pylint score
        run: |
          python -m pip install pylint
          pylint lmdeploy
linux_x64_gpu matrix perms .github/workflows/linux_x64_gpu.yml
View raw YAML
# Builds the wheel inside the openmmlab/lmdeploy-builder image for each listed
# CUDA version whenever native sources or build files change.
name: linux-x64-gpu

on:
  push:
    paths:
      - '.github/workflows/linux_x64_gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
  pull_request:
    paths:
      - '.github/workflows/linux_x64_gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'

# A newer run on the same ref cancels the one still in progress
concurrency:
  group: linux-x64-gpu-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        # Quoted: these are version labels used in the image tag, not numbers.
        # An unquoted value such as 12.10 would be parsed as the float 12.1.
        cudaver: ['12.4', '12.8']
    name: cuda-${{ matrix.cudaver }}
    runs-on: ubuntu-latest
    steps:
      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Build
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/work \
            -w /work \
            openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} \
            bash -c "
              git config --global --add safe.directory /work && \
              source /opt/conda/bin/activate && \
              conda activate py310 && \
              pip install build && \
              python -m build --wheel
            "
mllm_api_eval matrix .github/workflows/mllm_api_eval.yml
View raw YAML
# Manually dispatched multimodal API evaluation workflow.
name: mllm_api_eval

on:
  workflow_dispatch:
    inputs:
      repo_org:
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        required: false
        default: 'InternLM/lmdeploy'
      repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: 'main'
      # Parsed with fromJSON to form the test matrix
      backend:
        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
        type: string
        required: true
        default: "['turbomind', 'pytorch']"
      execution_mode:
        description: 'Select execution mode: infer, eval, or both. Default is "both"'
        type: choice
        required: false
        options:
          - both
          - infer
          - eval
        default: 'both'
      run_id:
        description: 'Set custom run ID. If not provided, github.run_id will be used'
        type: string
        required: false
        default: ''

env:
  # Host-side locations
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Per-run report and staged-code directories, keyed by "<repo_ref>_<run_id>"
  REPORT_DIR: /nvme/qa_test_models/mllm_evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
  # Pre-staged offline resources
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
  LMUData: /nvme/qa_test_models/LMUData
  LOCAL_LLM: turbomind_Qwen2.5-32B-Instruct_nccl_tp2_0
  OPENAI_API_KEY: sk-empty
  # Hugging Face offline switches and dataset cache
  HF_DATASETS_OFFLINE: 1
  HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
  HF_HUB_OFFLINE: 1
  HF_EVALUATE_OFFLINE: 1
  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}

jobs:
# Builds the lmdeploy wheel with builder/manywheel/build_wheel.sh and uploads it
# as a one-day artifact for the downstream jobs.
linux-build:
  if: ${{ !cancelled() }}
  runs-on: ubuntu-latest
  strategy:
    matrix:
      pyver: [py310]
  env:
    # These four values are the positional arguments of build_wheel.sh
    PYTHON_VERSION: ${{ matrix.pyver }}
    PLAT_NAME: manylinux2014_x86_64
    DOCKER_TAG: cuda12.8
    OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
  steps:
    - name: Free disk space
      uses: jlumbroso/free-disk-space@main
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false

    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}

    - name: Build
      run: |
        # Log the build parameters
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # remove -it
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}

    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        if-no-files-found: error
        retention-days: 1
# Stages the source tree and the built wheel into TEST_CODE_PATH on the
# self-hosted machine and records the start time under REPORT_DIR.
# NOTE(review): the steps below branch on `inputs.offline_mode`, but this
# workflow's workflow_dispatch inputs do not declare `offline_mode` — confirm
# whether the "- offline" steps are still meant to be reachable.
download_pkgs:
  needs: linux-build
  if: ${{!cancelled()}}
  runs-on: [self-hosted, linux-a100]
  timeout-minutes: 50
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    # ---- source tree ----
    - name: Clone repository
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      uses: actions/checkout@v2
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}

    - name: Copy repository
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}

    - name: Copy repository - offline
      if: ${{inputs.offline_mode}}
      run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && chmod 777 ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}

    # ---- wheel ----
    - name: Download Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310

    - name: Copy Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}

    - name: Copy Artifacts - offline
      if: ${{inputs.offline_mode}}
      run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}

    # ---- bookkeeping ----
    - name: Mark as start
      run: |
        chmod -R 777 ${{env.TEST_CODE_PATH}}
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
# Runs the multimodal evaluation tests for every combination of backend,
# GPU-count marker and transformers flavour ("" or "legacy"), writing allure
# results to REPORT_DIR.
test_evaluation:
  needs: download_pkgs
  if: ${{ !cancelled() }}
  runs-on: [self-hosted, linux-a100]
  timeout-minutes: 2400
  strategy:
    fail-fast: false
    matrix:
      backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
      gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
      transformers: ["", "legacy"]
  env:
    TEST_ENV: ${{ matrix.transformers }}
  container:
    image: openmmlab/lmdeploy:latest-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/github-actions/resources:/root/resources
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /nvme/huggingface_hub:/nvme/huggingface_hub
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Copy repository and Artifacts
      run: |
        cp -r ${{env.TEST_CODE_PATH}}/. .
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Install lmdeploy - dependency
      run: |
        python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
    - name: Install lmdeploy
      run: |
        python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Install vlmeval
      run: |
        python3 -m pip install pandas datasets scikit-learn pylatexenc math_verify
        apt update && apt install -y libgl1 libglib2.0-0
        cp -r /nvme/qa_test_models/offline_pkg/VLMEvalKit .
        cd VLMEvalKit && pip install .
    - name: Downgrade transformers
      if: ${{matrix.transformers == 'legacy'}}
      run: |
        pip install transformers==4.57.6
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
        mkdir ${{env.REPORT_DIR}} -p
        echo "starttime=$(date +%s)" > ${{env.REPORT_DIR}}/status.txt
    - name: Setup paths for evaluation
      if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
      run: |
        unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
        cd VLMEvalKit && cp -r ../autotest .
        execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
        ulimit -n 65535
        # Start from "success" so the final `exit` always receives an explicit
        # status instead of an empty argument when no phase failed
        overall_exit=0
        if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
          # A failing phase records its status but does not stop the next phase
          pytest autotest/evaluate/test_mllm_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
        fi
        if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
          pytest autotest/evaluate/test_mllm_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
        fi
        exit "${overall_exit}"
    - name: Clear workspace
      if: always()
      run: |
        echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
        chmod -R 777 ${{env.REPORT_DIR}}
        export workdir=$(pwd)
        rm -rf $workdir/*
pr_ete_test .github/workflows/pr_ete_test.yml
View raw YAML
# End-to-end PR test workflow; triggered by pull requests touching the listed
# paths, or by manual dispatch.
name: pr_ete_test

on:
  pull_request:
    paths:
      # workflow itself
      - ".github/workflows/pr_ete_test.yml"
      # native sources and build files
      - "cmake/**"
      - "src/**"
      - "autotest/**"
      - "3rdparty/**"
      # python package and its requirements
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements_cuda.txt"
      - "CMakeLists.txt"
      - "setup.py"
  workflow_dispatch:

# One active run per pull request (or per ref when there is no PR number)
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA

jobs:
# Runs the PR functional tests and a series of restful-server smoke tests on a
# self-hosted GPU runner.
#
# REPORT_DIR and SERVER_LOG embed github.head_ref, which is chosen by the pull
# request author. Scripts therefore read them as quoted shell variables
# ("${REPORT_DIR}", "${SERVER_LOG}") instead of expanding them with
# `${{ env.* }}`, so a crafted branch name cannot inject shell syntax into the
# generated script.
pr_functions_test:
  runs-on: [self-hosted, linux-a100-pr]
  timeout-minutes: 120
  env:
    REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.head_ref }}_${{ github.run_id }}
    SERVER_LOG: /nvme/qa_test_models/server_log/${{ github.head_ref }}_${{ github.run_id }}
  container:
    image: openmmlab/lmdeploy:dev-cu12.8
    options: --gpus all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never
    volumes:
      - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
      - /nvme/share_data/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/121:/mnt/121
      - /mnt/104:/mnt/104
      - /mnt/bigdisk:/mnt/bigdisk
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Clone repository
      uses: actions/checkout@v2
    - name: Install lmdeploy
      run: |
        python3 -m pip install -r requirements/lite.txt
        python3 -m pip install -r requirements/test.txt
        python3 -m pip install -e .
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
        mkdir -p "${REPORT_DIR}"
        mkdir -p "${SERVER_LOG}"
        echo "starttime=$(date +%s)" > "${REPORT_DIR}/status.txt"
    - name: Test lmdeploy - func
      run: |
        pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir="${REPORT_DIR}" --clean-alluredir
        pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir="${REPORT_DIR}"
    - name: Update transformers
      run: |
        pip install transformers==4.57.3
    # Each restful step below follows the same shape: start the server in the
    # background, poll /health up to 180 times at 5 s intervals, run the restful
    # tests once it answers, then ask the server to terminate.
    - name: Test restful server - turbomind Qwen3-32B
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-32B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "${SERVER_LOG}/turbomind_Qwen3-32B_start_restful.log" 2>&1 &
        echo "restful_pid=$!"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="${REPORT_DIR}"
            pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not experts' --alluredir="${REPORT_DIR}"
            curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
            exit 0
          fi
        done
        echo "health check fail"
        curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
        cat "${SERVER_LOG}/turbomind_Qwen3-32B_start_restful.log"
        exit 1
    - name: Test restful server - turbomind InternVL3-38B
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "${SERVER_LOG}/turbomind_InternVL3-38B_start_restful.log" 2>&1 &
        echo "restful_pid=$!"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="${REPORT_DIR}"
            pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not experts' --alluredir="${REPORT_DIR}"
            curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
            exit 0
          fi
        done
        echo "health check fail"
        curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
        cat "${SERVER_LOG}/turbomind_InternVL3-38B_start_restful.log"
        exit 1
    - name: Test restful server - turbomind Qwen3-30B-A3B
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "${SERVER_LOG}/turbomind_Qwen3-30B-A3B_start_restful.log" 2>&1 &
        echo "restful_pid=$!"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="${REPORT_DIR}"
            pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not experts' --alluredir="${REPORT_DIR}"
            curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
            exit 0
          fi
        done
        echo "health check fail"
        curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
        cat "${SERVER_LOG}/turbomind_Qwen3-30B-A3B_start_restful.log"
        exit 1
    - name: Test restful server - pytorch Qwen3-30B-A3B
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --allow-terminate-by-client > "${SERVER_LOG}/pytorch_Qwen3-30B-A3B_start_restful.log" 2>&1 &
        echo "restful_pid=$!"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="${REPORT_DIR}"
            pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch' --alluredir="${REPORT_DIR}"
            curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
            exit 0
          fi
        done
        echo "health check fail"
        curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
        cat "${SERVER_LOG}/pytorch_Qwen3-30B-A3B_start_restful.log"
        exit 1
    - name: Test restful server - pytorch Qwen3-VL-30B-A3B-Instruct
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-VL-30B-A3B-Instruct --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > "${SERVER_LOG}/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log" 2>&1 &
        echo "restful_pid=$!"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="${REPORT_DIR}"
            pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not experts' --alluredir="${REPORT_DIR}"
            curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
            exit 0
          fi
        done
        echo "health check fail"
        curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
        cat "${SERVER_LOG}/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log"
        exit 1
    - name: Test restful server - pytorch InternVL3_5-30B-A3B
      run: |
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > "${SERVER_LOG}/pytorch_InternVL3_5-30B-A3B_start_restful.log" 2>&1 &
        echo "restful_pid=$!"
        for i in $(seq 1 180)
        do
          sleep 5
          echo "health check try $i"
          if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
            pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="${REPORT_DIR}"
            pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not experts' --alluredir="${REPORT_DIR}"
            curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
            exit 0
          fi
        done
        echo "health check fail"
        curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
        cat "${SERVER_LOG}/pytorch_InternVL3_5-30B-A3B_start_restful.log"
        exit 1
    - name: Clear workfile
      if: always()
      run: |
        echo "status=done" >> "${REPORT_DIR}/status.txt"
        # Recreate the working directory empty and world-writable
        export workdir=$(pwd)
        cd ..
        rm -rf "$workdir"
        mkdir "$workdir"
        chmod -R 777 "$workdir"
pypi matrix .github/workflows/pypi.yml
View raw YAML
# Builds Linux and Windows wheels and uploads them to PyPI.
name: publish to pypi

on:
  # Pushes to main that modify the version file trigger the workflow.
  push:
    branches:
      - main
    paths:
      - "lmdeploy/version.py"
  # The workflow can also be started manually.
  workflow_dispatch:

jobs:
# Builds one Linux wheel per matrix Python version with
# builder/manywheel/build_wheel.sh and uploads it as the artifact
# `linux-<pyver>` for the publish job.
linux-build:
  strategy:
    matrix:
      pyver: [py310, py311, py312, py313]
  runs-on: ubuntu-latest
  env:
    # These four values are passed positionally to build_wheel.sh below.
    PYTHON_VERSION: ${{ matrix.pyver }}
    PLAT_NAME: manylinux2014_x86_64
    DOCKER_TAG: cuda12.4
    OUTPUT_FOLDER: cuda12_dist
  steps:
    - name: Free disk space
      # Pinned to a release tag: this job feeds the PyPI release, so it should
      # not execute whatever currently sits on the action's `main` branch.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Build
      run: |
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        # remove -it (no TTY is available on the runner)
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        # Fail the job when the build produced nothing to upload.
        if-no-files-found: error
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}/*
        retention-days: 1
        name: linux-${{ matrix.pyver }}
# Builds one Windows wheel per matrix Python version and uploads it as the
# artifact `windows-<pyver>` for the publish job.
windows-build:
  strategy:
    matrix:
      # Quoted so that 3.10 stays the string "3.10" instead of the float 3.1.
      pyver: ['3.10', '3.11', '3.12', '3.13']
  runs-on: windows-latest
  steps:
    - name: Set git for windows
      run: |
        git config --global core.longpaths true

    - name: Checkout repository
      uses: actions/checkout@v3

    - name: Set up python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.pyver }}

    - name: Install python packages
      run: |
        pip install build change-wheel-version

    - name: Setup CUDA Toolkit
      id: cuda-toolkit
      shell: pwsh
      run: ./builder/windows/setup_cuda.ps1
      env:
        INPUT_CUDA_VERSION: '12.6.2'

    - name: Build wheel
      # NOTE(review): the wheel is written to build/wheel, but the
      # Get-ChildItem call below searches only `build` (no -Recurse), so the
      # rename to a `cu121` local version appears never to run - confirm the
      # intent. The `cu121` tag also differs from the 12.6.2 toolkit above.
      run: |
        python -m build --wheel -o build/wheel
        Get-ChildItem -Path "build" -Filter "*.whl" | ForEach-Object { change_wheel_version $_.FullName --local-version cu121 --delete-old-wheel }

    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        if-no-files-found: error
        path: build/wheel/*
        retention-days: 1
        name: windows-${{ matrix.pyver }}
# Collects every wheel built by the two build jobs and uploads them to PyPI.
# Runs in the `prod` environment and only after both build jobs finished.
publish:
  runs-on: ubuntu-latest
  environment: 'prod'
  needs:
    - linux-build
    - windows-build
  steps:
    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        path: artifact
        # Put the files of all artifacts into the single `artifact` directory.
        merge-multiple: true
    - name: Display artifacts
      run: ls artifact/ -lh
    - name: Set up python 3.10
      uses: actions/setup-python@v4
      with:
        python-version: '3.10'
    - name: Upload to pypi
      # Credentials are handed to twine through its environment variables
      # rather than being expanded into the shell command line, so special
      # characters in the token cannot be re-interpreted by the shell.
      env:
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.pypi_password }}
      run: |
        pip install twine
        twine upload artifact/*
stable matrix .github/workflows/stable.yml
View raw YAML
# Stability test: builds a wheel, then runs evaluation and load tests against
# a served model on a self-hosted runner.
name: stable_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        # The description now states the real default value used below.
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
  schedule:
    # minute 0, hour 8, every Monday
    - cron: '00 8 * * 1'

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  # Build output folder and report directory are unique per workflow run.
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  REPORT_DIR: /nvme/qa_test_models/stable_reports/${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  COMPASS_DATA_CACHE: /nvme/qa_test_models/dataset

jobs:
# Builds the py310 wheel that the benchmark job installs. Skipped for manual
# runs in offline mode; scheduled runs always build.
linux-build:
  if: ${{github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode)}}
  strategy:
    matrix:
      pyver: [py310]
  runs-on: ubuntu-latest
  env:
    PYTHON_VERSION: ${{ matrix.pyver }}
    PLAT_NAME: manylinux2014_x86_64
    DOCKER_TAG: cuda11.8
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        # Scheduled runs carry no inputs, hence the fallbacks after `||`.
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}

    - name: Build
      run: |
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # remove -it
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}

    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        if-no-files-found: error
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        retention-days: 1
        # The benchmark job downloads this artifact by the same name.
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
# Installs lmdeploy (freshly built wheel, or the offline package), serves the
# matrix model on port 23344, then runs the OpenCompass evaluations and the
# restful-API load tests against it and attaches the results to the summary.
benchmark:
  needs: linux-build
  if: ${{github.event_name == 'schedule' || !cancelled()}}
  runs-on: [self-hosted, lmdeploy-stable]
  timeout-minutes: 10080
  strategy:
    fail-fast: false
    matrix:
      model: ['internlm/internlm2_5-20b-chat']
  container:
    image: openmmlab/lmdeploy:latest-cu11
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 -e NO_PROXY=localhost,127.0.0.1 -e no_proxy=localhost,127.0.0.1 --pull never"
    volumes:
      - /nvme/github-actions/pip-cache:/root/.cache/pip
      - /nvme/github-actions/packages:/root/packages
      - /nvme/qa_test_models:/nvme/qa_test_models
      - /mnt/187:/mnt/187
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Clone repository
      uses: actions/checkout@v3
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      with:
        repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    - name: Copy repository - offline
      if: ${{inputs.offline_mode}}
      run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
    - name: Download Artifacts
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310
    - name: Install lmdeploy - dependency
      run: |
        # manually install flash attn
        # the packages are downloaded from https://github.com/Dao-AILab/flash-attention/releases
        python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
        python3 -m pip install -r /nvme/qa_test_models/offline_pkg/requirements.txt
    - name: Install lmdeploy
      if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
      run: |
        python3 -m pip install lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Install lmdeploy - offline
      if: ${{inputs.offline_mode}}
      run: |
        python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps
        python3 -m pip install -r requirements/test.txt
    - name: Install opencompass
      run: |
        git clone --depth=1 https://github.com/open-compass/opencompass.git
        cd opencompass
        python3 -m pip install -e .
        cd ..
    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env
    - name: Start restful api turbomind
      run: |
        mkdir ${{env.REPORT_DIR}} -p
        CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/${{matrix.model}} --tp 2 --max-batch-size 256 --cache-max-entry-count 0.9 --server-port 23344 > ${{env.REPORT_DIR}}/restful.log 2>&1 &
        # Exported to later steps so that "Kill api server" can stop the server.
        echo "restful_pid=$!" >> "$GITHUB_ENV"
        sleep 120s
    - name: Run OC result
      continue-on-error: true
      run: |
        ln -s /nvme/qa_test_models/dataset/data .
        opencompass .github/scripts/eval_stable_object_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-object-1
        opencompass .github/scripts/eval_stable_subject_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-subject-1
        opencompass .github/scripts/eval_stable_object_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-object-2
        opencompass .github/scripts/eval_stable_subject_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-subject-2
        opencompass .github/scripts/eval_stable_object_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-object-3
        opencompass .github/scripts/eval_stable_subject_config.py --reuse --dump-eval-details --work-dir ${{env.REPORT_DIR}}-subject-3
    - name: Test lmdeploy - restful api
      run: |
        python3 benchmark/profile_restful_api.py --backend lmdeploy --base-url http://localhost:23344 --dataset-path /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10000 --output-file ${{env.REPORT_DIR}}/stable.jsonl > ${{env.REPORT_DIR}}/stable.log
        # Five identical load rounds. Each round writes its own csv/log pair;
        # rounds 4 and 5 used to overwrite the csv files of rounds 2 and 3,
        # leaving "Attach result" without stable-internal-4/5.csv.
        for round in 1 2 3 4 5; do
          python3 /nvme/qa_test_models/offline_pkg/profile_restful_api_internal.py localhost:23344 /nvme/qa_test_models/${{matrix.model}} /nvme/qa_test_models/datasets/Mixed.json --stream-output True --num-prompts 100000 --csv ${{env.REPORT_DIR}}/stable-internal-${round}.csv > ${{env.REPORT_DIR}}/stable-internal-${round}.log
        done
    - name: Attach result
      if: always()
      run: |
        python3 .github/scripts/action_tools.py generate_csv_from_profile_result ${{env.REPORT_DIR}}/stable.jsonl ${{env.REPORT_DIR}}/stable.csv
        python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable.csv
        # One summary per load round produced by the previous step.
        for round in 1 2 3 4 5; do
          python3 .github/scripts/action_tools.py add_summary ${{env.REPORT_DIR}}/stable-internal-${round}.csv
        done
    - name: Kill api server
      if: always()
      run: |
        kill -15 "$restful_pid"
    - name: Clear workfile
      if: always()
      run: |
        chmod -R 777 $REPORT_DIR
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
stale perms .github/workflows/stale.yml
View raw YAML
# Marks and closes inactive issues and pull requests with actions/stale.
name: 'Close stale issues and PRs'

on:
  schedule:
    # check issues and pull requests once a day at 01:30
    - cron: '30 1 * * *'

# Workflow-wide default; the job below grants itself the write scopes it needs.
permissions:
  contents: read

jobs:
  stale:
    permissions:
      issues: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v7
        with:
          stale-issue-message: 'This issue is marked as stale because it has been marked as invalid or awaiting response for 7 days without any further response. It will be closed in 5 days if the stale label is not removed or if there is no further response.'
          stale-pr-message: 'This PR is marked as stale because there has been no activity in the past 45 days. It will be closed in 10 days if the stale label is not removed or if there is no further updates.'
          close-issue-message: 'This issue is closed because it has been stale for 5 days. Please open a new issue if you have similar issues or you have any new updates now.'
          close-pr-message: 'This PR is closed because it has been stale for 10 days. Please reopen this PR if you have any updates and want to keep contributing the code.'
          # only issues/PRs carrying one of the following labels are checked
          any-of-labels: 'invalid, awaiting response, duplicate'
          # the day counts below match the numbers quoted in the messages above
          days-before-issue-stale: 7
          days-before-pr-stale: 45
          days-before-issue-close: 5
          days-before-pr-close: 10
          # automatically remove the stale label when the issue or pull request is updated or commented on
          remove-stale-when-updated: true
          operations-per-run: 50
test_docker matrix .github/workflows/test_docker.yml
View raw YAML
# Builds the project's Docker images whenever the docker files or a
# docker-related workflow file change.
name: test-docker

on:
  push:
    paths:
      - 'docker/**'
      - '.github/workflows/*docker.yml'
  pull_request:
    paths:
      - 'docker/**'
      - '.github/workflows/*docker.yml'

# One active run per pull request (or per ref when there is no pull request);
# a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
# Builds docker/Dockerfile for every CUDA/Python combination of the matrix,
# runs `lmdeploy check_env` inside the image and, for cu12, analyses the image
# with Dive.
test_docker_image:
  permissions:
    pull-requests: write
  runs-on: ubuntu-latest
  strategy:
    matrix:
      cuda_version: [cu13, cu12]
      python_version: ['3.10', '3.11', '3.12', '3.13']
  env:
    CUDA_VERSION: ${{ matrix.cuda_version }}
    PYTHON_VERSION: ${{ matrix.python_version }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        # This workflow declares no `repo_ref` input, so the expression is
        # empty and checkout falls back to its default ref.
        ref: ${{github.event.inputs.repo_ref}}
    - name: Free disk space
      # Pinned to a release tag instead of the mutable `main` branch so the
      # job always runs a known version of this third-party action.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Get docker info
      run: |
        docker info
        # remove http extraheader
        git config --local --unset "http.https://github.com/.extraheader"
    - name: Build Docker image
      run: |
        docker build . -t lmdeploy:latest -f docker/Dockerfile --build-arg CUDA_VERSION=${CUDA_VERSION} --build-arg PYTHON_VERSION=${PYTHON_VERSION}
    - name: Test image with lmdeploy check_env
      run: |
        docker images
        docker run --rm lmdeploy:latest lmdeploy check_env
    - name: Dive
      if: ${{ matrix.cuda_version == 'cu12' }}
      uses: MaxymVlasov/dive-action@v1.5.0
      with:
        image: lmdeploy:latest
        github-token: ${{ secrets.GITHUB_TOKEN }}
# Builds docker/Dockerfile_ascend_a3 on an ARM runner and analyses the image
# with Dive. The check_env smoke test is currently commented out.
test_ascend_docker_image:
  permissions:
    pull-requests: write
  runs-on: ubuntu-22.04-arm
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        # This workflow declares no `repo_ref` input, so the expression is
        # empty and checkout falls back to its default ref.
        ref: ${{github.event.inputs.repo_ref}}
    - name: Free disk space
      # Pinned to a release tag instead of the mutable `main` branch so the
      # job always runs a known version of this third-party action.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Get docker info
      run: |
        docker info
        # remove http extraheader
        git config --local --unset "http.https://github.com/.extraheader"
    - name: Build Docker image
      run: |
        docker build . -t lmdeploy:ascend -f docker/Dockerfile_ascend_a3
    # - name: Test image with lmdeploy check_env
    #   run: |
    #     docker images
    #     docker run --rm lmdeploy:ascend lmdeploy check_env
    - name: Dive
      uses: MaxymVlasov/dive-action@v1.5.0
      with:
        image: lmdeploy:ascend
        github-token: ${{ secrets.GITHUB_TOKEN }}
# Builds docker/Dockerfile.jetson on an ARM runner, runs `lmdeploy check_env`
# inside the image and analyses the image with Dive.
test_jetson_docker_image:
  permissions:
    pull-requests: write
  runs-on: ubuntu-22.04-arm
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        # This workflow declares no `repo_ref` input, so the expression is
        # empty and checkout falls back to its default ref.
        ref: ${{github.event.inputs.repo_ref}}
    - name: Free disk space
      # Pinned to a release tag instead of the mutable `main` branch so the
      # job always runs a known version of this third-party action.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
        tool-cache: false
        docker-images: false
        # All of these default to true, but feel free to set to "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: false
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Get docker info
      run: |
        docker info
        # remove http extraheader
        git config --local --unset "http.https://github.com/.extraheader"
    - name: Build Docker image
      run: |
        docker build . -t lmdeploy:jetson -f docker/Dockerfile.jetson
    - name: Test image with lmdeploy check_env
      run: |
        docker images
        docker run --rm lmdeploy:jetson lmdeploy check_env
    - name: Dive
      uses: MaxymVlasov/dive-action@v1.5.0
      with:
        image: lmdeploy:jetson
        github-token: ${{ secrets.GITHUB_TOKEN }}
unit_test .github/workflows/unit_test.yml
View raw YAML
# Runs the Python unit tests when sources, tests, build files or requirements
# change.
name: unit-test

on:
  pull_request:
    paths:
      - ".github/workflows/unit_test.yml"
      - "cmake/**"
      - "src/**"
      - "tests/**"
      - "3rdparty/**"
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements_cuda.txt"
      - "CMakeLists.txt"
      - "setup.py"
  push:
    branches:
      - main
    # Same path list as for pull requests above; keep the two in sync.
    paths:
      - ".github/workflows/unit_test.yml"
      - "cmake/**"
      - "src/**"
      - "tests/**"
      - "3rdparty/**"
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements_cuda.txt"
      - "CMakeLists.txt"
      - "setup.py"
    tags:
      - "v*.*.*"

jobs:
# Installs lmdeploy in editable mode inside the dev container on a self-hosted
# runner and runs the tests under coverage.
unit_test:
  runs-on: [self-hosted, linux-a100-s2]
  timeout-minutes: 4320  # 72hours
  container:
    image: openmmlab/lmdeploy:dev-cu12.8
    options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 -e HF_HOME=/root/.cache/huggingface --pull never"
    volumes:
      # Host directories backing PIP_CACHE_DIR and HF_HOME set in `options`.
      - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
      - /nvme/share_data/github-actions/hf_home:/root/.cache/huggingface
      - /nvme/share_data/github-actions/packages:/root/packages
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
  steps:
    - name: Clone repository
      uses: actions/checkout@v5

    - name: Install lmdeploy
      run: |
        python3 -m pip install -r requirements/test.txt
        python3 -m pip install -e .

    - name: Check env
      run: |
        python3 -m pip list
        lmdeploy check_env

    - name: Test lmdeploy python UT
      run: |
        coverage run --branch --source lmdeploy -m pytest -rsE tests
        coverage xml
        coverage report -m

    # Always runs: replaces the working directory with an empty,
    # world-writable one.
    - name: Clear workfile
      if: always()
      run: |
        export workdir=$(pwd)
        cd ..
        rm -rf $workdir
        mkdir $workdir
        chmod -R 777 $workdir
windows_x64_gpu matrix perms .github/workflows/windows_x64_gpu.yml
View raw YAML
# Windows build check, triggered by changes to native sources, CMake files,
# examples, third-party code, C++ tests or this workflow file.
name: windows-x64-gpu

on:
  push:
    paths:
      - '.github/workflows/windows_x64_gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
  pull_request:
    # Same path list as for pushes above; keep the two in sync.
    paths:
      - '.github/workflows/windows_x64_gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'

# One active run per ref; a newer run cancels the one still in progress.
concurrency:
  group: windows-x64-gpu-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
build:
strategy:
fail-fast: false
matrix:
cudaver: [12.6.2, 12.8.1]
name: cuda-${{ matrix.cudaver }}
runs-on: windows-latest
steps:
- name: Set git for windows
run: |
git config --global core.longpaths true
- name: Checkout repository
uses: actions/checkout@v3
- name: Set up python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install python packages
run: |
pip install build
- name: Setup CUDA Toolkit
id: cuda-toolkit
shell: pwsh
run: ./builder/windows/setup_cuda.ps1
env:
INPUT_CUDA_VERSION: ${{ matrix.cudaver }}
- name: Build wheel
run: |
python -m build --wheel