From 03f82299bcdc2e00f345783618461821fea60805 Mon Sep 17 00:00:00 2001
From: mywaaagh_admin
Date: Wed, 6 Aug 2025 16:15:35 +0800
Subject: [PATCH 01/10] feat: add ascend docker release

Signed-off-by: mywaaagh_admin
---
 .../workflows/release-docker-npu-nightly.yaml | 76 ++++++++++++++++++
 .github/workflows/release-docker-npu.yaml     | 77 ++++++++++++++++++
 docker/Dockerfile.npu                         | 80 +++++++++++++++++++
 3 files changed, 233 insertions(+)
 create mode 100644 .github/workflows/release-docker-npu-nightly.yaml
 create mode 100644 .github/workflows/release-docker-npu.yaml
 create mode 100644 docker/Dockerfile.npu

diff --git a/.github/workflows/release-docker-npu-nightly.yaml b/.github/workflows/release-docker-npu-nightly.yaml
new file mode 100644
index 000000000000..c2ab4e74d76e
--- /dev/null
+++ b/.github/workflows/release-docker-npu-nightly.yaml
@@ -0,0 +1,76 @@
+name: Release Docker Images Nightly (Ascend NPU)
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - ".github/workflows/release-docker-npu-nightly.yaml"
+  workflow_dispatch:
+  schedule:
+    - cron: '0 0 * * *'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04-arm
+    strategy:
+      matrix:
+        cann_version: ['8.2.rc1.alpha003']
+        device_type: ['a3']
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Free up disk space
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+        with:
+          tool-cache: true
+          docker-images: false
+
+      - name: Setup Docker buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ github.repository_owner }}/sglang
+          # push with schedule event
+          # push with workflow_dispatch event
+          tags: |
+            type=ref,event=pr
+            type=ref,event=branch
+            type=schedule,pattern=main
+          flavor: |
+            latest=false
+            suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }},onlatest=true
+      # Log in to Docker Hub only in the upstream repository, never on pull requests
+      # https://github.com/docker/login-action
+      - name: Log into docker hub
+        uses: docker/login-action@v3
+        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      # Build the image with Buildx; push only from the upstream repository and never on PR
+      # https://github.com/docker/build-push-action
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v6
+        with:
+          context: docker
+          file: docker/Dockerfile.npu
+          # TODO: add x86 platform support once memfabric is ready
+          platforms: linux/arm64
+          labels: ${{ steps.meta.outputs.labels }}
+          tags: ${{ steps.meta.outputs.tags }}
+          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
+          provenance: false
+          build-args: |
+            CANN_VERSION=${{ matrix.cann_version }}
+            DEVICE_TYPE=${{ matrix.device_type }}
diff --git a/.github/workflows/release-docker-npu.yaml b/.github/workflows/release-docker-npu.yaml
new file mode 100644
index 000000000000..6050794d0f23
--- /dev/null
+++ b/.github/workflows/release-docker-npu.yaml
@@ -0,0 +1,77 @@
+name: Release Docker Images (Ascend NPU)
+on:
+  push:
+    tags:
+      - "*" # Trigger on all tags; they are filtered by PEP 440 later
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - ".github/workflows/release-docker-npu.yaml"
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04-arm
+    strategy:
+      matrix:
+        cann_version: ['8.2.rc1.alpha003']
+        device_type: ['a3']
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Free up disk space
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+        with:
+          tool-cache: true
+          docker-images: false
+
+      # push with tag
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ github.repository_owner }}/sglang
+          tags: |
+            type=ref,event=pr
+            type=ref,event=tag,suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }}
+          flavor: |
+            latest=false
+      - name: Setup Docker buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Log in to Docker Hub only in the upstream repository, never on pull requests
+      # https://github.com/docker/login-action
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Get version
+        id: get_version
+        run: |
+          version=$(cat python/sglang/version.py | cut -d'"' -f2)
+          echo "TAG=${{ github.repository_owner }}/sglang:v$version-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> $GITHUB_OUTPUT
+          kernel_tag=$(curl -s https://api.github.com/repos/sgl-project/sgl-kernel-npu/tags | jq -r '.[0].name')
+          echo "KERNEL_NPU_TAG=${kernel_tag}" >> $GITHUB_OUTPUT
+
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v6
+        with:
+          context: docker
+          file: docker/Dockerfile.npu
+          # TODO: add x86 platform support once memfabric is ready
+          platforms: linux/arm64
+          labels: ${{ steps.meta.outputs.labels }}
+          tags: ${{ steps.meta.outputs.tags || steps.get_version.outputs.TAG }}
+          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
+          provenance: false
+          build-args: |
+            SGLANG_KERNEL_NPU_TAG=${{ steps.get_version.outputs.KERNEL_NPU_TAG }}
+            CANN_VERSION=${{ matrix.cann_version }}
+            DEVICE_TYPE=${{ matrix.device_type }}
diff --git a/docker/Dockerfile.npu b/docker/Dockerfile.npu
new file mode 100644
index 000000000000..dddb41b6afa6
--- /dev/null
+++ b/docker/Dockerfile.npu
@@ -0,0 +1,80 @@
+ARG CANN_VERSION=8.2.rc1.alpha003
+ARG DEVICE_TYPE=a3
+ARG OS=ubuntu22.04
+ARG PYTHON_VERSION=py3.11
+
+FROM quay.io/ascend/cann:$CANN_VERSION-$DEVICE_TYPE-$OS-$PYTHON_VERSION
+
+# Update pip & apt sources
+ARG PIP_INDEX_URL="https://pypi.org/simple/"
+ARG APTMIRROR=""
+ARG MEMFABRIC_URL=https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl
+ARG PYTORCH_VERSION=2.6.0
+ARG TORCHVISION_VERSION=0.21.0
+ARG VLLM_TAG=v0.8.5
+ARG TRITON_ASCEND_URL=https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend-3.2.0.dev20250729-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
+ARG SGLANG_TAG=main
+ARG ASCEND_CANN_PATH=/usr/local/Ascend/ascend-toolkit
+ARG SGLANG_KERNEL_NPU_TAG=main
+
+WORKDIR /workspace
+
+# Define environments
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN pip config set global.index-url $PIP_INDEX_URL
+RUN if [ -n "$APTMIRROR" ];then sed -i "s|.*.ubuntu.com|$APTMIRROR|g" /etc/apt/sources.list ;fi
+
+# Install development tools and utilities
+RUN apt-get update -y && apt-get upgrade -y && apt-get install -y \
+    build-essential \
+    cmake \
+    vim \
+    wget \
+    curl \
+    net-tools \
+    zlib1g-dev \
+    lld \
+    clang \
+    locales \
+    ccache \
+    ca-certificates \
+    && rm -rf /var/cache/apt/* \
+    && rm -rf /var/lib/apt/lists/* \
+    && update-ca-certificates \
+    && locale-gen en_US.UTF-8
+
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US:en
+ENV LC_ALL=en_US.UTF-8
+
+# Install dependencies
+# TODO: install from pypi released memfabric
+# TODO: install from pypi released triton-ascend
+RUN pip install $MEMFABRIC_URL --no-cache-dir \
+    && pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu --no-cache-dir \
+    && pip install torch-npu==$PYTORCH_VERSION --no-cache-dir \
+    && pip install ${TRITON_ASCEND_URL} --no-cache-dir \
+    && python3 -m pip install --no-cache-dir numpy==1.26.4 pybind11
+
+# Install vLLM
+RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG && \
+    cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir && \
+    cd .. && rm -rf vllm
+
+# Install SGLang, then return to the workdir (two levels up) so the clone is actually removed
+RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \
+    cd ./sglang/python && pip install .[srt_npu] --no-cache-dir && \
+    cd ../.. && rm -rf ./sglang
+
+# Install Deep-ep
+RUN git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
+    && export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH && \
+    source ${ASCEND_CANN_PATH}/set_env.sh && \
+    cd sgl-kernel-npu && \
+    bash build.sh \
+    && pip install output/deep_ep*.whl --no-cache-dir \
+    && cd .. && rm -rf sgl-kernel-npu \
+    && cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -s deep_ep/deep_ep_cpp*.so
+
+CMD ["/bin/bash"]

From 72b1e278de63a23449c9196af1d7235675079e1a Mon Sep 17 00:00:00 2001
From: Even Zhou
Date: Wed, 6 Aug 2025 17:08:03 +0800
Subject: [PATCH 02/10] add readme

---
 docs/basic_usage/deepseek.md  |   2 +
 docs/references/ascend_npu.md | 193 ++++++++++++++++++++++++++++++++++
 docs/references/hardware.rst  |   9 ++
 3 files changed, 204 insertions(+)
 create mode 100644 docs/references/ascend_npu.md
 create mode 100644 docs/references/hardware.rst

diff --git a/docs/basic_usage/deepseek.md b/docs/basic_usage/deepseek.md
index d0c04af43743..c8195d465822 100644
--- a/docs/basic_usage/deepseek.md
+++ b/docs/basic_usage/deepseek.md
@@ -24,6 +24,7 @@ To run DeepSeek V3/R1 models, the requirements are as follows:
 | **Quantized weights (int8)** | 16 x A100/800 |
 | | 32 x L40S |
 | | Xeon 6980P CPU |
+| | 2 x 8 x Atlas A3 |