Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions .github/workflows/pr-test-npu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,19 @@ jobs:
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
# speed up by using infra cache services
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}

bash scripts/ci/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
Expand All @@ -56,13 +62,19 @@ jobs:
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-2
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
# speed up by using infra cache services
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}

bash scripts/ci/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
Expand All @@ -85,13 +97,19 @@ jobs:
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-4
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
# speed up by using infra cache services
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}

bash scripts/ci/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
Expand Down
76 changes: 76 additions & 0 deletions .github/workflows/release-docker-npu-nightly.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
name: Release Docker Images Nightly (Ascend NPU)

# Runs on three triggers:
#   - pull requests against main that modify this workflow file itself,
#   - manual dispatch,
#   - a daily schedule at 00:00 UTC.
on:
  pull_request:
    branches: [main]
    paths:
      - '.github/workflows/release-docker-npu-nightly.yaml'
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * *'

# Runs are grouped by workflow name + commit SHA; a newer run in the same
# group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.sha }}
  cancel-in-progress: true

jobs:
  # Builds the Ascend NPU image for every (cann_version, device_type) pair and
  # pushes it to Docker Hub on non-PR events in the upstream repository.
  build:
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        # Quoted so the version is never re-typed by the YAML parser.
        cann_version: ["8.2.rc1"]
        device_type: ["a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          # NOTE(review): resolves to "<repository owner>/sglang"; confirm this
          # namespace matches the Docker Hub account behind DOCKERHUB_USERNAME.
          images: |
            ${{ github.repository_owner }}/sglang
          # push with schedule event
          # push with workflow_dispatch event
          tags: |
            type=ref,event=pr
            type=ref,event=branch
            type=schedule,pattern=main
          # Every tag gets a "-cann<version>-<device>" suffix; "latest" is never emitted.
          flavor: |
            latest=false
            suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }},onlatest=true

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into docker hub
        uses: docker/login-action@v3
        # github.repository is "<owner>/<repo>"; github.repository_owner is only
        # "<owner>" and can never equal 'sgl-project/sglang', which would make
        # this step (and the push below) unreachable.
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/Dockerfile.npu
          # TODO: need add x86 platforms support when memfabric is ready
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.meta.outputs.tags }}
          # Same guard as the login step: push only from the upstream repo and
          # never for pull requests.
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
77 changes: 77 additions & 0 deletions .github/workflows/release-docker-npu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
name: Release Docker Images (Ascend NPU)

# Runs on tag pushes, on manual dispatch, and on pull requests against main
# that modify this workflow file itself.
on:
  push:
    tags:
      - '*' # Trigger on all tags; filtered by PEP 440 later
  workflow_dispatch:
  pull_request:
    branches: [main]
    paths:
      - '.github/workflows/release-docker-npu.yaml'

jobs:
  # Builds the Ascend NPU release image for every (cann_version, device_type)
  # pair and pushes it to Docker Hub on non-PR events in the upstream repository.
  build:
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        # Quoted so the version is never re-typed by the YAML parser.
        cann_version: ["8.2.rc1"]
        device_type: ["a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      # push with tag
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          # NOTE(review): resolves to "<repository owner>/sglang"; confirm this
          # namespace matches the Docker Hub account behind DOCKERHUB_USERNAME.
          images: |
            ${{ github.repository_owner }}/sglang
          tags: |
            type=ref,event=pr
            type=ref,event=tag,suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }}
          flavor: |
            latest=false

      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v3

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Login to Docker Hub
        # v3 to match the nightly NPU release workflow.
        uses: docker/login-action@v3
        # github.repository is "<owner>/<repo>"; github.repository_owner is only
        # "<owner>" and can never equal 'sgl-project/sglang', which would make
        # this step (and the push below) unreachable.
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Exposes two step outputs:
      #   TAG            - fallback image tag built from python/sglang/version.py
      #   KERNEL_NPU_TAG - name of the first tag returned for sgl-kernel-npu
      - name: Get version
        id: get_version
        run: |
          # The version is the first double-quoted string in version.py.
          version=$(cut -d'"' -f2 python/sglang/version.py)
          echo "TAG=${{ github.repository_owner }}/sglang:v$version-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> "$GITHUB_OUTPUT"

          # -f makes curl fail on HTTP errors (e.g. API rate limiting) instead
          # of handing an error document to jq.
          kernel_tag=$(curl -fsSL https://api.github.com/repos/sgl-project/sgl-kernel-npu/tags | jq -r '.[0].name')
          # An empty or "null" value would be passed to the Docker build as the
          # branch to clone; stop here with a clear message instead.
          if [ -z "${kernel_tag}" ] || [ "${kernel_tag}" = "null" ]; then
            echo "::error::Could not resolve the latest sgl-kernel-npu tag"
            exit 1
          fi
          echo "KERNEL_NPU_TAG=${kernel_tag}" >> "$GITHUB_OUTPUT"

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/Dockerfile.npu
          # TODO: need add x86 platforms support when memfabric is ready
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          # Falls back to the version-derived tag when the metadata step
          # produced no tags for this event.
          tags: ${{ steps.meta.outputs.tags || steps.get_version.outputs.TAG }}
          # Same guard as the login step: push only from the upstream repo and
          # never for pull requests.
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=${{ steps.get_version.outputs.KERNEL_NPU_TAG }}
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
81 changes: 81 additions & 0 deletions docker/Dockerfile.npu
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Coordinates of the base image. These ARGs precede FROM and are consumed by
# the FROM line only.
ARG CANN_VERSION="8.2.rc1"
ARG DEVICE_TYPE="a3"
ARG OS="ubuntu22.04"
ARG PYTHON_VERSION="py3.11"

FROM quay.io/ascend/cann:${CANN_VERSION}-${DEVICE_TYPE}-${OS}-${PYTHON_VERSION}

# Update pip & apt sources
ARG PIP_INDEX_URL="https://pypi.org/simple/"
# Empty means "leave /etc/apt/sources.list untouched".
ARG APTMIRROR=""

# Wheels and versions of the Python/NPU stack installed below.
ARG MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
ARG PYTORCH_VERSION="2.6.0"
ARG TORCHVISION_VERSION="0.21.0"
ARG PTA_URL="https://gitee.com/ascend/pytorch/releases/download/v7.1.0.1-pytorch2.6.0/torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl"
ARG VLLM_TAG="v0.8.5"
ARG TRITON_ASCEND_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend-3.2.0.dev20250729-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"

# Git refs cloned for SGLang and sgl-kernel-npu, and the CANN toolkit root
# whose set_env.sh is sourced before building sgl-kernel-npu.
ARG SGLANG_TAG="main"
ARG ASCEND_CANN_PATH="/usr/local/Ascend/ascend-toolkit"
ARG SGLANG_KERNEL_NPU_TAG="main"

WORKDIR /workspace

# Define environments
ENV DEBIAN_FRONTEND=noninteractive

# Point pip at the requested package index.
RUN pip config set global.index-url "${PIP_INDEX_URL}"

# Optionally swap the Ubuntu archive hosts for a mirror. Only the
# "<sub>.ubuntu.com" hostname is replaced: the previous pattern
# ".*.ubuntu.com" was greedy from the start of the line and therefore also
# consumed the leading "deb http://" of each entry.
# NOTE(review): assumes APTMIRROR is a bare host[:port], as in the CI
# workflow's cache rewrite - confirm against callers that set it.
RUN if [ -n "${APTMIRROR}" ]; then \
        sed -Ei "s|([a-z0-9-]+\.)+ubuntu\.com|${APTMIRROR}|g" /etc/apt/sources.list; \
    fi

# Install development tools and utilities.
# - apt-get is used throughout: the `apt` front end warns that its CLI is not
#   stable for scripts.
# - git is installed explicitly because the vLLM, SGLang and sgl-kernel-npu
#   layers below all run `git clone`.
# - apt caches and package lists are removed in the same layer so they do not
#   end up in the image.
RUN apt-get update -y \
    && apt-get upgrade -y \
    && apt-get install -y \
        build-essential \
        cmake \
        vim \
        wget \
        curl \
        git \
        net-tools \
        zlib1g-dev \
        lld \
        clang \
        locales \
        ccache \
        ca-certificates \
    && rm -rf /var/cache/apt/* \
    && rm -rf /var/lib/apt/lists/* \
    && update-ca-certificates \
    && locale-gen en_US.UTF-8

# Use the UTF-8 locale generated above.
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8

# Install dependencies
# TODO: install from pypi released memfabric
# TODO: install from pypi released triton-ascend
#
# The torch_npu wheel is downloaded with wget and installed from disk. Its
# local filename is derived from PTA_URL so that overriding the build arg
# keeps working, and the file is deleted afterwards so it is not kept in the
# image layer.
RUN pip install ${MEMFABRIC_URL} --no-cache-dir \
    && pip install torch==${PYTORCH_VERSION} torchvision==${TORCHVISION_VERSION} --index-url https://download.pytorch.org/whl/cpu --no-cache-dir \
    && PTA_WHEEL="$(basename "${PTA_URL}")" \
    && wget "${PTA_URL}" -O "${PTA_WHEEL}" \
    && pip install "./${PTA_WHEEL}" --no-cache-dir \
    && rm -f "${PTA_WHEEL}" \
    && pip install ${TRITON_ASCEND_URL} --no-cache-dir \
    && python3 -m pip install --no-cache-dir numpy==1.26.4 pybind11

# Install vLLM
# Shallow-clone the requested tag, install it with VLLM_TARGET_DEVICE="empty",
# then drop the checkout. The subshell keeps the `cd` local to the install.
RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG \
    && ( cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir ) \
    && rm -rf vllm

# Install SGLang
# The install runs in a subshell so the cleanup executes from the directory
# that contains the clone. Previously `cd ..` from sglang/python ended up
# inside sglang/, so `rm -rf ./sglang` pointed at a non-existent path and the
# checkout stayed in the image.
# The extra is quoted so the shell cannot treat ".[srt_npu]" as a glob.
RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG \
    && ( cd ./sglang/python && pip install ".[srt_npu]" --no-cache-dir ) \
    && rm -rf ./sglang

# Install Deep-ep
# Steps, all in one layer:
#   1. clone sgl-kernel-npu at the requested ref,
#   2. put the CANN runtime stub directory on LD_LIBRARY_PATH and load the
#      toolkit environment,
#   3. build, install the produced deep_ep wheel, and remove the checkout,
#   4. from the directory pip reports as deep-ep's Location, symlink
#      deep_ep/deep_ep_cpp*.so into that directory (single-operand `ln -s`
#      creates the link under the target's own file name).
# NOTE(review): `source` is a bash builtin; this relies on the shell used for
# RUN in the base image being bash - confirm.
RUN git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
    && export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH \
    && source ${ASCEND_CANN_PATH}/set_env.sh \
    && ( cd sgl-kernel-npu && bash build.sh && pip install output/deep_ep*.whl --no-cache-dir ) \
    && rm -rf sgl-kernel-npu \
    && cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" \
    && ln -s deep_ep/deep_ep_cpp*.so

# Default to an interactive shell.
CMD ["/bin/bash"]
2 changes: 2 additions & 0 deletions docs/basic_usage/deepseek.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ To run DeepSeek V3/R1 models, the requirements are as follows:
| **Quantized weights (int8)** | 16 x A100/800 |
| | 32 x L40S |
| | Xeon 6980P CPU |
| | 2 x Atlas 800I A3 |

<style>
.md-typeset__table {
Expand Down Expand Up @@ -64,6 +65,7 @@ Detailed commands for reference:
- [16 x A100 (int8)](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3#example-serving-with-16-a100a800-with-int8-quantization)
- [32 x L40S (int8)](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3#example-serving-with-32-l40s-with-int8-quantization)
- [Xeon 6980P CPU](../platforms/cpu_server.md#example-running-deepseek-r1)
- [2 x Atlas 800I A3 (int8)](../platforms/ascend_npu.md#running-deepseek-v3)

### Download Weights
If you encounter errors when starting the server, ensure the weights have finished downloading. It's recommended to download them beforehand or restart multiple times until all weights are downloaded. Please refer to [DeepSeek V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base#61-inference-with-deepseek-infer-demo-example-only) official guide to download the weights.
Expand Down
Loading
Loading