Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
767b447
Create nightly-test-npu.yml
cherryblo Nov 29, 2025
311f567
Update run_suite.py
cherryblo Nov 29, 2025
7da83ad
Update ci_register.py
cherryblo Nov 29, 2025
b7ca6b8
Update ci_register.py
cherryblo Nov 29, 2025
019bf54
Create test_ascend_llm_model_mistral_7b.py
cherryblo Nov 29, 2025
5dee58b
Create test_vlm_models_gemma_3_4b_it.py
cherryblo Nov 29, 2025
37f6857
Create test_vlm_utils.py
cherryblo Nov 29, 2025
f347705
Create test_vlm_models_gemma_3_4b_it.py
cherryblo Nov 29, 2025
e5e5ca5
Update test_vlm_models_gemma_3_4b_it.py
cherryblo Nov 29, 2025
15356a5
Delete test/nightly/ascend/llm_models/vlm_models/test_vlm_models_gemm…
cherryblo Nov 29, 2025
a17477a
Update test_ascend_llm_model_mistral_7b.py
cherryblo Nov 29, 2025
c469f8b
Update and rename test_ascend_llm_model_mistral_7b.py to gsm8k_ascend…
cherryblo Nov 29, 2025
a579a46
Create test_ascend_mistral_7b.py
cherryblo Nov 29, 2025
9e9516c
Update test_ascend_mistral_7b.py
cherryblo Nov 29, 2025
6d3e5c3
Create test_ascend_glm4_9b_chat.py
cherryblo Nov 29, 2025
c3a850a
Update test_ascend_glm4_9b_chat.py
cherryblo Nov 29, 2025
8bc6f41
Update test_ascend_mistral_7b.py
cherryblo Nov 29, 2025
38b6509
Update test_vlm_utils.py
cherryblo Nov 29, 2025
af01f1e
Update test_vlm_utils.py
cherryblo Nov 29, 2025
e4ad511
Update test_vlm_utils.py
cherryblo Nov 29, 2025
1b2a8bc
Update and rename test_vlm_models_gemma_3_4b_it.py to test_ascend_gem…
cherryblo Nov 29, 2025
c4c45ac
Create test_ascend_janus_pro_7b.py
cherryblo Nov 29, 2025
506fe40
Create test_ascend_reward_models.py
cherryblo Nov 29, 2025
5db34d5
Create test_ascend_embedding_models.py
cherryblo Nov 29, 2025
063dc75
Create test_ascend_cross_encoder_models.py
cherryblo Nov 29, 2025
4503da3
Update test_ascend_embedding_models.py
cherryblo Nov 29, 2025
3217e8e
Update test_ascend_cross_encoder_models.py
cherryblo Nov 29, 2025
26e8c67
Update nightly-test-npu.yml
cherryblo Nov 29, 2025
4fb7ed0
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
4ef0247
Update test_ascend_embedding_models.py
cherryblo Nov 30, 2025
610a88e
Update test_ascend_glm4_9b_chat.py
cherryblo Nov 30, 2025
7860dc3
Update test_ascend_mistral_7b.py
cherryblo Nov 30, 2025
d2e2007
Update test_ascend_cross_encoder_models.py
cherryblo Nov 30, 2025
7a10535
Update test_ascend_reward_models.py
cherryblo Nov 30, 2025
6c3b5bd
Update test_ascend_gemma_3_4b_it.py
cherryblo Nov 30, 2025
6191926
Update test_ascend_janus_pro_7b.py
cherryblo Nov 30, 2025
499f2c5
Create mmmu-val.yaml
cherryblo Nov 30, 2025
3a0a23b
Update test_vlm_utils.py
cherryblo Nov 30, 2025
6ecf748
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
3ca13ee
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
8224654
Update gsm8k_ascend_mixin.py
cherryblo Nov 30, 2025
93b40d0
Update test_ascend_cross_encoder_models.py
cherryblo Nov 30, 2025
9a5f061
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
745d108
Create npu_log_print.sh
cherryblo Nov 30, 2025
a8d545a
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
e97da90
Update test_vlm_utils.py
cherryblo Nov 30, 2025
c8582c5
Update runners.py
cherryblo Nov 30, 2025
e1d15a1
Update runners.py
cherryblo Nov 30, 2025
2f8bf49
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
87a6f22
Update ci_register.py
cherryblo Nov 30, 2025
dec562b
Update test_ascend_embedding_models.py
cherryblo Nov 30, 2025
9786923
Update gsm8k_ascend_mixin.py
cherryblo Nov 30, 2025
1cdc46a
Update test_ascend_glm4_9b_chat.py
cherryblo Nov 30, 2025
4cfcdd6
Update test_ascend_mistral_7b.py
cherryblo Nov 30, 2025
cabd233
Update test_ascend_cross_encoder_models.py
cherryblo Nov 30, 2025
f9322d5
Update test_ascend_reward_models.py
cherryblo Nov 30, 2025
30a3bd6
Update test_ascend_gemma_3_4b_it.py
cherryblo Nov 30, 2025
074db60
Update test_ascend_janus_pro_7b.py
cherryblo Nov 30, 2025
b6b0244
Update test_vlm_utils.py
cherryblo Nov 30, 2025
e7599a6
Update nightly-test-npu.yml
cherryblo Nov 30, 2025
0dd4b0a
Update nightly-test-npu.yml
cherryblo Dec 1, 2025
ea39095
Update test_ascend_embedding_models.py
cherryblo Dec 1, 2025
9400c13
Merge branch 'sgl-project:main' into main
cherryblo Dec 1, 2025
2187914
Update npu_ci_install_dependency.sh
cherryblo Dec 1, 2025
28e0916
Changes
cherryblo Dec 1, 2025
40868ac
Changes
cherryblo Dec 1, 2025
61bb6d2
Changes
cherryblo Dec 2, 2025
260392f
Changes
cherryblo Dec 2, 2025
4da39b5
Changes
cherryblo Dec 2, 2025
fd47c65
Changes
cherryblo Dec 2, 2025
c7fe336
Changes
cherryblo Dec 2, 2025
7413625
Merge branch 'sgl-project:main' into main
cherryblo Dec 2, 2025
380e519
Update npu_ci_install_dependency.sh
cherryblo Dec 3, 2025
7c81f38
Update npu_log_print.sh
cherryblo Dec 3, 2025
66b54bc
Changes
cherryblo Dec 3, 2025
49f28ff
Merge remote-tracking branch 'origin/main' into main
cherryblo Dec 3, 2025
5be47e8
Changes
cherryblo Dec 3, 2025
871e933
Merge branch 'sgl-project:main' into main
cherryblo Dec 3, 2025
931211a
Merge branch 'main' into main
iforgetmyname Dec 4, 2025
d97b8aa
Merge branch 'main' into main
iforgetmyname Dec 4, 2025
da466c0
revert todevice
iforgetmyname Dec 4, 2025
4c6ef1c
second revert back
iforgetmyname Dec 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions .github/workflows/nightly-test-npu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
name: Nightly Test (NPU)

on:
  schedule:
    - cron: '0 17 * * *' # Execute at 1:00 a.m. Beijing Time every day
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/nightly-test-npu.yml"
  workflow_dispatch:

# One nightly run per ref at a time; a newer trigger cancels the in-flight run.
concurrency:
  group: nightly-test-npu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  nightly-1-npu-a3:
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"

          bash scripts/ci/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download through proxy
          curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          pip install sentence_transformers accelerate
          cd test
          python3 run_suite.py --hw npu --suite nightly-1-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  nightly-2-npu-a3:
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"

          bash scripts/ci/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download through proxy
          curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          pip install sentence_transformers accelerate
          cd test
          python3 run_suite.py --hw npu --suite nightly-2-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  nightly-4-npu-a3:
    if: ${{ (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') }}
    runs-on: linux-aarch64-a3-4
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          # speed up by using infra cache services
          CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
          sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
          pip config set global.index-url http://${CACHING_URL}/pypi/simple
          pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple"
          pip config set global.trusted-host "${CACHING_URL} pypi.tuna.tsinghua.edu.cn"

          bash scripts/ci/npu_ci_install_dependency.sh a3
          # copy required file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
          # download through proxy
          curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl

      - name: Print Log Information
        run: |
          bash scripts/ci/npu_log_print.sh

      - name: Run test
        timeout-minutes: 240
        env:
          SGLANG_USE_MODELSCOPE: true
          SGLANG_IS_IN_CI: true
          HF_ENDPOINT: https://hf-mirror.com
          TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
          PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
          STREAMS_PER_DEVICE: 32
        run: |
          export PATH="/usr/local/Ascend/8.3.RC1/compiler/bishengir/bin:${PATH}"
          hf download lmms-lab/MMMU --repo-type dataset
          pip install sentence_transformers torchaudio==2.8.0 torch_npu==2.8.0
          # NOTE: version ranges must be quoted — an unquoted `pkg>=x.y` is parsed
          # by the shell as an output redirection to a file named "=x.y" and the
          # constraint silently never reaches pip.
          pip install protobuf==6.31.1 zss pre-commit "wandb>=0.16.0" tenacity==8.3.0 loguru openpyxl latex2sympy2 zstandard transformers-stream-generator tqdm-multiprocess pycocoevalcap
          # (duplicate `peft>=0.2.0` spec removed: pip rejects two specifiers for
          # the same package in one command; the pin `peft==0.2.0` is kept.)
          pip install yt-dlp sentencepiece==0.1.99 nltk av ftfy sqlitedict==2.1.0 "sacrebleu>=1.5.0" pytablewriter peft==0.2.0 black==24.1.0 isort==5.13.2 "accelerate>=0.29.1"
          pip install jsonlines httpx==0.25.0 "evaluate>=0.4.0" datasets==2.16.1 numexpr xgrammar==0.1.25 numpy==1.26.4 dotenv
          git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
          cd ./lmms-eval
          # Install synchronously. The previous `nohup pip install . & sleep 120`
          # raced the install against the test run and swallowed install failures.
          pip install . > lmmslog.txt 2>&1
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          cd ../
          cd test
          python3 run_suite.py --hw npu --suite nightly-4-npu-a3 --nightly --continue-on-error --timeout-per-file 3600 --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # Aggregate gate so branch protection can depend on a single job name.
  check-all-jobs:
    if: github.repository == 'sgl-project/sglang' && always()
    needs:
      - nightly-1-npu-a3
      # BUG FIX: nightly-2-npu-a3 was missing here, so its failures never
      # failed the aggregate check.
      - nightly-2-npu-a3
      - nightly-4-npu-a3
    runs-on: ubuntu-latest
    container:
      image: docker.m.daocloud.io/ubuntu:22.04
    steps:
      - name: Check if any job failed
        run: |
          if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
            echo "One or more nightly test jobs failed"
            exit 1
          fi
          if [[ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
            echo "One or more nightly test jobs were cancelled"
            exit 1
          fi
          echo "All nightly test jobs passed"
13 changes: 13 additions & 0 deletions python/sglang/test/ci/ci_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"register_cpu_ci",
"register_cuda_ci",
"register_amd_ci",
"register_npu_ci",
"ut_parse_one_file",
]

Expand All @@ -22,6 +23,7 @@ class HWBackend(Enum):
CPU = auto()
CUDA = auto()
AMD = auto()
NPU = auto()


@dataclass
Expand Down Expand Up @@ -58,10 +60,21 @@ def register_amd_ci(
return None


def register_npu_ci(
    est_time: float,
    suite: str,
    nightly: bool = False,
    disabled: Optional[str] = None,
):
    """Marker for NPU CI registration (parsed via AST; runtime no-op).

    Mirrors ``register_cpu_ci`` / ``register_cuda_ci`` / ``register_amd_ci``.
    The suite runner discovers these calls statically via ``REGISTER_MAPPING``;
    calling the function at import time has no effect.

    Args:
        est_time: Estimated runtime of the test file in seconds (presumably
            used for scheduling/partitioning — mirrors the other backends).
        suite: CI suite name the test belongs to (e.g. ``"nightly-1-npu-a3"``).
        nightly: Whether the test belongs to the nightly pipeline.
        disabled: If set, a human-readable reason the test is disabled.
    """
    return None


# Maps a registration-marker function name (as it appears in test files and is
# found by the AST parser) to the hardware backend it registers tests for.
REGISTER_MAPPING = {
    "register_cpu_ci": HWBackend.CPU,
    "register_cuda_ci": HWBackend.CUDA,
    "register_amd_ci": HWBackend.AMD,
    "register_npu_ci": HWBackend.NPU,
}


Expand Down
9 changes: 8 additions & 1 deletion python/sglang/test/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@
)

from sglang.srt.entrypoints.engine import Engine
from sglang.srt.utils import load_image
from sglang.srt.utils import is_npu, load_image
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, calculate_rouge_l

if is_npu():
from sglang.srt.hardware_backend.npu.utils import init_npu_backend

init_npu_backend()

DEFAULT_PROMPTS = [
"Apple is red. Banana is Yellow. " * 800 + "Apple is",
"The capital of the United Kingdom is",
Expand Down Expand Up @@ -72,6 +77,8 @@ def get_dtype_str(torch_dtype):
return "float16"
if torch_dtype is torch.float32:
return "float32"
if torch_dtype is torch.bfloat16:
return "bfloat16"
else:
raise NotImplementedError()

Expand Down
26 changes: 26 additions & 0 deletions scripts/ci/npu_log_print.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
set -euo pipefail

# Print CI debug information: npu-smi device status, installed packages,
# local sglang / sgl_kernel versions, and the upstream main-branch commit SHAs
# of sglang and sgl-kernel-npu.
npu-smi info
pip list

# get_version FILE NAME — extract __version__ from FILE and print
# "NAME version: vX.Y.Z", or "NAME version: unknown" if FILE is missing
# or unparsable.
get_version() {
    [ -f "$1" ] && python3 -c 'import re, sys; print(sys.argv[2] + " version: v" + re.search(r"__version__\s*=\s*[\"'"'"'](.*?)[\"'"'"']", open(sys.argv[1]).read()).group(1))' "$1" "$2" 2>/dev/null || echo "$2 version: unknown"
}
get_version "./python/sglang/version.py" "sglang"
get_version "./sgl-kernel/python/sgl_kernel/version.py" "sgl_kernel"

SGLANG_URL="https://github.com/sgl-project/sglang.git"
SGL_KERNEL_URL="https://github.com/sgl-project/sgl-kernel-npu.git"
SGLANG_BRANCH="main"
SGL_KERNEL_BRANCH="main"

# get_sha NAME URL BRANCH — print the remote HEAD SHA of BRANCH at URL.
get_sha() {
    local name="$1"
    local url="$2"
    local branch="$3"
    local sha
    sha=$(git ls-remote "$url" "refs/heads/$branch" | cut -f1)
    echo "$name SHA for branch $branch: ${sha:-"Not Found"}"
}
get_sha "sglang" "$SGLANG_URL" "$SGLANG_BRANCH"
get_sha "sgl-kernel" "$SGL_KERNEL_URL" "$SGL_KERNEL_BRANCH"

# (Removed the trailing `chmod +x scripts/ci/npu_log_print.sh`: making the
# script executable *after* it is already running is a no-op; the workflow
# invokes it via `bash` anyway.)
108 changes: 108 additions & 0 deletions test/nightly/ascend/embedding_models/test_ascend_embedding_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import multiprocessing as mp
import unittest
from typing import Optional

import torch
from transformers import AutoConfig, AutoTokenizer

from sglang.test.ci.ci_register import register_npu_ci
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
from sglang.test.test_utils import CustomTestCase, get_similarities

# Register this file with the NPU nightly CI. The call is discovered
# statically (AST parsing) by the suite runner; at runtime it is a no-op.
# `disabled` records why the test is currently skipped.
register_npu_ci(
    est_time=400,
    suite="nightly-1-npu-a3",
    nightly=True,
    disabled="embeddings are not all close",
)


# Each entry: (local ModelScope cache path, tensor-parallel size,
# prefill similarity tolerance).
MODELS = [
    ("/root/.cache/modelscope/hub/models/iic/gte_Qwen2-1.5B-instruct", 1, 1e-5),
    ("/root/.cache/modelscope/hub/models/Qwen/Qwen3-Embedding-8B", 1, 1e-5),
]
TORCH_DTYPES = [torch.bfloat16]


class TestEmbeddingModels(CustomTestCase):
    """Nightly NPU test: SRT embedding outputs must match the HF reference.

    For each model in ``MODELS``, both runners embed the same (truncated)
    prompts; cosine similarity between the two embeddings must be within
    the per-model tolerance of 1.
    """

    @classmethod
    def setUpClass(cls):
        # Runners launch worker processes; force "spawn" so children do not
        # inherit state from an already-initialized parent via fork.
        mp.set_start_method("spawn", force=True)

    def _truncate_prompts(self, prompts, model_path):
        """Clip each prompt to the model's positional limit.

        Prompts longer than ``max_position_embeddings`` tokens are cut to
        ``max_length - 1`` tokens and decoded back to text, so both runners
        receive identical, in-range inputs.
        """
        config = AutoConfig.from_pretrained(model_path)
        max_length = getattr(config, "max_position_embeddings", 2048)
        tokenizer = AutoTokenizer.from_pretrained(model_path)

        truncated_prompts = []
        for prompt in prompts:
            input_ids = tokenizer(
                prompt, return_tensors="pt", truncation=False
            ).input_ids[0]
            if len(input_ids) > max_length:
                truncated_prompts.append(
                    tokenizer.decode(
                        input_ids[: max_length - 1], skip_special_tokens=True
                    )
                )
            else:
                truncated_prompts.append(prompt)
        return truncated_prompts

    def assert_close_prefill_logits(
        self,
        prompts,
        model_path,
        tp_size,
        torch_dtype,
        prefill_tolerance,
        matryoshka_dim: Optional[int] = None,
    ) -> None:
        """Embed ``prompts`` with HF and SRT and assert cosine similarity ~ 1.

        Raises:
            AssertionError: if any short prompt's similarity deviates from 1
                by ``prefill_tolerance`` or more.
        """
        truncated_prompts = self._truncate_prompts(prompts, model_path)

        with HFRunner(
            model_path,
            torch_dtype=torch_dtype,
            model_type="embedding",
            matryoshka_dim=matryoshka_dim,
        ) as hf_runner:
            hf_outputs = hf_runner.forward(truncated_prompts)

        with SRTRunner(
            model_path,
            tp_size=tp_size,
            torch_dtype=torch_dtype,
            model_type="embedding",
            attention_backend="ascend",
            json_model_override_args=(
                {"matryoshka_dimensions": [matryoshka_dim]} if matryoshka_dim else None
            ),
        ) as srt_runner:
            srt_outputs = srt_runner.forward(
                truncated_prompts, dimensions=matryoshka_dim
            )

        for i, prompt in enumerate(prompts):
            hf_logits = torch.Tensor(hf_outputs.embed_logits[i])
            srt_logits = torch.Tensor(srt_outputs.embed_logits[i])

            similarity = torch.tensor(get_similarities(hf_logits, srt_logits))
            deviation = abs(similarity - 1)
            print("similarity diff", deviation)

            # Only gate short prompts; long ones accumulate more numeric drift.
            if len(prompt) <= 1000:
                # self.assertTrue instead of a bare `assert`: plain asserts are
                # stripped when Python runs with -O.
                self.assertTrue(
                    bool(torch.all(deviation < prefill_tolerance)),
                    "embeddings are not all close",
                )

    def test_prefill_logits(self):
        """Run the HF-vs-SRT comparison for every model/dtype combination."""
        for model, tp_size, prefill_tolerance in MODELS:
            for torch_dtype in TORCH_DTYPES:
                self.assert_close_prefill_logits(
                    DEFAULT_PROMPTS, model, tp_size, torch_dtype, prefill_tolerance
                )


if __name__ == "__main__":
    # Allow running this file directly, outside the run_suite.py harness.
    unittest.main()
Loading
Loading