Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,46 @@ jobs:
# temporarily put backend-independent cpu tests here
python3 run_suite.py --hw cpu --suite default

stage-b-test-small-1-gpu:
needs: [check-changes, call-gate, stage-a-test-1, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-small-1-gpu') ||
(
!inputs.target_stage &&
(github.event_name == 'schedule' || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 1-gpu-runner
env:
RUNNER_LABELS: 1-gpu-runner
strategy:
fail-fast: false
matrix:
partition: [0, 1, 2, 3]
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
cd test/
python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4

multimodal-gen-test-1-gpu:
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
Expand Down Expand Up @@ -1251,6 +1291,7 @@ jobs:
multimodal-gen-test-2-gpu,

stage-a-test-1,
stage-b-test-small-1-gpu,
quantization-test,
unit-test-backend-1-gpu,
unit-test-backend-2-gpu,
Expand Down
1 change: 1 addition & 0 deletions scripts/ci/slash_command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def handle_rerun_stage(
# Valid NVIDIA stage names that support target_stage
nvidia_stages = [
"stage-a-test-1",
"stage-b-test-small-1-gpu",
"multimodal-gen-test-1-gpu",
"multimodal-gen-test-2-gpu",
"quantization-test",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from sglang.srt.environ import envs
from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.kits.json_constrained_kit import TestJSONConstrainedMixin
from sglang.test.kits.regex_constrained_kit import TestRegexConstrainedMixin
from sglang.test.test_utils import (
Expand All @@ -13,6 +14,8 @@
popen_launch_server,
)

register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu")


class TestEagleConstrainedDecoding(
CustomTestCase, TestRegexConstrainedMixin, TestJSONConstrainedMixin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sglang as sgl
from sglang.srt.utils import kill_process_tree
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.test_utils import (
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
Expand All @@ -20,6 +21,8 @@
popen_launch_server,
)

register_cuda_ci(est_time=470, suite="stage-b-test-small-1-gpu")

torch_dtype = torch.float16
prefill_tolerance = 5e-2
decode_tolerance: float = 5e-2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import requests

from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import (
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
Expand All @@ -23,6 +24,8 @@
run_logprob_check,
)

register_cuda_ci(est_time=473, suite="stage-b-test-small-1-gpu")


class TestEAGLEServer(CustomTestCase):
PROMPTS = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from sglang.srt.environ import envs
from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.kits.matched_stop_kit import MatchedStopMixin
from sglang.test.kits.radix_cache_server_kit import run_radix_attention_test
Expand All @@ -15,6 +16,8 @@
popen_launch_server,
)

register_cuda_ci(est_time=194, suite="stage-b-test-small-1-gpu")


class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
max_running_requests = 64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
build_tree_kernel_efficient,
organize_draft_results,
)
from sglang.test.ci.ci_register import register_cuda_ci

register_cuda_ci(est_time=3, suite="stage-b-test-small-1-gpu")


class TestBuildEagleTree(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
PER_COMMIT_SUITES = {
HWBackend.CPU: ["default"],
HWBackend.AMD: ["stage-a-test-1"],
HWBackend.CUDA: ["stage-a-test-1"],
HWBackend.CUDA: ["stage-a-test-1", "stage-b-test-small-1-gpu"],
HWBackend.NPU: [],
}

Expand Down
5 changes: 0 additions & 5 deletions test/srt/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
# NOTE: please sort the test cases alphabetically by the test file name
suites = {
"per-commit-1-gpu": [
TestFile("test_eagle_constrained_decoding.py", 100),
TestFile("debug_utils/test_tensor_dump_forward_hook.py", 9),
TestFile("hicache/test_hicache_storage.py", 96),
TestFile("hicache/test_hicache_variants.py", 368),
Expand Down Expand Up @@ -58,13 +57,9 @@
# TestFile("rl/test_update_weights_from_disk.py", 210), # Temporarily disabled, see https://github.com/sgl-project/sglang/pull/13998
TestFile("rl/test_update_weights_from_tensor.py", 195),
TestFile("test_abort.py", 131),
TestFile("test_build_eagle_tree.py", 3),
TestFile("test_chunked_prefill.py", 312),
TestFile("test_create_kvindices.py", 7),
TestFile("test_deterministic.py", 228),
TestFile("test_eagle_infer_a.py", 470),
TestFile("test_eagle_infer_b.py", 473),
TestFile("test_eagle_infer_beta.py", 194),
TestFile("test_constrained_decoding.py", 111),
TestFile("test_eval_fp8_accuracy.py", 250),
TestFile("test_external_models.py", 30),
Expand Down
Loading