diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 706ef434f812..d7bb176d2343 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -382,6 +382,46 @@ jobs: # temporarily put backend-independent cpu tests here python3 run_suite.py --hw cpu --suite default + stage-b-test-small-1-gpu: + needs: [check-changes, call-gate, stage-a-test-1, sgl-kernel-build-wheels] + if: | + always() && + ( + (inputs.target_stage == 'stage-b-test-small-1-gpu') || + ( + !inputs.target_stage && + (github.event_name == 'schedule' || (!failure() && !cancelled())) && + ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) + ) + ) + runs-on: 1-gpu-runner + env: + RUNNER_LABELS: 1-gpu-runner + strategy: + fail-fast: false + matrix: + partition: [0, 1, 2, 3] + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Download artifacts + if: needs.check-changes.outputs.sgl_kernel == 'true' + uses: actions/download-artifact@v4 + with: + path: sgl-kernel/dist/ + merge-multiple: true + pattern: wheel-python3.10-cuda12.9 + + - name: Install dependencies + run: | + CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh + + - name: Run test + timeout-minutes: 30 + run: | + cd test/ + python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 multimodal-gen-test-1-gpu: needs: [check-changes, call-gate, sgl-kernel-build-wheels] @@ -1251,6 +1291,7 @@ jobs: multimodal-gen-test-2-gpu, stage-a-test-1, + stage-b-test-small-1-gpu, quantization-test, unit-test-backend-1-gpu, unit-test-backend-2-gpu, diff --git a/scripts/ci/slash_command_handler.py b/scripts/ci/slash_command_handler.py index 0c6e0bcd5206..ab38a2b4a039 100644 --- a/scripts/ci/slash_command_handler.py +++ b/scripts/ci/slash_command_handler.py @@ -143,6 +143,7 @@ def handle_rerun_stage( # Valid NVIDIA stage names that support target_stage nvidia_stages = [ "stage-a-test-1", + "stage-b-test-small-1-gpu", "multimodal-gen-test-1-gpu", "multimodal-gen-test-2-gpu", "quantization-test", diff --git a/test/srt/test_eagle_constrained_decoding.py b/test/registered/spec/eagle/test_eagle_constrained_decoding.py similarity index 94% rename from test/srt/test_eagle_constrained_decoding.py rename to test/registered/spec/eagle/test_eagle_constrained_decoding.py index f172618c708e..058edf46a280 100644 --- a/test/srt/test_eagle_constrained_decoding.py +++ b/test/registered/spec/eagle/test_eagle_constrained_decoding.py @@ -2,6 +2,7 @@ from sglang.srt.environ import envs from sglang.srt.utils import kill_process_tree +from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.kits.json_constrained_kit import TestJSONConstrainedMixin from sglang.test.kits.regex_constrained_kit import TestRegexConstrainedMixin from sglang.test.test_utils import ( @@ -13,6 +14,8 @@ popen_launch_server, ) +register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu") + class TestEagleConstrainedDecoding( CustomTestCase, TestRegexConstrainedMixin, TestJSONConstrainedMixin diff --git a/test/srt/test_eagle_infer_a.py b/test/registered/spec/eagle/test_eagle_infer_a.py similarity index 99% rename from test/srt/test_eagle_infer_a.py rename to test/registered/spec/eagle/test_eagle_infer_a.py index 31efd87e2aa9..0abaa1205c4e 100644 --- a/test/srt/test_eagle_infer_a.py +++ b/test/registered/spec/eagle/test_eagle_infer_a.py @@ -7,6 +7,7 @@ import sglang as sgl from sglang.srt.utils import kill_process_tree from sglang.srt.utils.hf_transformers_utils import get_tokenizer +from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import ( DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, @@ -20,6 +21,8 @@ popen_launch_server, ) +register_cuda_ci(est_time=470, suite="stage-b-test-small-1-gpu") + torch_dtype = torch.float16 prefill_tolerance = 5e-2 decode_tolerance: float = 5e-2 diff --git a/test/srt/test_eagle_infer_b.py b/test/registered/spec/eagle/test_eagle_infer_b.py similarity index 99% rename from test/srt/test_eagle_infer_b.py rename to test/registered/spec/eagle/test_eagle_infer_b.py index 5b9df163005a..a4909e3ef86a 100644 --- a/test/srt/test_eagle_infer_b.py +++ b/test/registered/spec/eagle/test_eagle_infer_b.py @@ -12,6 +12,7 @@ import requests from sglang.srt.utils import kill_process_tree +from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.few_shot_gsm8k import run_eval from sglang.test.test_utils import ( DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, @@ -23,6 +24,8 @@ run_logprob_check, ) +register_cuda_ci(est_time=473, suite="stage-b-test-small-1-gpu") + class TestEAGLEServer(CustomTestCase): PROMPTS = [ diff --git a/test/srt/test_eagle_infer_beta.py b/test/registered/spec/eagle/test_eagle_infer_beta.py similarity index 96% rename from test/srt/test_eagle_infer_beta.py rename to test/registered/spec/eagle/test_eagle_infer_beta.py index 8cf9936e5930..f79c0de6ad37 100644 --- a/test/srt/test_eagle_infer_beta.py +++ b/test/registered/spec/eagle/test_eagle_infer_beta.py @@ -3,6 +3,7 @@ from sglang.srt.environ import envs from sglang.srt.utils import kill_process_tree +from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.few_shot_gsm8k import run_eval from sglang.test.kits.matched_stop_kit import MatchedStopMixin from sglang.test.kits.radix_cache_server_kit import run_radix_attention_test @@ -15,6 +16,8 @@ popen_launch_server, ) +register_cuda_ci(est_time=194, suite="stage-b-test-small-1-gpu") + class TestEagleServerBase(CustomTestCase, MatchedStopMixin): max_running_requests = 64 diff --git a/test/srt/test_build_eagle_tree.py b/test/registered/spec/utils/test_build_eagle_tree.py similarity index 98% rename from test/srt/test_build_eagle_tree.py rename to test/registered/spec/utils/test_build_eagle_tree.py index 5372393da6db..3f5c71125686 100644 --- a/test/srt/test_build_eagle_tree.py +++ b/test/registered/spec/utils/test_build_eagle_tree.py @@ -6,6 +6,9 @@ build_tree_kernel_efficient, organize_draft_results, ) +from sglang.test.ci.ci_register import register_cuda_ci + +register_cuda_ci(est_time=3, suite="stage-b-test-small-1-gpu") class TestBuildEagleTree(unittest.TestCase): diff --git a/test/run_suite.py b/test/run_suite.py index 461a30aa0736..cef44e605c37 100644 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -17,7 +17,7 @@ PER_COMMIT_SUITES = { HWBackend.CPU: ["default"], HWBackend.AMD: ["stage-a-test-1"], - HWBackend.CUDA: ["stage-a-test-1"], + HWBackend.CUDA: ["stage-a-test-1", "stage-b-test-small-1-gpu"], HWBackend.NPU: [], } diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 314ae338415d..d7d4026fbd81 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -7,7 +7,6 @@ # NOTE: please sort the test cases alphabetically by the test file name suites = { "per-commit-1-gpu": [ - TestFile("test_eagle_constrained_decoding.py", 100), TestFile("debug_utils/test_tensor_dump_forward_hook.py", 9), TestFile("hicache/test_hicache_storage.py", 96), TestFile("hicache/test_hicache_variants.py", 368), @@ -58,13 +57,9 @@ # TestFile("rl/test_update_weights_from_disk.py", 210), # Temporarily disabled, see https://github.com/sgl-project/sglang/pull/13998 TestFile("rl/test_update_weights_from_tensor.py", 195), TestFile("test_abort.py", 131), - TestFile("test_build_eagle_tree.py", 3), TestFile("test_chunked_prefill.py", 312), TestFile("test_create_kvindices.py", 7), TestFile("test_deterministic.py", 228), - TestFile("test_eagle_infer_a.py", 470), - TestFile("test_eagle_infer_b.py", 473), - TestFile("test_eagle_infer_beta.py", 194), TestFile("test_constrained_decoding.py", 111), TestFile("test_eval_fp8_accuracy.py", 250), TestFile("test_external_models.py", 30),