sgl-project · Fridge003 · Nov 8, 2025 · Nov 4, 2025 · Nov 4, 2025 · Nov 4, 2025
@@ -78,7 +78,7 @@ jobs:
         timeout-minutes: 240
         run: |
           cd test/srt
-          python3 test_nightly_vlms_mmmu_eval.py
+          python3 test_nightly_vlms_mmmu_eval.py || python3 test_nightly_vlms_mmmu_eval.py || python3 test_nightly_vlms_mmmu_eval.py
 
   nightly-test-perf-vlms:
     if: github.repository == 'sgl-project/sglang'
@@ -125,7 +125,7 @@ jobs:
         timeout-minutes: 60
         run: |
           cd test/srt
-          python3 run_suite.py --suite nightly-1-gpu
+          python3 run_suite.py --suite nightly-1-gpu --continue-on-error
 
   nightly-test-4-gpu:
     if: github.repository == 'sgl-project/sglang'
@@ -143,7 +143,7 @@ jobs:
         timeout-minutes: 30
         run: |
           cd test/srt
-          python3 run_suite.py --suite nightly-4-gpu
+          python3 run_suite.py --suite nightly-4-gpu --continue-on-error
 
   nightly-test-8-gpu-h200:
     if: github.repository == 'sgl-project/sglang'
@@ -161,7 +161,7 @@ jobs:
         timeout-minutes: 30
         run: |
           cd test/srt
-          python3 run_suite.py --suite nightly-8-gpu-h200
+          python3 run_suite.py --suite nightly-8-gpu-h200 --continue-on-error
 
   nightly-test-8-gpu-h20:
     if: github.repository == 'sgl-project/sglang'
@@ -181,7 +181,7 @@ jobs:
         timeout-minutes: 30
         run: |
           cd test/srt
-          python3 run_suite.py --suite nightly-8-gpu-h20
+          python3 run_suite.py --suite nightly-8-gpu-h20 --continue-on-error
 
   nightly-test-4-gpu-b200:
     if: github.repository == 'sgl-project/sglang'
@@ -199,4 +199,4 @@ jobs:
         timeout-minutes: 60
         run: |
           cd test/srt
-          python3 run_suite.py --suite nightly-4-gpu-b200
+          python3 run_suite.py --suite nightly-4-gpu-b200 --continue-on-error
diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py
@@ -735,9 +735,22 @@ class TestFile:
     estimated_time: float = 60
 
 
-def run_unittest_files(files: List[TestFile], timeout_per_file: float):
+def run_unittest_files(
+    files: List[TestFile], timeout_per_file: float, continue_on_error: bool = False
+):
+    """
+    Run a list of test files.
+
+    Args:
+        files: List of TestFile objects to run
+        timeout_per_file: Timeout in seconds for each test file
+        continue_on_error: If True, keep running the remaining test files after one fails or
+                          times out. If False, stop at the first failure or timeout (default behavior for PR tests).
+    """
     tic = time.perf_counter()
     success = True
+    passed_tests = []
+    failed_tests = []
 
     for i, file in enumerate(files):
         filename, estimated_time = file.name, file.estimated_time
@@ -769,24 +782,52 @@ def run_one_file(filename):
             ret_code = run_with_timeout(
                 run_one_file, args=(filename,), timeout=timeout_per_file
             )
-            assert (
-                ret_code == 0
-            ), f"expected return code 0, but {filename} returned {ret_code}"
+            if ret_code != 0:
+                print(
+                    f"\n✗ FAILED: {filename} returned exit code {ret_code}\n",
+                    flush=True,
+                )
+                success = False
+                failed_tests.append((filename, f"exit code {ret_code}"))
+                if not continue_on_error:
+                    # Stop at first failure for PR tests
+                    break
+                # Otherwise continue to next test for nightly tests
+            else:
+                passed_tests.append(filename)
         except TimeoutError:
             kill_process_tree(process.pid)
             time.sleep(5)
             print(
-                f"\nTimeout after {timeout_per_file} seconds when running {filename}\n",
+                f"\n✗ TIMEOUT: {filename} after {timeout_per_file} seconds\n",
                 flush=True,
             )
             success = False
-            break
+            failed_tests.append((filename, f"timeout after {timeout_per_file}s"))
+            if not continue_on_error:
+                # Stop at first timeout for PR tests
+                break
+            # Otherwise continue to next test for nightly tests
 
     if success:
         print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
     else:
         print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
 
+    # Print summary
+    print(f"\n{'='*60}", flush=True)
+    print(f"Test Summary: {len(passed_tests)}/{len(files)} passed", flush=True)
+    print(f"{'='*60}", flush=True)
+    if passed_tests:
+        print("✓ PASSED:", flush=True)
+        for test in passed_tests:
+            print(f"  {test}", flush=True)
+    if failed_tests:
+        print("\n✗ FAILED:", flush=True)
+        for test, reason in failed_tests:
+            print(f"  {test} ({reason})", flush=True)
+    print(f"{'='*60}\n", flush=True)
+
     return 0 if success else -1
 
 

diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
@@ -645,6 +645,12 @@ def _sanity_check_suites(suites):
         type=int,
         help="Use auto load balancing. The number of parts.",
     )
+    arg_parser.add_argument(
+        "--continue-on-error",
+        action="store_true",
+        default=False,
+        help="Continue running remaining tests even if one fails (useful for nightly tests)",
+    )
     args = arg_parser.parse_args()
     print(f"{args=}")
 
@@ -662,5 +668,5 @@ def _sanity_check_suites(suites):
 
     print("The running tests are ", [f.name for f in files])
 
-    exit_code = run_unittest_files(files, args.timeout_per_file)
+    exit_code = run_unittest_files(files, args.timeout_per_file, args.continue_on_error)
     exit(exit_code)