[diffusion] CI: send nightly-test outputs of diffusion to slack for correctness monitoring (#13833)

yhyang201 · mickqian · web-flow · commit e5c0f591333e · 2025-11-24T15:22:26.000+08:00
Co-authored-by: Mick &lt;mickjagger19@icloud.com&gt;
diff --git a/.github/workflows/nightly-test-nvidia.yml b/.github/workflows/nightly-test-nvidia.yml
@@ -193,6 +193,68 @@ jobs:
         run: |
           python3 scripts/ci/publish_traces.py --traces-dir test/srt/performance_profiles_vlms
 
+  # diffusion performance tests
+  nightly-test-multimodal-server-1-gpu:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 1-gpu-runner
+    strategy:
+      fail-fast: false
+      max-parallel: 5
+      matrix:
+        part: [0, 1]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh diffusion
+          pip install slack_sdk
+
+      - name: Run diffusion server tests
+        env:
+          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+
+        timeout-minutes: 60
+        run: |
+          cd python
+          python3 sglang/multimodal_gen/test/run_suite.py \
+            --suite 1-gpu \
+            --partition-id ${{ matrix.part }} \
+            --total-partitions 2
+
+
+  nightly-test-multimodal-server-2-gpu:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: 2-gpu-runner
+    strategy:
+      fail-fast: false
+      max-parallel: 5
+      matrix:
+        part: [0, 1]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/ci_install_dependency.sh diffusion
+          pip install slack_sdk
+
+      - name: Run diffusion server tests
+        env:
+          SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+
+        timeout-minutes: 60
+        run: |
+          cd python
+          python3 sglang/multimodal_gen/test/run_suite.py \
+            --suite 2-gpu \
+            --partition-id ${{ matrix.part }} \
+            --total-partitions 2
+
   # B200 Performance tests - 4 GPU
   nightly-test-perf-4-gpu-b200:
     if: github.repository == 'sgl-project/sglang'
@@ -275,6 +337,8 @@ jobs:
       - nightly-test-text-perf-2-gpu-runner
       - nightly-test-vlm-accuracy-2-gpu-runner
       - nightly-test-vlm-perf-2-gpu-runner
+      - nightly-test-multimodal-server-1-gpu
+      - nightly-test-multimodal-server-2-gpu
       - nightly-test-perf-4-gpu-b200
       - nightly-test-perf-8-gpu-b200
     runs-on: ubuntu-latest
diff --git a/python/sglang/multimodal_gen/test/server/test_server_common.py b/python/sglang/multimodal_gen/test/server/test_server_common.py
@@ -7,6 +7,7 @@
 
 from __future__ import annotations
 
+import base64
 import os
 import time
 from pathlib import Path
@@ -32,6 +33,7 @@
     PerformanceSummary,
     ScenarioConfig,
 )
+from sglang.multimodal_gen.test.slack_utils import upload_file_to_slack
 from sglang.multimodal_gen.test.test_utils import (
     get_dynamic_server_port,
     read_perf_logs,
@@ -225,6 +227,19 @@ def _create_and_download_video(
             resp = client.videos.download_content(video_id=video_id)  # type: ignore[attr-defined]
             content = resp.read()
             validate_openai_video(content)
+
+            tmp_path = f"{video_id}.mp4"
+            with open(tmp_path, "wb") as f:
+                f.write(content)
+            upload_file_to_slack(
+                case_id=case.id,
+                model=case.model_path,
+                prompt=case.prompt,
+                file_path=tmp_path,
+                origin_file_path=case.image_path,
+            )
+            os.remove(tmp_path)
+
             return video_id
 
         # for all tests, seconds = case.seconds or fallback 4 seconds
@@ -248,6 +263,19 @@ def generate_image() -> str:
             )
             result = response.parse()
             validate_image(result.data[0].b64_json)
+
+            img_data = base64.b64decode(result.data[0].b64_json)
+            tmp_path = f"{result.created}.png"
+            with open(tmp_path, "wb") as f:
+                f.write(img_data)
+            upload_file_to_slack(
+                case_id=case.id,
+                model=case.model_path,
+                prompt=case.prompt,
+                file_path=tmp_path,
+            )
+            os.remove(tmp_path)
+
             return str(result.created)
 
         def generate_image_edit() -> str:
@@ -276,6 +304,20 @@ def generate_image_edit() -> str:
 
             result = response.parse()
             validate_image(result.data[0].b64_json)
+
+            img_data = base64.b64decode(result.data[0].b64_json)
+            tmp_path = f"{rid}.png"
+            with open(tmp_path, "wb") as f:
+                f.write(img_data)
+            upload_file_to_slack(
+                case_id=case.id,
+                model=case.model_path,
+                prompt=case.edit_prompt,
+                file_path=tmp_path,
+                origin_file_path=case.image_path,
+            )
+            os.remove(tmp_path)
+
             return rid
 
         # -------------------------
diff --git a/python/sglang/multimodal_gen/test/slack_utils.py b/python/sglang/multimodal_gen/test/slack_utils.py
@@ -0,0 +1,186 @@
+"""
+    This file upload the media generated in diffusion-nightly-test to a slack channel of SGLang
+"""
+
+import logging
+import os
+import tempfile
+from datetime import datetime
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+import inspect
+
+try:
+    import sglang.multimodal_gen.test.server.testcase_configs as configs
+    from sglang.multimodal_gen.test.server.testcase_configs import DiffusionTestCase
+
+    ALL_CASES = []
+    for name, value in inspect.getmembers(configs):
+        if name.endswith("_CASES") or "_CASES_" in name:
+            if (
+                isinstance(value, list)
+                and len(value) > 0
+                and isinstance(value[0], DiffusionTestCase)
+            ):
+                ALL_CASES.extend(value)
+            elif isinstance(value, list) and len(value) == 0:
+                # Assume empty list with matching name is a valid case list container
+                pass
+
+    # Deduplicate cases by ID
+    seen_ids = set()
+    unique_cases = []
+    for c in ALL_CASES:
+        if c.id not in seen_ids:
+            seen_ids.add(c.id)
+            unique_cases.append(c)
+    ALL_CASES = unique_cases
+
+except Exception as e:
+    logger.warning(f"Failed to import test cases: {e}")
+    ALL_CASES = []
+
+
+def _get_status_message(run_id, current_case_id, thread_messages=None):
+    date_str = datetime.now().strftime("%d/%m")
+    base_header = f"*🧵 for nightly test of {date_str}*\n*GitHub Run ID:* {run_id}\n*Total Tasks:* {len(ALL_CASES)}"
+
+    if not ALL_CASES:
+        return base_header
+
+    default_emoji_for_case_in_progress = "⏳"
+    status_map = {c.id: default_emoji_for_case_in_progress for c in ALL_CASES}
+
+    if thread_messages:
+        for msg in thread_messages:
+            text = msg.get("text", "")
+            # Look for case_id in the message (format: *Case ID:* `case_id`)
+            for c in ALL_CASES:
+                if f"*Case ID:* `{c.id}`" in text:
+                    status_map[c.id] = "✅"
+
+    if current_case_id:
+        status_map[current_case_id] = "✅"
+
+    lines = [base_header, "", "*Tasks Status:*"]
+
+    # Calculate padding
+    max_len = max(len(c.id) for c in ALL_CASES) if ALL_CASES else 10
+    max_len = max(max_len, len("Case ID"))
+
+    # Build markdown table inside a code block
+    table_lines = ["```"]
+    table_lines.append(f"| {'Case ID'.ljust(max_len)} | Status |")
+    table_lines.append(f"| {'-' * max_len} | :----: |")
+
+    for c in ALL_CASES:
+        mark = status_map.get(c.id, default_emoji_for_case_in_progress)
+        table_lines.append(f"| {c.id.ljust(max_len)} |   {mark}   |")
+
+    table_lines.append("```")
+
+    lines.extend(table_lines)
+
+    return "\n".join(lines)
+
+
+def upload_file_to_slack(
+    case_id: str = None,
+    model: str = None,
+    prompt: str = None,
+    file_path: str = None,
+    origin_file_path: str = None,
+) -> bool:
+    temp_path = None
+    try:
+        from slack_sdk import WebClient
+
+        run_id = os.getenv("GITHUB_RUN_ID", "local")
+
+        token = os.environ.get("SGLANG_DIFFUSION_SLACK_TOKEN")
+        if not token:
+            logger.info(f"Slack upload failed: no token")
+            return False
+
+        if not file_path or not os.path.exists(file_path):
+            logger.info(f"Slack upload failed: no file path")
+            return False
+
+        if origin_file_path and origin_file_path.startswith(("http", "https")):
+            suffix = os.path.splitext(urlparse(origin_file_path).path)[1] or ".tmp"
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tf:
+                with urlopen(origin_file_path) as response:
+                    tf.write(response.read())
+                temp_path = tf.name
+                origin_file_path = temp_path
+
+        uploads = [{"file": file_path, "title": "Generated Image"}]
+        if origin_file_path and os.path.exists(origin_file_path):
+            uploads.insert(0, {"file": origin_file_path, "title": "Original Image"})
+
+        message = (
+            f"*Case ID:* `{case_id}`\n" f"*Model:* `{model}`\n" f"*Prompt:* {prompt}"
+        )
+
+        client = WebClient(token=token)
+        channel_id = "C0A02NDF7UY"
+        thread_ts = None
+
+        parent_msg_text = None
+        try:
+            history = client.conversations_history(channel=channel_id, limit=100)
+            for msg in history.get("messages", []):
+                if f"*GitHub Run ID:* {run_id}" in msg.get("text", ""):
+                    # Use thread_ts if it exists (msg is a reply), otherwise use ts (msg is a parent)
+                    thread_ts = msg.get("thread_ts") or msg.get("ts")
+                    parent_msg_text = msg.get("text", "")
+                    logger.info(f"Found thread_ts: {thread_ts}")
+                    break
+        except Exception as e:
+            logger.warning(f"Failed to search slack history: {e}")
+
+        if not thread_ts:
+            try:
+                text = _get_status_message(run_id, case_id)
+                response = client.chat_postMessage(channel=channel_id, text=text)
+                thread_ts = response["ts"]
+            except Exception as e:
+                logger.warning(f"Failed to create parent thread: {e}")
+
+        # Upload first to ensure it's in history
+        client.files_upload_v2(
+            channel=channel_id,
+            file_uploads=uploads,
+            initial_comment=message,
+            thread_ts=thread_ts,
+        )
+
+        # Then update status based on thread replies
+        if thread_ts:
+            try:
+                replies = client.conversations_replies(
+                    channel=channel_id, ts=thread_ts, limit=200
+                )
+                messages = replies.get("messages", [])
+                new_text = _get_status_message(run_id, case_id, messages)
+
+                # Only update if changed significantly (ignoring timestamp diffs if any)
+                # But here we just check text content
+                if new_text != parent_msg_text:
+                    client.chat_update(channel=channel_id, ts=thread_ts, text=new_text)
+            except Exception as e:
+                logger.warning(f"Failed to update parent message: {e}")
+
+        logger.info(f"File uploaded successfully: {os.path.basename(file_path)}")
+        return True
+
+    except Exception as e:
+        logger.info(f"Slack upload failed: {e}")
+        return False
+    finally:
+        if temp_path and os.path.exists(temp_path):
+            os.remove(temp_path)