Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Configuration for the GitHub Labeler action
# Automatically adds labels to PRs based on the files changed

# Router specific (Rust code in sgl-router)
# Router specific (Rust code in sgl-model-gateway)
model-gateway:
- changed-files:
- any-glob-to-any-file: 'sgl-router/**/*'
- any-glob-to-any-file: 'sgl-model-gateway/**/*'

# Kernel specific
sgl-kernel:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@ jobs:

- name: Check proto files are in sync
run: |
if ! diff -q python/sglang/srt/grpc/sglang_scheduler.proto sgl-router/src/proto/sglang_scheduler.proto; then
if ! diff -q python/sglang/srt/grpc/sglang_scheduler.proto sgl-model-gateway/src/proto/sglang_scheduler.proto; then
echo "❌ ERROR: Proto files are out of sync!"
echo ""
echo "The following files must be kept identical:"
echo " - python/sglang/srt/grpc/sglang_scheduler.proto"
echo " - sgl-router/src/proto/sglang_scheduler.proto"
echo " - sgl-model-gateway/src/proto/sglang_scheduler.proto"
echo ""
echo "Please ensure both files have the same content."
echo ""
echo "Differences:"
diff python/sglang/srt/grpc/sglang_scheduler.proto sgl-router/src/proto/sglang_scheduler.proto || true
diff python/sglang/srt/grpc/sglang_scheduler.proto sgl-model-gateway/src/proto/sglang_scheduler.proto || true
exit 1
fi
8 changes: 4 additions & 4 deletions .github/workflows/nightly-release-gateway.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ jobs:
with:
path: sglang-repo

- name: Move sgl-router folder to root and delete sglang-repo
- name: Move sgl-model-gateway folder to root and delete sglang-repo
run: |
mv sglang-repo/sgl-router/* .
mv sglang-repo/sgl-model-gateway/* .
rm -rf sglang-repo
ls -alt
shell: bash
Expand Down Expand Up @@ -138,9 +138,9 @@ jobs:
with:
path: sglang-repo

- name: Move sgl-router folder to root and delete sglang-repo
- name: Move sgl-model-gateway folder to root and delete sglang-repo
run: |
mv sglang-repo/sgl-router/* .
mv sglang-repo/sgl-model-gateway/* .
rm -rf sglang-repo
ls -alt

Expand Down
26 changes: 13 additions & 13 deletions .github/workflows/pr-benchmark-rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ on:
push:
branches: [ main ]
paths:
- "sgl-router/**"
- "sgl-model-gateway/**"
pull_request:
branches: [ main ]
paths:
- "sgl-router/**"
- "sgl-model-gateway/**"
workflow_dispatch:

concurrency:
Expand Down Expand Up @@ -45,7 +45,7 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
# Share cache across all benchmark jobs
shared-key: "rust-cache"
# Save cache even on failure
Expand All @@ -56,7 +56,7 @@ jobs:
- name: Check benchmarks compile
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
cargo check --benches

- name: Show sccache stats
Expand Down Expand Up @@ -92,7 +92,7 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
# Share cache across all benchmark jobs
shared-key: "rust-cache"
cache-all-crates: true
Expand All @@ -104,7 +104,7 @@ jobs:
timeout-minutes: 30
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
# Try to use sccache, but disable if it fails
if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
Expand All @@ -130,7 +130,7 @@ jobs:
with:
name: request-processing-results-${{ github.sha }}
path: |
sgl-router/target/criterion/benchmark_summary/
sgl-model-gateway/target/criterion/benchmark_summary/
retention-days: 30

- name: Show sccache stats
Expand Down Expand Up @@ -164,7 +164,7 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
# Share cache across all benchmark jobs
shared-key: "rust-cache"
cache-all-crates: true
Expand All @@ -176,7 +176,7 @@ jobs:
timeout-minutes: 30
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
# Try to use sccache, but disable if it fails
if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
Expand All @@ -201,7 +201,7 @@ jobs:
with:
name: tokenizer-results-${{ github.sha }}
path: |
sgl-router/target/criterion/tokenizer*/
sgl-model-gateway/target/criterion/tokenizer*/
retention-days: 30

benchmark-tool-parser:
Expand Down Expand Up @@ -231,7 +231,7 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
# Share cache across all benchmark jobs
shared-key: "rust-cache"
cache-all-crates: true
Expand All @@ -243,7 +243,7 @@ jobs:
timeout-minutes: 30
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
# Try to use sccache, but disable if it fails
if command -v sccache &> /dev/null; then
echo "Testing sccache availability..."
Expand All @@ -268,7 +268,7 @@ jobs:
with:
name: tool-parser-results-${{ github.sha }}
path: |
sgl-router/target/criterion/tool_parser*/
sgl-model-gateway/target/criterion/tool_parser*/
retention-days: 30

- name: Show sccache stats
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/pr-test-pd-router.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ on:
paths:
- 'python/sglang/srt/disaggregation/**'
- 'scripts/ci/ci_start_disaggregation_servers.sh'
- 'sgl-router/**'
- 'sgl-model-gateway/**'
pull_request:
branches: [ main ]
types: [opened, synchronize, reopened, labeled]
paths:
- 'python/sglang/srt/disaggregation/**'
- 'scripts/ci/ci_start_disaggregation_servers.sh'
- 'sgl-router/**'
- 'sgl-model-gateway/**'
workflow_dispatch:

concurrency:
Expand Down Expand Up @@ -58,8 +58,8 @@ jobs:
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
sgl-router/target/
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
sgl-model-gateway/target/
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-model-gateway/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-

Expand Down Expand Up @@ -147,11 +147,11 @@ jobs:
python3 -m pip --no-cache-dir install mooncake-transfer-engine==0.3.7.post2
python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.2

- name: Build and install sgl-router
- name: Build and install sgl-model-gateway
run: |
source "$HOME/.cargo/env"
echo "Building sgl-router..."
cd sgl-router/bindings/python
echo "Building sgl-model-gateway..."
cd sgl-model-gateway/bindings/python
pip install maturin
maturin build --release --out dist --features vendored-openssl
pip install --force-reinstall dist/*.whl
Expand Down
44 changes: 23 additions & 21 deletions .github/workflows/pr-test-rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ on:
push:
branches: [ main ]
paths:
- "sgl-router/**"
- "sgl-model-gateway/**"
pull_request:
branches: [ main ]
types: [opened, synchronize, reopened, labeled]
paths:
- "sgl-router/**"
- "sgl-model-gateway/**"
workflow_dispatch:

concurrency:
Expand All @@ -32,9 +32,9 @@ jobs:
with:
path: sglang-repo

- name: Move sgl-router folder to root
- name: Move sgl-model-gateway folder to root
run: |
mv sglang-repo/sgl-router/* .
mv sglang-repo/sgl-model-gateway/* .
rm -rf sglang-repo

- name: Set up Python
Expand Down Expand Up @@ -96,50 +96,52 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
cache-all-crates: true
cache-on-failure: true

- name: Run lint
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
rustup component add clippy
cargo clippy --all-targets --all-features -- -D warnings

- name: Run fmt
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
rustup toolchain install nightly --profile minimal
cargo +nightly fmt -- --check

- name: Generate vision golden fixtures
run: |
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu

pip install transformers pillow numpy scipy
cd sgl-router/
pip install transformers pillow numpy
cd sgl-model-gateway/
python scripts/generate_vision_golden.py

- name: Run Rust tests
timeout-minutes: 20
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
cargo test

- name: Check benchmark compilation
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
cargo check --benches

- name: Quick benchmark sanity check
timeout-minutes: 15
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-model-gateway/
# Run quick benchmarks to ensure they work using Python script
python3 scripts/run_benchmarks.py --quick

Expand Down Expand Up @@ -170,7 +172,7 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
cache-all-crates: true
cache-on-failure: true

Expand All @@ -182,22 +184,22 @@ jobs:
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router/bindings/python
cd sgl-model-gateway/bindings/python
python3 -m pip install --upgrade pip maturin
pip uninstall -y sglang-router
maturin build --profile ci --features vendored-openssl --out dist
pip install dist/*.whl

- name: Run Python unit tests
run: |
cd sgl-router
cd sgl-model-gateway
source "$HOME/.cargo/env"
python3 -m pip install pytest pytest-cov pytest-xdist
pytest -q py_test/unit --cov=sglang_router --cov-config=bindings/python/.coveragerc --cov-report=term-missing --cov-fail-under=80

- name: Run Python integration tests
run: |
cd sgl-router
cd sgl-model-gateway
source "$HOME/.cargo/env"
# Integration tests use FastAPI/uvicorn for mock workers
python3 -m pip install fastapi uvicorn orjson
Expand All @@ -206,7 +208,7 @@ jobs:
- name: Run Python E2E tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
cd sgl-model-gateway
source "$HOME/.cargo/env"
python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
python3 -m pip --no-cache-dir install --upgrade genai-bench==0.0.2
Expand All @@ -217,7 +219,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: genai-bench-results-all-policies
path: sgl-router/benchmark_**/
path: sgl-model-gateway/benchmark_**/

router-grpc-response-api-tests:
if: |
Expand All @@ -242,7 +244,7 @@ jobs:
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
workspaces: sgl-router
workspaces: sgl-model-gateway
cache-all-crates: true
cache-on-failure: true

Expand Down Expand Up @@ -294,7 +296,7 @@ jobs:
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router/bindings/python
cd sgl-model-gateway/bindings/python
python3 -m pip install --upgrade pip maturin
pip uninstall -y sglang-router
maturin build --profile ci --features vendored-openssl --out dist
Expand All @@ -304,14 +306,14 @@ jobs:
run: |
python3 -m pip install pytest-rerunfailures
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
cd sgl-model-gateway
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 pytest --reruns 3 --reruns-delay 2 py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO

- name: Run Python E2E gRPC tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
cd sgl-model-gateway
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO

Expand Down
Loading
Loading