Skip to content
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
b658433
[model-gateway] add Qwen3-30B-A3B-Thinking-2507 model with AMD verifi…
michaelzhang-ai Dec 4, 2025
e09843c
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 4, 2025
e780833
Update test/srt/nightly/test_gsm8k_eval_amd.py
michaelzhang-ai Dec 4, 2025
4082fd9
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 4, 2025
3c30176
Add Qwen3-30B-A3B-Thinking-2507 to nightly TP2 evaluation models
michaelzhang-ai Dec 4, 2025
7275b16
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 5, 2025
22e8c3e
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 5, 2025
35f6062
Add AMD-specific model for nightly evaluation on MI300X (#14508)
michaelzhang-ai Dec 5, 2025
9aac685
revert
michaelzhang-ai Dec 5, 2025
f0f5c10
Update AMD model name for nightly evaluation to use remove_failing_mo…
michaelzhang-ai Dec 5, 2025
072f98e
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 5, 2025
547c239
Add pull request trigger for nightly AMD tests on main branch
michaelzhang-ai Dec 5, 2025
95bd361
Add AMD-specific launch configuration for models in nightly evaluatio…
michaelzhang-ai Dec 5, 2025
1ea04b7
Refine AMD launch configuration in nightly evaluation tests by simpli…
michaelzhang-ai Dec 5, 2025
646c382
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 6, 2025
a5acdf1
Remove pull_request trigger from nightly AMD tests for merge
michaelzhang-ai Dec 9, 2025
f3eee7d
Remove AMD-specific model configuration from nightly evaluation tests…
michaelzhang-ai Dec 10, 2025
91864d0
Merge branch 'main' into add-nightly-test
michaelzhang-ai Dec 10, 2025
bd82c87
Added back the AMD_SPECIAL_CONFIG_MODELS definition and the config bl…
michaelzhang-ai Dec 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions test/srt/nightly/test_gsm8k_eval_amd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"meta-llama/Llama-3.1-70B-Instruct": 0.95,
"mistralai/Mixtral-8x7B-Instruct-v0.1": 0.64,
"Qwen/Qwen2-57B-A14B-Instruct": 0.86,
"Qwen/Qwen3-30B-A3B-Thinking-2507": 0.84, # MoE model from sanity_check.py - TP2 verified on MI300X
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8": 0.83,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8": 0.54,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8": 0.94,
Expand Down Expand Up @@ -63,6 +64,11 @@ def remove_failing_models(model_str):
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2
)

# AMD-specific models verified on MI300X with tp=2
AMD_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = remove_failing_models(
"Qwen/Qwen3-30B-A3B-Thinking-2507"
)

NO_MOE_PADDING_MODELS = {"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"}
DISABLE_HF_XET_MODELS = {
"Qwen/Qwen2-57B-A14B-Instruct",
Expand All @@ -74,13 +80,28 @@ def remove_failing_models(model_str):
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3",
}
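# AMD-specific models that need a special launch config (matching the in-house CI sanity_check.py).
# The set below is intentionally commented out: the default config works for the one model listed.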
# AMD_SPECIAL_CONFIG_MODELS = {
# "Qwen/Qwen3-30B-A3B-Thinking-2507", # default config works
# }


def popen_launch_server_wrapper(base_url, model, is_tp2):
other_args = ["--log-level-http", "warning", "--trust-remote-code"]
if is_tp2:
other_args.extend(["--tp", "2"])

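# Disabled: launch config from sanity_check.py for AMD-specific models, scaled from tp=8 to tp=2
# (each value divided by 4). Re-enabling this also requires restoring AMD_SPECIAL_CONFIG_MODELS.
# Original tp=8: chunked-prefill-size=130172, max-running-requests=128
# Scaled tp=2: chunked-prefill-size=32543, max-running-requests=32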
# if model in AMD_SPECIAL_CONFIG_MODELS:
# other_args.extend([
# "--chunked-prefill-size", "32543",
# "--max-running-requests", "32",
# "--mem-fraction-static", "0.85",
# "--attention-backend", "aiter",
# ])

process = popen_launch_server(
model,
base_url,
Expand Down Expand Up @@ -128,6 +149,8 @@ def setUpClass(cls):
(parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True),
(parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False),
(parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True),
# AMD-specific models verified on MI300X
(parse_models(AMD_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True),
]
cls.base_url = DEFAULT_URL_FOR_TEST

Expand Down
Loading