Skip to content
4 changes: 4 additions & 0 deletions python/sglang/srt/layers/attention/flashinfer_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ def __init__(
"Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures
or "Qwen3ForCausalLM" in model_runner.model_config.hf_config.architectures
or "MiMoForCausalLM" in model_runner.model_config.hf_config.architectures
or "Qwen3VLForConditionalGeneration"
in model_runner.model_config.hf_config.architectures
or "Qwen3VLMoeForConditionalGeneration"
in model_runner.model_config.hf_config.architectures
):
envs.SGLANG_FLASHINFER_WORKSPACE_SIZE.set(512 * 1024 * 1024)

Expand Down
Loading