We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f607c17 commit 991c63d
python/sglang/srt/layers/attention/flashinfer_backend.py
@@ -161,6 +161,10 @@ def __init__(
161
"Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures
162
or "Qwen3ForCausalLM" in model_runner.model_config.hf_config.architectures
163
or "MiMoForCausalLM" in model_runner.model_config.hf_config.architectures
164
+ or "Qwen3VLForConditionalGeneration"
165
+ in model_runner.model_config.hf_config.architectures
166
+ or "Qwen3VLMoeForConditionalGeneration"
167
+ in model_runner.model_config.hf_config.architectures
168
):
169
envs.SGLANG_FLASHINFER_WORKSPACE_SIZE.set(512 * 1024 * 1024)
170
0 commit comments