diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
index 73fe410b8c4..6398af38d3c 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -1226,7 +1226,6 @@ def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
             local_expert_offset=self.moe_ep_rank * self.num_local_experts,
             local_num_experts=self.num_local_experts,
             routed_scaling_factor=self.moe_runner_config.routed_scaling_factor,
-            tile_tokens_dim=None,
             routing_method_type=routing_method_type,
             do_finalize=True,
             tune_max_num_tokens=next_power_of_2(hs_fp4.shape[0]),
diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py
index 830a6752cd6..243af5087f4 100644
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -1382,7 +1382,6 @@ def apply_with_router_logits(
                 if routed_scaling_factor is not None
                 else 1.0
             ),
-            tile_tokens_dim=None,
             routing_method_type=routing_method_type,
             use_shuffled_weight=False,
             tune_max_num_tokens=next_power_of_2(a_q.shape[0]),
diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py
index d4faa2ddf55..74fdc22f59c 100755
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -785,7 +785,6 @@ def apply(
                 else 1.0
             ),
             use_routing_scales_on_input=use_routing_scales_on_input,
-            tile_tokens_dim=None,
             routing_method_type=routing_method_type,
             tune_max_num_tokens=next_power_of_2(x.shape[0]),
         )
diff --git a/python/sglang/srt/layers/quantization/mxfp4.py b/python/sglang/srt/layers/quantization/mxfp4.py
index 84424458c97..84bcabdb19b 100644
--- a/python/sglang/srt/layers/quantization/mxfp4.py
+++ b/python/sglang/srt/layers/quantization/mxfp4.py
@@ -682,7 +682,6 @@ def apply(
             layer.moe_ep_rank * layer.num_local_experts,  # local_expert_offset
             layer.num_local_experts,  # local num experts
             None,
-            None,  # tile_tokens_dim
             1,  # routing_method_type, renormalize
             True,  # do finalize
             tune_max_num_tokens=next_power_of_2(x_quant.shape[0]),