diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 71b7910ee4d5..2f992d1e6347 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -183,7 +183,7 @@ class ServerArgs: enable_flashmla: bool = False flashinfer_mla_disable_ragged: bool = False warmups: Optional[str] = None - n_share_experts_fusion: int = 0 + n_share_experts_fusion: Optional[int] = None disable_shared_experts_fusion: bool = False # Debug tensor dumps @@ -1106,7 +1106,7 @@ def add_cli_args(parser: argparse.ArgumentParser): parser.add_argument( "--n-share-experts-fusion", type=int, - default=0, + default=None, help="The number of shared_experts need to be replica to fuse with normal experts in deepseek v3/r1 " "we use tp_size by default.", )