Skip to content

Commit a30c96d

Browse files
committed
Add 30B fp8 config
Signed-off-by: Guyue Huang <guyueh@nvidia.com>
1 parent 6582ae4 commit a30c96d

1 file changed

Lines changed: 29 additions & 0 deletions

File tree

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
# GRPO Algorithm Configuration
# FP8 training/generation overrides for the Qwen3-30B-A3B MoE recipe.
# NOTE(review): indentation was lost in the source paste; nesting below is
# reconstructed against the base config named in `defaults` — confirm against
# grpo_math_qwen30ba3b_megatron.yaml before relying on it.
defaults: "grpo_math_qwen30ba3b_megatron.yaml"

loss_fn:
  use_importance_sampling_correction: true

policy:
  megatron_cfg:
    # Keep MoE router math in fp32 for numerical stability under fp8 training.
    moe_router_dtype: fp32

    fp8_cfg:
      enabled: true
      fp8: "e4m3"
      fp8_recipe: "blockwise"
      fp8_param: false

    optimizer:
      use_precision_aware_optimizer: false

    env_vars:
      # Quoted so the env value stays the string "1", not the int 1.
      NVTE_FP8_BLOCK_SCALING_FP32_SCALES: "1"

  generation:
    vllm_cfg:
      tensor_parallel_size: 8
      # Original note: need moe_intermediate_size / expert_tensor_parallel_size % 128 == 0
      # (NOTE(review): the note names expert_tensor_parallel_size while the key
      # here is expert_parallel_size — verify which knob the constraint applies to.)
      expert_parallel_size: 8
      precision: "fp8"
      use_deep_gemm: true
      gpu_memory_utilization: 0.6

0 commit comments

Comments (0)