Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ loss_fn:
reference_policy_kl_penalty: 0.01
ratio_clip_min: 0.2
ratio_clip_max: 0.2
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-gemma3-1b-it-1n8g-fsdp2tp1
Expand Down Expand Up @@ -75,6 +77,7 @@ policy:
stop_token_ids: null
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 512
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ loss_fn:
reference_policy_kl_penalty: 0.01
ratio_clip_min: 0.2
ratio_clip_max: 0.2
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long
Expand Down Expand Up @@ -75,6 +77,7 @@ policy:
stop_token_ids: null
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 4
gpu_memory_utilization: 0.6
max_model_len: 16384
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 128009
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 4096
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 128009
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 512
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 151643
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 4
gpu_memory_utilization: 0.6
max_model_len: 16384
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 151643
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 4
gpu_memory_utilization: 0.6
max_model_len: 16384
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 151645
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 4096
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 151645
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 4
gpu_memory_utilization: 0.6
max_model_len: 4096
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loss_fn:
ratio_clip_c: null
use_on_policy_kl_approximation: false
use_importance_sampling_correction: false
token_level_loss: true
checkpointing:
enabled: true
checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1
Expand Down Expand Up @@ -77,6 +78,7 @@ policy:
- 151645
stop_strings: null
vllm_cfg:
precision: ${policy.precision}
tensor_parallel_size: 1
gpu_memory_utilization: 0.6
max_model_len: 512
Expand Down