diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml old mode 100755 new mode 100644 diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml index 7ffee643c9..55a6efb5e2 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml @@ -13,8 +13,10 @@ loss_fn: reference_policy_kl_penalty: 0.01 ratio_clip_min: 0.2 ratio_clip_max: 0.2 + ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-gemma3-1b-it-1n8g-fsdp2tp1 @@ -75,6 +77,7 @@ policy: stop_token_ids: null stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 512 diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml index 75f844db00..dbd752669d 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml @@ -13,8 +13,10 @@ loss_fn: reference_policy_kl_penalty: 0.01 ratio_clip_min: 0.2 ratio_clip_max: 0.2 + ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long @@ -75,6 +77,7 @@ policy: stop_token_ids: null stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 4 gpu_memory_utilization: 0.6 max_model_len: 16384 diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml index d0b1c9a376..c659a3d170 100644 --- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long @@ -77,6 +78,7 @@ policy: - 128009 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 4096 diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml index 84413affa9..dcf7f06fdd 100644 --- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 @@ -77,6 +78,7 @@ policy: - 128009 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 512 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml index 4c2ed278b0..aa95adcfbc 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long @@ -77,6 +78,7 @@ policy: - 151643 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 4 gpu_memory_utilization: 0.6 max_model_len: 16384 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml index 0a842a6812..ee46d49c8f 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt @@ -77,6 +78,7 @@ policy: - 151643 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 4 gpu_memory_utilization: 0.6 max_model_len: 16384 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml index 9f5507a389..bbe1de5a20 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 @@ -77,6 +78,7 @@ policy: - 151645 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 4096 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml index 70be560caf..da6058254e 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp @@ -77,6 +78,7 @@ policy: - 151645 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 4 gpu_memory_utilization: 0.6 max_model_len: 4096 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml index 907a5497d5..dd364f728c 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -16,6 +16,7 @@ loss_fn: ratio_clip_c: null use_on_policy_kl_approximation: false use_importance_sampling_correction: false + token_level_loss: true checkpointing: enabled: true checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 @@ -77,6 +78,7 @@ policy: - 151645 stop_strings: null vllm_cfg: + precision: ${policy.precision} tensor_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 512