diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml
old mode 100755
new mode 100644
diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml
index 7ffee643c9..55a6efb5e2 100644
--- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml
+++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml
@@ -13,8 +13,10 @@ loss_fn:
   reference_policy_kl_penalty: 0.01
   ratio_clip_min: 0.2
   ratio_clip_max: 0.2
+  ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-gemma3-1b-it-1n8g-fsdp2tp1
@@ -75,6 +77,7 @@ policy:
     stop_token_ids: null
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 1
       gpu_memory_utilization: 0.6
       max_model_len: 512
diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml
index 75f844db00..dbd752669d 100644
--- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml
+++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml
@@ -13,8 +13,10 @@ loss_fn:
   reference_policy_kl_penalty: 0.01
   ratio_clip_min: 0.2
   ratio_clip_max: 0.2
+  ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long
@@ -75,6 +77,7 @@ policy:
     stop_token_ids: null
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 4
       gpu_memory_utilization: 0.6
       max_model_len: 16384
diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml
index d0b1c9a376..c659a3d170 100644
--- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long
@@ -77,6 +78,7 @@ policy:
       - 128009
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 1
       gpu_memory_utilization: 0.6
       max_model_len: 4096
diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml
index 84413affa9..dcf7f06fdd 100644
--- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1
@@ -77,6 +78,7 @@ policy:
       - 128009
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 1
       gpu_memory_utilization: 0.6
       max_model_len: 512
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml
index 4c2ed278b0..aa95adcfbc 100644
--- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long
@@ -77,6 +78,7 @@ policy:
       - 151643
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 4
       gpu_memory_utilization: 0.6
       max_model_len: 16384
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml
index 0a842a6812..ee46d49c8f 100644
--- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt
@@ -77,6 +78,7 @@ policy:
       - 151643
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 4
       gpu_memory_utilization: 0.6
       max_model_len: 16384
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml
index 9f5507a389..bbe1de5a20 100644
--- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1
@@ -77,6 +78,7 @@ policy:
       - 151645
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 1
       gpu_memory_utilization: 0.6
       max_model_len: 4096
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml
index 70be560caf..da6058254e 100644
--- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp
@@ -77,6 +78,7 @@ policy:
       - 151645
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 4
       gpu_memory_utilization: 0.6
       max_model_len: 4096
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml
index 907a5497d5..dd364f728c 100644
--- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml
@@ -16,6 +16,7 @@ loss_fn:
   ratio_clip_c: null
   use_on_policy_kl_approximation: false
   use_importance_sampling_correction: false
+  token_level_loss: true
 checkpointing:
   enabled: true
   checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1
@@ -77,6 +78,7 @@ policy:
       - 151645
     stop_strings: null
     vllm_cfg:
+      precision: ${policy.precision}
       tensor_parallel_size: 1
       gpu_memory_utilization: 0.6
       max_model_len: 512