diff --git a/tests/e2e/run_deepseek_megatron_parallelism.sh b/tests/e2e/run_deepseek_megatron_parallelism.sh
index cb3ad6ccb86..8b8fcb182ef 100644
--- a/tests/e2e/run_deepseek_megatron_parallelism.sh
+++ b/tests/e2e/run_deepseek_megatron_parallelism.sh
@@ -21,7 +21,8 @@ python3 -m verl.trainer.main_ppo --config-path=config \
     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
     actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \
     actor_rollout_ref.actor.megatron.virtual_pipeline_model_parallel_size=2 \
-    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \
+    actor_rollout_ref.actor.megatron.context_parallel_size=2 \
+    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \
     actor_rollout_ref.actor.use_kl_loss=False \
     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
@@ -30,6 +31,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
     actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=2 \
     actor_rollout_ref.ref.megatron.virtual_pipeline_model_parallel_size=2 \
+    actor_rollout_ref.ref.megatron.context_parallel_size=2 \
     actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \
     critic.optim.lr=2e-5 \
     critic.model.path=$HOME/models/deepseek-ai/deepseek-coder-1.3b-instruct \
@@ -37,6 +39,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
     critic.ppo_micro_batch_size_per_gpu=4 \
     critic.megatron.pipeline_model_parallel_size=2 \
     critic.megatron.virtual_pipeline_model_parallel_size=2 \
+    critic.megatron.context_parallel_size=2 \
     critic.megatron.tensor_model_parallel_size=2 \
     algorithm.use_kl_in_reward=True \
     algorithm.kl_penalty=kl \
diff --git a/tests/e2e/run_qwen_megatron_parallelism.sh b/tests/e2e/run_qwen_megatron_parallelism.sh
index 480afe3dcda..2eb19cc4710 100644
--- a/tests/e2e/run_qwen_megatron_parallelism.sh
+++ b/tests/e2e/run_qwen_megatron_parallelism.sh
@@ -24,6 +24,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
     actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \
     actor_rollout_ref.actor.megatron.virtual_pipeline_model_parallel_size=2 \
+    actor_rollout_ref.actor.megatron.context_parallel_size=2 \
     actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \
     actor_rollout_ref.actor.use_kl_loss=False \
     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
@@ -33,6 +34,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
     actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=2 \
     actor_rollout_ref.ref.megatron.virtual_pipeline_model_parallel_size=2 \
+    actor_rollout_ref.ref.megatron.context_parallel_size=2 \
     actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \
     critic.optim.lr=2e-5 \
     critic.model.path=$HOME/models/Qwen/Qwen2.5-0.5B \
@@ -40,6 +42,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
     critic.ppo_micro_batch_size_per_gpu=4 \
     critic.megatron.pipeline_model_parallel_size=2 \
     critic.megatron.virtual_pipeline_model_parallel_size=2 \
+    critic.megatron.context_parallel_size=2 \
     critic.megatron.tensor_model_parallel_size=2 \
     algorithm.use_kl_in_reward=True \
     algorithm.kl_penalty=kl \