-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain_sesa.sh
More file actions
39 lines (32 loc) · 1.8 KB
/
train_sesa.sh
File metadata and controls
39 lines (32 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# --- Environment shared by every training run below -------------------------
# SwanLab API key. Keep a key that is already set in the caller's environment
# rather than blanking it; fall back to empty (the original default) otherwise.
# Prefer supplying the key via the environment over hardcoding it here.
# NOTE(review): presumably read by the 'swanlab' entry of trainer.logger — confirm.
export SWANLAB_API_KEY="${SWANLAB_API_KEY:-}"

# Bundles of space-separated key=value overrides. They are kept as plain
# strings (not arrays) because the run commands expand them UNQUOTED so that
# the shell splits them into one argument per override.
export USE_GRPO="algorithm.adv_estimator=grpo agent_proxy.reward_normalization.method=mean_std actor_rollout_ref.actor.use_kl_loss=True"
export USE_BASE="algorithm.kl_ctrl.kl_coef=0.001 actor_rollout_ref.actor.kl_loss_coef=0.001 actor_rollout_ref.actor.clip_ratio_high=0.2 actor_rollout_ref.rollout.rollout_filter_ratio=1"

# Hydra setting; per the Hydra docs this requests complete stack traces on error.
export HYDRA_FULL_ERROR=1
# NOTE(review): consumed by the training code, not by this script — semantics
# are not visible here; confirm against train.py.
export ENABLE_SERIAL_GENERATION=1
# Run 1: launch train.py with the `_2_sokoban` config (experiment name "sokoban").
# MKL_SERVICE_FORCE_INTEL=1 is set for this one python process only.
# $USE_GRPO and $USE_BASE are intentionally unquoted: each holds several
# space-separated key=value overrides that must become separate arguments.
# Overrides containing [...] or literal double quotes are single-quoted so the
# shell can never glob-expand them; the arguments python receives are the same
# as before (barring an accidental filename match in the original).
# The last argument must NOT end in a backslash, or the next line of the
# script is swallowed into this command.
# shellcheck disable=SC2086
MKL_SERVICE_FORCE_INTEL=1 python train.py --config-name _2_sokoban \
  'system.CUDA_VISIBLE_DEVICES="0,1,2,3"' trainer.n_gpus_per_node=4 \
  actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
  trainer.experiment_name=sokoban $USE_GRPO $USE_BASE \
  es_manager.train.env_groups=8 es_manager.train.group_size=4 \
  'es_manager.train.env_configs.n_groups=[8]' \
  trainer.nnodes=1 \
  'trainer.logger=[console,swanlab]' \
  actor_rollout_ref.rollout.tp_size_check=False \
  actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
  trainer.val_before_train=True \
  trainer.save_freq=40 \
  trainer.test_freq=5 \
  trainer.resume_mode=auto \
  trainer.project_name=test

# Own statement again: a trailing backslash on the command above used to turn
# this line into two bogus train.py arguments ("export" and the assignment).
export ENABLE_SERIAL_GENERATION=1
# Run 2: launch train.py with the `_3_frozen_lake` config (experiment name
# "frozen-lake"), using the same overrides as the sokoban run.
# MKL_SERVICE_FORCE_INTEL=1 is set for this one python process only.
# $USE_GRPO and $USE_BASE are intentionally unquoted: each holds several
# space-separated key=value overrides that must become separate arguments.
# Overrides containing [...] or literal double quotes are single-quoted so the
# shell can never glob-expand them; the arguments python receives are the same
# as before (barring an accidental filename match in the original).
# The final argument carries no trailing backslash, so anything appended after
# this command stays a separate statement.
# shellcheck disable=SC2086
MKL_SERVICE_FORCE_INTEL=1 python train.py --config-name _3_frozen_lake \
  'system.CUDA_VISIBLE_DEVICES="0,1,2,3"' trainer.n_gpus_per_node=4 \
  actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
  trainer.experiment_name=frozen-lake $USE_GRPO $USE_BASE \
  es_manager.train.env_groups=8 es_manager.train.group_size=4 \
  'es_manager.train.env_configs.n_groups=[8]' \
  trainer.nnodes=1 \
  'trainer.logger=[console,swanlab]' \
  actor_rollout_ref.rollout.tp_size_check=False \
  actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
  trainer.val_before_train=True \
  trainer.save_freq=40 \
  trainer.test_freq=5 \
  trainer.resume_mode=auto \
  trainer.project_name=test