NVIDIA-NeMo · yuki-97 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
@@ -135,7 +135,20 @@ policy:
     moe_enable_deepep: false
     moe_token_dispatcher_type: "alltoall"
     moe_shared_expert_overlap: false
-
+
+    peft:  # LoRA adapter settings; overridable as policy.megatron_cfg.peft.*
+      enabled: false  # off by default; the dpo_megatron_lora functional test turns it on
+      target_modules: []
+      exclude_modules: []
+      dim: 8  # NOTE(review): presumably the LoRA rank - confirm
+      alpha: 32  # NOTE(review): presumably the LoRA scaling factor - confirm
+      dropout: 0.0
+      dropout_position: "post"
+      lora_A_init_method: "xavier"
+      lora_B_init_method: "zero"
+      a2a_experimental: false
+      lora_dtype: null  # NOTE(review): null presumably defers to the model dtype - confirm
+
     optimizer:
       optimizer: "adam"
       lr: 5.0e-6 #4.0e-5

@@ -42,6 +42,7 @@ run_test fast uv run --no-sync bash ./tests/functional/distillation.sh
 run_test      uv run --no-sync bash ./tests/functional/distillation_megatron.sh
 run_test fast uv run --no-sync bash ./tests/functional/dpo.sh
 run_test      uv run --no-sync bash ./tests/functional/dpo_automodel_lora.sh
+run_test fast uv run --no-sync bash ./tests/functional/dpo_megatron_lora.sh
 run_test      uv run --no-sync bash ./tests/functional/dpo_megatron.sh
 run_test      uv run --no-sync bash ./tests/functional/eval.sh
 run_test      uv run --no-sync bash ./tests/functional/eval_async.sh

@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# clean up checkpoint directory on exit
+trap "rm -rf /tmp/dpo_megatron_lora_checkpoints" EXIT
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
+# Mark the current repo as safe, since wandb fetches metadata about the repo
+git config --global --add safe.directory $PROJECT_ROOT
+
+set -eou pipefail
+
+EXP_NAME=$(basename $0 .sh)
+EXP_DIR=$SCRIPT_DIR/$EXP_NAME
+LOG_DIR=$EXP_DIR/logs
+JSON_METRICS=$EXP_DIR/metrics.json
+RUN_LOG=$EXP_DIR/run.log
+export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-}
+
+rm -rf $EXP_DIR $LOG_DIR
+mkdir -p $EXP_DIR $LOG_DIR
+
+cd $PROJECT_ROOT
+uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \
+    $PROJECT_ROOT/examples/run_dpo.py \
+    policy.model_name=Qwen/Qwen3-0.6B \
+    policy.tokenizer.name=Qwen/Qwen3-0.6B \
+    dpo.max_num_steps=3 \
+    dpo.val_batches=1 \
+    dpo.val_period=3 \
+    policy.train_global_batch_size=8 \
+    policy.megatron_cfg.tensor_model_parallel_size=1 \
+    policy.megatron_cfg.sequence_parallel=false \
+    policy.megatron_cfg.peft.enabled=true \
+    policy.megatron_cfg.peft.dim=32 \
+    cluster.gpus_per_node=2 \
+    cluster.num_nodes=1 \
+    logger.tensorboard_enabled=true \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=false \
+    logger.monitor_gpus=true \
+    checkpointing.enabled=true \
+    checkpointing.save_period=3 \
+    checkpointing.checkpoint_dir=/tmp/dpo_megatron_lora_checkpoints \
+    "$@" \
+    2>&1 | tee $RUN_LOG
+
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+uv run tests/check_metrics.py $JSON_METRICS \
+  'data["train/loss"]["3"] < 0.8'