verl-project · vermouth1992 · May 29, 2025 · May 28, 2025 · May 28, 2025 · May 28, 2025
diff --git a/.github/workflows/sgl.yml b/.github/workflows/sgl.yml
@@ -82,3 +82,7 @@ jobs:
         run: |
           cd tests/workers/rollout
           torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_rollout_w_tools.py
+      - name: Test the latest SGLang Rollout async with sandbox fusion tool
+        run: |  # NOTE(review): launched with plain pytest, whereas the step above uses torchrun with 2 processes; confirm intended
+          cd tests/workers/rollout
+          pytest -s test_sglang_async_rollout_sf_tools.py
diff --git a/examples/data_preprocess/aime2024_multiturn_w_tool.py b/examples/data_preprocess/aime2024_multiturn_w_tool.py
@@ -0,0 +1,66 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+# Copyright 2023-2024 SGLang Team
+# Copyright 2025 ModelBest Inc. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Preprocess the AIME-2024 dataset to multiturn format
+"""
+
+import argparse
+import os
+
+import datasets
+
+from verl.utils.hdfs_io import copy, makedirs
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--local_dir", default="~/data/retool_aime2024")
+    parser.add_argument("--hdfs_dir", default=None)
+    args = parser.parse_args()
+
+    data_path = "BytedTsinghua-SIA/AIME-2024"
+    dataset = datasets.load_dataset(data_path, "default")
+    train_dataset = dataset["train"]
+
+    # Attach per-sample tool kwargs: the ground truth goes under `code_interpreter` as create_kwargs.
+    def make_map_fn(split):
+        def process_fn(example, idx):
+            """Return `example` with tool-call kwargs added to a copy of its `extra_info`."""
+            extra_info = example.pop("extra_info").copy()
+            extra_info["need_tools_kwargs"] = True
+            extra_info["tools_kwargs"] = {
+                "code_interpreter": {
+                    "create_kwargs": {
+                        "ground_truth": example["reward_model"]["ground_truth"],
+                    },
+                },
+            }
+            example["extra_info"] = extra_info
+            return example
+
+        return process_fn
+
+    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
+
+    # Expand a leading "~" (the default has one) so every consumer below gets a real path.
+    local_dir = os.path.expanduser(args.local_dir)
+    hdfs_dir = args.hdfs_dir
+
+    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
+
+    # Copy the local output directory to hdfs_dir only when one was given.
+    if hdfs_dir is not None:
+        makedirs(hdfs_dir)
+        copy(src=local_dir, dst=hdfs_dir)
diff --git a/examples/data_preprocess/dapo_multiturn_w_tool.py b/examples/data_preprocess/dapo_multiturn_w_tool.py
@@ -0,0 +1,66 @@
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+# Copyright 2023-2024 SGLang Team
+# Copyright 2025 ModelBest Inc. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Preprocess the DAPO-Math-17k dataset to multiturn format
+"""
+
+import argparse
+import os
+
+import datasets
+
+from verl.utils.hdfs_io import copy, makedirs
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--local_dir", default="~/data/retool_dapo")
+    parser.add_argument("--hdfs_dir", default=None)
+    args = parser.parse_args()
+
+    data_path = "BytedTsinghua-SIA/DAPO-Math-17k"
+    dataset = datasets.load_dataset(data_path, "default")
+    train_dataset = dataset["train"]
+
+    # Attach per-sample tool kwargs: the ground truth goes under `code_interpreter` as create_kwargs.
+    def make_map_fn(split):
+        def process_fn(example, idx):
+            """Return `example` with tool-call kwargs added to a copy of its `extra_info`."""
+            extra_info = example.pop("extra_info").copy()
+            extra_info["need_tools_kwargs"] = True
+            extra_info["tools_kwargs"] = {
+                "code_interpreter": {
+                    "create_kwargs": {
+                        "ground_truth": example["reward_model"]["ground_truth"],
+                    },
+                },
+            }
+            example["extra_info"] = extra_info
+            return example
+
+        return process_fn
+
+    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
+
+    # Expand a leading "~" (the default has one) so every consumer below gets a real path.
+    local_dir = os.path.expanduser(args.local_dir)
+    hdfs_dir = args.hdfs_dir
+
+    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
+
+    # Copy the local output directory to hdfs_dir only when one was given.
+    if hdfs_dir is not None:
+        makedirs(hdfs_dir)
+        copy(src=local_dir, dst=hdfs_dir)
diff --git a/examples/sglang_multiturn/config/retool_multiturn_grpo.yaml b/examples/sglang_multiturn/config/retool_multiturn_grpo.yaml
@@ -0,0 +1,22 @@
+hydra:
+  searchpath:
+    - file://verl/trainer/config  # extra Hydra config search location; relative, so presumably depends on the working directory (confirm)
+
+defaults:
+  - ppo_trainer  # base config; presumably found through the searchpath entry above
+  - _self_  # listed last, so values in this file override the defaults before it
+
+data:
+  max_prompt_length: 1024
+  max_response_length: 1024
+  train_batch_size: 256
+  return_raw_chat: True
+
+actor_rollout_ref:
+  hybrid_engine: True
+  rollout:
+    name: sglang_async
+    multi_turn:
+      enable: True
+      max_turns: 5
+      tool_config_path: "./config/tool_config/sandbox_fusion_tool_config.yaml"  # relative path; NOTE(review): confirm which directory it is resolved from
diff --git a/examples/sglang_multiturn/config/tool_config/sandbox_fusion_tool_config.yaml b/examples/sglang_multiturn/config/tool_config/sandbox_fusion_tool_config.yaml
@@ -0,0 +1,22 @@
+tools:
+  - class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
+    config:  # settings for this tool (presumably consumed by the class named above; confirm)
+      sandbox_fusion_url: "https://xxx.apigateway-cn-beijing.volceapi.com/run_code"  # "xxx" is a placeholder host; replace with a real endpoint before use
+      num_workers: 10
+      enable_global_rate_limit: true
+      rate_limit: 10  # NOTE(review): unit is not shown here; confirm against the tool implementation
+      default_timeout: 30  # presumably seconds; confirm
+      default_language: "python"
+
+    tool_schema:
+      type: "function"
+      function:
+        name: "code_interpreter"  # same name as the tools_kwargs key written by examples/data_preprocess/*_multiturn_w_tool.py
+        description: "A tool for executing code."
+        parameters:
+          type: "object"
+          properties:
+            code:
+              type: "string"
+              description: "The code to execute."
+          required: ["code"]
diff --git a/tests/e2e/run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh b/tests/e2e/run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh
@@ -0,0 +1,62 @@
+# run on 8xH20
+# make sure your current working directory is the root of the project
+# any extra command-line arguments are appended verbatim to the trainer command below
+set -x
+
+# debugging aids: unbuffered Python output, no Ray log de-duplication, full Rust/Hydra error traces
+export PYTHONUNBUFFERED=1
+export RAY_DEDUP_LOGS=0
+export RUST_BACKTRACE=1
+export HYDRA_FULL_ERROR=1
+
+ulimit -n 65535
+
+PROJECT_DIR="$(pwd)"
+CONFIG_PATH="$PROJECT_DIR/examples/sglang_multiturn/config"
+# NOTE(review): --config-name must name a YAML under $CONFIG_PATH; confirm gsm8k_multiturn_sf_grpo.yaml exists there
+python3 -m verl.trainer.main_ppo \
+    --config-path="$CONFIG_PATH" \
+    --config-name='gsm8k_multiturn_sf_grpo' \
+    algorithm.adv_estimator=grpo \
+    data.train_batch_size=128 \
+    data.max_prompt_length=2048 \
+    data.max_response_length=16384 \
+    data.filter_overlong_prompts=False \
+    data.truncation='error' \
+    data.return_raw_chat=True \
+    data.train_files="$HOME/data/retool_dapo/train.parquet" \
+    data.val_files="$HOME/data/retool_aime2024/train.parquet" \
+    actor_rollout_ref.model.path=Qwen/Qwen3-4B \
+    actor_rollout_ref.actor.use_dynamic_bsz=True \
+    actor_rollout_ref.model.use_remove_padding=True \
+    actor_rollout_ref.model.use_liger=False \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
+    +actor_rollout_ref.model.enable_activation_offloading=True \
+    actor_rollout_ref.actor.optim.lr=1e-6 \
+    actor_rollout_ref.actor.ppo_mini_batch_size=128 \
+    actor_rollout_ref.actor.ulysses_sequence_parallel_size=1 \
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=32768 \
+    actor_rollout_ref.actor.use_kl_loss=False \
+    actor_rollout_ref.actor.kl_loss_coef=0.0 \
+    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
+    actor_rollout_ref.actor.entropy_coeff=0 \
+    actor_rollout_ref.actor.fsdp_config.param_offload=True \
+    actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
+    actor_rollout_ref.rollout.name=sglang_async \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
+    actor_rollout_ref.rollout.n=8 \
+    actor_rollout_ref.rollout.multi_turn.tool_config_path="$PROJECT_DIR/examples/sglang_multiturn/config/tool_config/sandbox_fusion_tool_config.yaml" \
+    actor_rollout_ref.ref.fsdp_config.param_offload=True \
+    algorithm.use_kl_in_reward=False \
+    trainer.critic_warmup=0 \
+    trainer.logger=['console','wandb'] \
+    trainer.project_name='retool_async_rl' \
+    trainer.experiment_name='qwen3-4b_function_rm-retool-async-sgl-no-sft-n8-v2505271300' \
+    trainer.val_before_train=False \
+    trainer.n_gpus_per_node=8 \
+    trainer.nnodes=1 \
+    trainer.save_freq=100 \
+    trainer.test_freq=20 \
+    trainer.total_training_steps=1000 \
+    trainer.total_epochs=1 "$@"
diff --git a/tests/workers/rollout/resource/tool_configs/sandbox_fusion_tool_config b/tests/workers/rollout/resource/tool_configs/sandbox_fusion_tool_config
@@ -0,0 +1,16 @@
+tools:
+  - class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
+    config:  # settings for this tool (presumably consumed by the class named above; confirm)
+      sandbox_fusion_url: "https://xxx.apigateway-cn-beijing.volceapi.com/run_code"  # "xxx" is a placeholder host; replace with a real endpoint before use
+    tool_schema:
+      type: "function"
+      function:
+        name: "code_interpreter"
+        description: "A tool for executing code."
+        parameters:
+          type: "object"
+          properties:
+            code:
+              type: "string"
+              description: "The code to execute."
+          required: ["code"]