Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/sgl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,7 @@ jobs:
run: |
cd tests/workers/rollout
torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_rollout_w_tools.py
# CI step for the sandbox fusion tool rollout test; it is invoked with plain pytest rather than through torchrun.
- name: Test the latest SGLang Rollout async with sandbox fusion tool
run: |
cd tests/workers/rollout
pytest -s test_sglang_async_rollout_sf_tools.py
286 changes: 286 additions & 0 deletions docs/sglang_multiturn/sandbox_fusion.rst

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions examples/data_preprocess/aime2024_multiturn_w_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
# Copyright 2023-2024 SGLang Team
# Copyright 2025 ModelBest Inc. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Preprocess the AIME-2024 dataset to multiturn format
"""

import argparse
import os

import datasets

from verl.utils.hdfs_io import copy, makedirs

if __name__ == "__main__":
    # Script entry point: load the AIME-2024 dataset, attach per-example tool
    # kwargs for the code interpreter tool, and write the result to
    # <local_dir>/train.parquet (optionally mirroring the directory to HDFS).
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", default="~/data/retool_aime2024")
    parser.add_argument("--hdfs_dir", default=None)

    args = parser.parse_args()

    data_path = "BytedTsinghua-SIA/AIME-2024"
    dataset = datasets.load_dataset(data_path, "default")

    train_dataset = dataset["train"]

    def make_map_fn(split):
        """Return the per-example mapper used with ``Dataset.map``.

        ``split`` is accepted for parity with the other preprocessing
        scripts; the mapper itself does not depend on it.
        """

        def process_fn(example, idx):
            """Add tool kwargs to ``example["extra_info"]`` and return the example.

            ``idx`` is supplied by ``with_indices=True`` and is not used.
            """
            # Copy so the original mapping is never mutated; tolerate a
            # missing or None ``extra_info`` instead of crashing on ``.copy()``.
            extra_info = dict(example.get("extra_info") or {})
            extra_info["need_tools_kwargs"] = True
            # NOTE(review): the "code_interpreter" key presumably has to match
            # the function name declared in the tool config -- confirm.
            extra_info["tools_kwargs"] = {
                "code_interpreter": {
                    "create_kwargs": {
                        "ground_truth": example["reward_model"]["ground_truth"],
                    },
                },
            }
            example["extra_info"] = extra_info
            return example

        return process_fn

    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)

    # Expand "~" explicitly: the default --local_dir starts with it, and the
    # same path is later handed to the HDFS copy helper as a source directory.
    local_dir = os.path.expanduser(args.local_dir)
    hdfs_dir = args.hdfs_dir

    # Make sure the destination exists before writing the parquet file.
    os.makedirs(local_dir, exist_ok=True)
    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))

    if hdfs_dir is not None:
        makedirs(hdfs_dir)
        copy(src=local_dir, dst=hdfs_dir)
66 changes: 66 additions & 0 deletions examples/data_preprocess/dapo_multiturn_w_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
# Copyright 2023-2024 SGLang Team
# Copyright 2025 ModelBest Inc. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Preprocess the DAPO-Math-17k dataset to multiturn format
"""

import argparse
import os

import datasets

from verl.utils.hdfs_io import copy, makedirs

if __name__ == "__main__":
    # Script entry point: load the DAPO-Math-17k dataset, attach per-example
    # tool kwargs for the code interpreter tool, and write the result to
    # <local_dir>/train.parquet (optionally mirroring the directory to HDFS).
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", default="~/data/retool_dapo")
    parser.add_argument("--hdfs_dir", default=None)

    args = parser.parse_args()

    data_path = "BytedTsinghua-SIA/DAPO-Math-17k"
    dataset = datasets.load_dataset(data_path, "default")

    train_dataset = dataset["train"]

    def make_map_fn(split):
        """Return the per-example mapper used with ``Dataset.map``.

        ``split`` is accepted for parity with the other preprocessing
        scripts; the mapper itself does not depend on it.
        """

        def process_fn(example, idx):
            """Add tool kwargs to ``example["extra_info"]`` and return the example.

            ``idx`` is supplied by ``with_indices=True`` and is not used.
            """
            # Copy so the original mapping is never mutated; tolerate a
            # missing or None ``extra_info`` instead of crashing on ``.copy()``.
            extra_info = dict(example.get("extra_info") or {})
            extra_info["need_tools_kwargs"] = True
            # NOTE(review): the "code_interpreter" key presumably has to match
            # the function name declared in the tool config -- confirm.
            extra_info["tools_kwargs"] = {
                "code_interpreter": {
                    "create_kwargs": {
                        "ground_truth": example["reward_model"]["ground_truth"],
                    },
                },
            }
            example["extra_info"] = extra_info
            return example

        return process_fn

    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)

    # Expand "~" explicitly: the default --local_dir starts with it, and the
    # same path is later handed to the HDFS copy helper as a source directory.
    local_dir = os.path.expanduser(args.local_dir)
    hdfs_dir = args.hdfs_dir

    # Make sure the destination exists before writing the parquet file.
    os.makedirs(local_dir, exist_ok=True)
    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))

    if hdfs_dir is not None:
        makedirs(hdfs_dir)
        copy(src=local_dir, dst=hdfs_dir)
22 changes: 22 additions & 0 deletions examples/sglang_multiturn/config/retool_multiturn_grpo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Hydra config layered on top of ppo_trainer (see `defaults`): enables multi-turn
# rollout with the sglang_async backend and points it at the sandbox fusion tool config.
hydra:
searchpath:
- file://verl/trainer/config

# `_self_` comes after ppo_trainer, so the values in this file are applied on top of it.
defaults:
- ppo_trainer
- _self_

data:
max_prompt_length: 1024
max_response_length: 1024
train_batch_size: 256
return_raw_chat: True

actor_rollout_ref:
hybrid_engine: True
rollout:
name: sglang_async
multi_turn:
enable: True
max_turns: 5
# NOTE(review): relative path; presumably resolved against the launch working directory -- confirm, or override with an absolute path.
tool_config_path: "./config/tool_config/sandbox_fusion_tool_config.yaml"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Tool list for multi-turn rollout: one SandboxFusionTool declared under the function name "code_interpreter".
tools:
- class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
config:
# Placeholder endpoint (host starts with "xxx"); replace with a real sandbox fusion run_code URL before use.
sandbox_fusion_url: "https://xxx.apigateway-cn-beijing.volceapi.com/run_code"
# NOTE(review): presumably the number of concurrent workers issuing sandbox requests -- confirm against SandboxFusionTool.
num_workers: 10
# NOTE(review): rate_limit presumably only takes effect when enable_global_rate_limit is true; its unit is not shown here -- confirm.
enable_global_rate_limit: true
rate_limit: 10
# NOTE(review): presumably seconds -- confirm.
default_timeout: 30
default_language: "python"

# Function-calling schema: a single required string argument, "code".
tool_schema:
type: "function"
function:
name: "code_interpreter"
description: "A tool for executing code."
parameters:
type: "object"
properties:
code:
type: "string"
description: "The code to execute."
required: ["code"]
62 changes: 62 additions & 0 deletions tests/e2e/run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# run on 8xH20
# make sure your current working directory is the root of the project
#
# Launches verl.trainer.main_ppo with the sglang_async rollout and the sandbox
# fusion tool config. Any extra command-line arguments are forwarded verbatim
# as additional Hydra overrides.

set -x


export PYTHONUNBUFFERED=1
export RAY_DEDUP_LOGS=0
export RUST_BACKTRACE=1
export HYDRA_FULL_ERROR=1

ulimit -n 65535

PROJECT_DIR="$(pwd)"
CONFIG_PATH="$PROJECT_DIR/examples/sglang_multiturn/config"

# NOTE(review): --config-name must match a YAML file under $CONFIG_PATH;
# confirm that 'gsm8k_multiturn_sf_grpo' exists there.
# Path-bearing overrides and "$@" are quoted so values containing spaces or
# glob characters reach Hydra unchanged.
python3 -m verl.trainer.main_ppo \
    --config-path="$CONFIG_PATH" \
    --config-name='gsm8k_multiturn_sf_grpo' \
    algorithm.adv_estimator=grpo \
    data.train_batch_size=128 \
    data.max_prompt_length=2048 \
    data.max_response_length=16384 \
    data.filter_overlong_prompts=False \
    data.truncation='error' \
    data.return_raw_chat=True \
    "data.train_files=$HOME/data/retool_dapo/train.parquet" \
    "data.val_files=$HOME/data/retool_aime2024/train.parquet" \
    actor_rollout_ref.model.path=Qwen/Qwen3-4B \
    actor_rollout_ref.actor.use_dynamic_bsz=True \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.model.use_liger=False \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    +actor_rollout_ref.model.enable_activation_offloading=True \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.actor.ppo_mini_batch_size=128 \
    actor_rollout_ref.actor.ulysses_sequence_parallel_size=1 \
    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=32768 \
    actor_rollout_ref.actor.use_kl_loss=False \
    actor_rollout_ref.actor.kl_loss_coef=0.0 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.actor.fsdp_config.param_offload=True \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
    actor_rollout_ref.rollout.name=sglang_async \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
    actor_rollout_ref.rollout.n=8 \
    actor_rollout_ref.rollout.multi_turn.tool_config_path="$PROJECT_DIR/examples/sglang_multiturn/config/tool_config/sandbox_fusion_tool_config.yaml" \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name='retool_async_rl' \
    trainer.experiment_name='qwen3-4b_function_rm-retool-async-sgl-no-sft-n8-v2505271300' \
    trainer.val_before_train=False \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=100 \
    trainer.test_freq=20 \
    trainer.total_training_steps=1000 \
    trainer.total_epochs=1 "$@"
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Minimal tool list: one SandboxFusionTool declared under the function name "code_interpreter"; only the endpoint URL is configured here.
tools:
- class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
config:
# Placeholder endpoint (host starts with "xxx"); replace with a real sandbox fusion run_code URL before use.
sandbox_fusion_url: "https://xxx.apigateway-cn-beijing.volceapi.com/run_code"
# Function-calling schema: a single required string argument, "code".
tool_schema:
type: "function"
function:
name: "code_interpreter"
description: "A tool for executing code."
parameters:
type: "object"
properties:
code:
type: "string"
description: "The code to execute."
required: ["code"]
Loading
Loading