Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
498d3df
SFT data iterator
angkywilliam Nov 13, 2025
3bd818f
Add SFT LR utils
angkywilliam Nov 14, 2025
66ec620
train_sft skeleton
angkywilliam Nov 14, 2025
4aeda2f
SFT Shape 0.1
angkywilliam Nov 14, 2025
4ff152b
Add shuffle to SFTConfig
angkywilliam Nov 14, 2025
b6f0380
change SFT args order
angkywilliam Nov 14, 2025
e32db37
Refactor SFT to accept batched trajectories
angkywilliam Nov 18, 2025
9138b07
Tokenize SFT Batch
angkywilliam Nov 19, 2025
18a7897
Add num_trainable_tokens to SFTBatch
angkywilliam Nov 19, 2025
90bf94b
draft train_sft
angkywilliam Nov 19, 2025
12e2142
Flatten trajectory for train_sft
angkywilliam Nov 21, 2025
4ea6c5e
Tokenize SFT Batches support flat list and add padding
angkywilliam Nov 21, 2025
f7bb203
Fix max_length duplicate name issue
angkywilliam Nov 21, 2025
d59e524
Remove unused file
angkywilliam Nov 21, 2025
7f6309a
remove unused typing
angkywilliam Nov 21, 2025
5ec5575
sft iterator
angkywilliam Nov 22, 2025
d6688cf
SFT Iterator
angkywilliam Nov 22, 2025
6c63af5
Use Unsloth for train on response
angkywilliam Nov 25, 2025
d2b39d5
Merge branch 'main' of github.com:OpenPipe/ART into sft
Kovbo Jan 14, 2026
ca5177b
refactoring
Kovbo Jan 14, 2026
c3a06b4
implement local backend SFT training
Kovbo Jan 15, 2026
9cf747d
Add SFT to Local Backend
Kovbo Jan 15, 2026
28205cb
avg loss
Kovbo Jan 15, 2026
64454b1
refactor, sft works good
Kovbo Jan 17, 2026
739eb45
Merge branch 'sft' of github.com:OpenPipe/ART into sft
Kovbo Jan 17, 2026
9918f65
Merge remote-tracking branch 'origin/main' into sft
Kovbo Jan 20, 2026
fb706f9
remove logging
Kovbo Jan 20, 2026
08d87d1
move tokenizer, update backend
Kovbo Jan 20, 2026
0573bc8
update lr schedule and tests
Kovbo Jan 20, 2026
904c3ff
refactor sft training from file
Kovbo Jan 20, 2026
2078d5e
change batch sft
Kovbo Jan 21, 2026
381ac7d
refactor step count based on checkpoints
Kovbo Jan 21, 2026
4bc79ed
update sft warmup script
Kovbo Jan 21, 2026
db6833c
fix model registration
Kovbo Jan 21, 2026
9544df9
make local random
Kovbo Jan 22, 2026
c6b2874
refactor backend
Kovbo Jan 22, 2026
834b37e
refactor
Kovbo Jan 22, 2026
736f259
Merge branch 'main' of github.com:OpenPipe/ART into sft
Kovbo Jan 22, 2026
84e6ceb
update example
Kovbo Jan 22, 2026
e2ea1ec
Pyright fix
Kovbo Jan 22, 2026
0fa52f8
remove iterate file epochs, refactor
Kovbo Jan 22, 2026
e43cbea
refactor
Kovbo Jan 22, 2026
2fae9c8
Merge branch 'main' of github.com:OpenPipe/ART into sft-local-backend
Kovbo Jan 22, 2026
d336f18
add serverless endpoint
Kovbo Jan 22, 2026
c9f63fe
Rename training_folder_url to training_data_url
Kovbo Jan 23, 2026
61ff551
update defaults, change reporting
Kovbo Jan 23, 2026
997b69f
update lables
Kovbo Jan 24, 2026
e67accd
make sft to produce only one checkpoint step
Kovbo Jan 26, 2026
3238810
refactor train from file
Kovbo Jan 26, 2026
393495f
refactor
Kovbo Jan 29, 2026
eb39441
Merge origin/main into sft-local-backend
Kovbo Jan 30, 2026
ae21b5b
Refactor SFTTrainConfig
Kovbo Feb 2, 2026
4daedeb
refactor
Kovbo Feb 2, 2026
e5ee192
Merge remote-tracking branch 'origin/main' into sft-local-backend
Kovbo Feb 2, 2026
2991645
correctly register lora, fix unsloth proxy check
Kovbo Feb 4, 2026
d2513eb
Merge branch 'main' of github.com:OpenPipe/ART into sft-local-backend
Kovbo Feb 4, 2026
24dcc4c
add sft train from file streaming
Kovbo Feb 4, 2026
f38ff55
add openpipe qwen back
Kovbo Feb 4, 2026
e8c9f9a
lint fix
Kovbo Feb 5, 2026
5896871
calculate pbar
Kovbo Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions dev/sft/dataset.jsonl

Large diffs are not rendered by default.

69 changes: 69 additions & 0 deletions dev/sft/distillation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Distillation example: Train a small model using completions from a large model."""

import asyncio
import os

from dotenv import load_dotenv
from openai import AsyncOpenAI

import art
from art.local import LocalBackend

# Load variables from a local .env file (if present) before the key check.
load_dotenv()

# Fail fast at import time: main() needs an OpenRouter key to query the
# teacher model. An unset variable and an empty string are treated alike.
if os.environ.get("OPENROUTER_API_KEY") in (None, ""):
    raise ValueError("OPENROUTER_API_KEY environment variable is required")

# Model sampled for the reference completion (requested through OpenRouter).
TEACHER_MODEL = "qwen/qwen3-235b-a22b-2507"
# Base model the student is fine-tuned from.
STUDENT_BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# Single user prompt sent to the teacher and reused as the SFT input.
PROMPT = "Explain the concept of recursion in programming with a simple example."


async def main():
    """Distill one teacher completion into the student model via SFT.

    Requests a single completion for ``PROMPT`` from ``TEACHER_MODEL`` through
    the OpenRouter OpenAI-compatible endpoint, wraps the prompt/response pair
    in an ``art.Trajectory``, registers a student ``art.TrainableModel`` on a
    ``LocalBackend`` and calls ``train_sft`` on that single trajectory.

    Raises:
        RuntimeError: If the teacher reply has no choices or no text content,
            because there would be nothing to train the student on.
    """
    # Get completion from teacher model
    teacher_client = AsyncOpenAI(
        api_key=os.environ["OPENROUTER_API_KEY"],
        base_url="https://openrouter.ai/api/v1",
    )

    print(f"Getting completion from teacher model ({TEACHER_MODEL})...")
    completion = await teacher_client.chat.completions.create(
        model=TEACHER_MODEL,
        messages=[{"role": "user", "content": PROMPT}],
    )

    # `message.content` is optional in the OpenAI SDK (e.g. refusals or
    # tool-call-only replies). Without this guard the `len()` below would fail
    # with an opaque TypeError, and an empty string would be trained on.
    teacher_response = (
        completion.choices[0].message.content if completion.choices else None
    )
    if not teacher_response:
        raise RuntimeError(
            f"Teacher model ({TEACHER_MODEL}) returned no text content"
        )

    print(
        f"Teacher response ({len(teacher_response)} chars):\n{teacher_response[:500]}..."
    )

    # Create trajectories from teacher completion
    trajectories = [
        art.Trajectory(
            messages_and_choices=[
                {"role": "user", "content": PROMPT},
                {"role": "assistant", "content": teacher_response},
            ],
            reward=0.0,
        )
    ]

    # Train student model
    backend = LocalBackend()
    student = art.TrainableModel(
        name="sft-distillation-001",
        project="sft-distillation",
        base_model=STUDENT_BASE_MODEL,
    )
    await student.register(backend)

    print(f"Training student model ({STUDENT_BASE_MODEL})...")
    await student.train_sft(
        trajectories,
        config=art.TrainSFTConfig(learning_rate=2e-4),
        verbose=True,
    )
    print("Training complete!")


if __name__ == "__main__":
    asyncio.run(main())
35 changes: 35 additions & 0 deletions dev/sft/sft-from-file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Simple SFT training script using train_sft_from_file helper."""

import asyncio
import random

import art
from art.local import LocalBackend
from art.utils.sft import train_sft_from_file


async def main():
    """Fine-tune a freshly named model on the bundled ``dataset.jsonl``.

    Registers an ``art.TrainableModel`` with a random ``run-xxxxxxxx`` name on
    a ``LocalBackend`` and hands it to ``train_sft_from_file`` for one epoch
    with ``peak_lr=2e-4``.
    """
    # Local import keeps this block self-contained; only this function needs it.
    from pathlib import Path

    backend = LocalBackend()

    # Random 8-character suffix so repeated runs do not reuse a model name.
    suffix = "".join(random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8))
    model = art.TrainableModel(
        name="run-" + suffix,
        project="sft-from-file",
        base_model="meta-llama/Llama-3.1-8B-Instruct",
    )
    await model.register(backend)

    # Resolve the dataset next to this script (dev/sft/dataset.jsonl) instead
    # of relative to the current working directory, so the example also works
    # when it is not launched from the repository root.
    dataset_path = Path(__file__).resolve().parent / "dataset.jsonl"

    await train_sft_from_file(
        model=model,
        file_path=str(dataset_path),
        epochs=1,
        peak_lr=2e-4,
    )

    print("Training complete!")


if __name__ == "__main__":
    asyncio.run(main())
127 changes: 127 additions & 0 deletions dev/sft/sft-warmup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Minimal example demonstrating SFT -> RL -> SFT switching."""

import asyncio
import os
import random

from dotenv import load_dotenv

import art
from art.local import LocalBackend

# Simple SFT data - teach the model to respond "maybe".
# A single demonstration is built once and the list holds ten references to
# that same object (list repetition does not copy).
_MAYBE_DEMONSTRATION = art.Trajectory(
    messages_and_choices=[
        {"role": "user", "content": "respond with yes, no, or maybe"},
        {"role": "assistant", "content": "maybe"},
    ],
    reward=0.0,  # reward unused for SFT
)
SFT_TRAJECTORIES = [_MAYBE_DEMONSTRATION] * 10


async def rl_rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
    """Sample one short completion for *prompt* and score it.

    The reply text is stripped and lower-cased, then rewarded as
    ``"maybe"`` -> 1.0, ``"no"`` -> 0.75, ``"yes"`` -> 0.5, anything else
    (including a missing reply) -> 0.0. The returned trajectory holds the
    user message followed by the sampled choice.
    """
    conversation: art.Messages = [{"role": "user", "content": prompt}]

    response = await model.openai_client().chat.completions.create(
        messages=conversation,
        model=model.get_inference_name(),
        max_tokens=10,
        timeout=30,
    )
    sampled = response.choices[0]
    # A reply without text content is scored like any other unknown answer.
    answer = (sampled.message.content or "").strip().lower()

    # Reward: "maybe" > "no" > "yes" > other
    if answer == "maybe":
        score = 1.0
    elif answer == "no":
        score = 0.75
    elif answer == "yes":
        score = 0.5
    else:
        score = 0.0

    return art.Trajectory(
        messages_and_choices=[*conversation, sampled],
        reward=score,
    )


async def _run_rl_phase(
    model: art.TrainableModel,
    prompt: str,
    steps: int = 10,
    groups_per_step: int = 12,
    rollouts_per_group: int = 6,
) -> None:
    """Run *steps* RL updates, each on freshly gathered rollouts for *prompt*."""
    for step in range(steps):
        print(f" RL step {step + 1}")
        train_groups = await art.gather_trajectory_groups(
            [
                art.TrajectoryGroup(
                    rl_rollout(model, prompt) for _ in range(rollouts_per_group)
                )
                for _ in range(groups_per_step)
            ]
        )
        await model.train(train_groups)


async def main():
    """Alternate SFT and RL training on one model: SFT -> RL -> SFT -> RL.

    Registers a randomly named ``art.TrainableModel`` on a ``LocalBackend``,
    runs the four phases in order on ``SFT_TRAJECTORIES`` and on rollouts
    from ``rl_rollout``, then prints one sampled reply from the trained
    model.
    """
    load_dotenv()

    backend = LocalBackend()
    model_name = "sft-warmup-" + "".join(
        random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8)
    )
    model = art.TrainableModel(
        name=model_name,
        project="sft-warmup",
        base_model="Qwen/Qwen2.5-7B-Instruct",
    )
    await model.register(backend)

    # Same prompt for both RL phases and the final check.
    prompt = "respond with yes, no, or maybe"

    # ========================================================================
    # Phase 1: SFT
    # ========================================================================
    print("\n[Phase 1] SFT training...")
    await model.train_sft(
        SFT_TRAJECTORIES,
        config=art.TrainSFTConfig(learning_rate=1e-5, batch_size=1),
    )
    print("SFT phase 1 complete.")

    # ========================================================================
    # Phase 2: RL (GRPO)
    # ========================================================================
    print("\n[Phase 2] RL training...")
    await _run_rl_phase(model, prompt)
    print("RL phase 2 complete.")

    # ========================================================================
    # Phase 3: SFT again
    # ========================================================================
    print("\n[Phase 3] SFT training again...")
    await model.train_sft(
        SFT_TRAJECTORIES,
        config=art.TrainSFTConfig(batch_size=1, learning_rate=1e-5),
    )
    print("SFT phase 3 complete.")

    # ========================================================================
    # Phase 4: RL (GRPO) again
    # ========================================================================
    print("\n[Phase 4] RL training...")
    await _run_rl_phase(model, prompt)
    print("RL phase 4 complete.")

    # ========================================================================
    # Test: Check model output
    # ========================================================================
    print("\n[Test] Model output after training:")
    client = model.openai_client()
    completion = await client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model.get_inference_name(),
        max_tokens=10,
    )
    print(f"Response: {completion.choices[0].message.content}")

    print("\nAll phases complete!")


if __name__ == "__main__":
    asyncio.run(main())
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies = [
"openai>=2.14.0",
"typer>=0.15.2",
"litellm>=1.71.1",
"weave>=0.52.23",
"weave>=0.52.24",
"polars>=1.26.0",
"tblib>=3.0.0",
"nest-asyncio>=1.6.0",
Expand All @@ -30,7 +30,7 @@ backend = [
"accelerate==1.7.0",
"awscli>=1.38.1",
"setuptools>=78.1.0",
"wandb==0.23.1",
"wandb==0.24.0",
"transformers>=4.55.2,<=4.57.3",
"duckdb>=1.0.0",
"pyarrow>=15.0.0",
Expand Down
2 changes: 2 additions & 0 deletions src/art/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def __init__(self, **kwargs):
Tools,
TrainConfig,
TrainResult,
TrainSFTConfig,
)
from .utils import retry
from .yield_trajectory import capture_yielded_trajectory, yield_trajectory
Expand All @@ -87,6 +88,7 @@ def __init__(self, **kwargs):
"Model",
"TrainableModel",
"retry",
"TrainSFTConfig",
"TrainConfig",
"TrainResult",
"Trajectory",
Expand Down
13 changes: 11 additions & 2 deletions src/art/backend.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import TYPE_CHECKING, Any, AsyncIterator, Iterable, Protocol, TypeAlias

from . import dev
from .trajectories import TrajectoryGroup
from .types import TrainConfig, TrainResult
from .trajectories import Trajectory, TrajectoryGroup
from .types import TrainConfig, TrainResult, TrainSFTConfig

if TYPE_CHECKING:
from .model import Model, TrainableModel
Expand Down Expand Up @@ -50,3 +50,12 @@ def _train_model(
dev_config: dev.TrainConfig,
verbose: bool = False,
) -> AsyncIterator[dict[str, float]]: ...

# Backend hook for supervised fine-tuning (SFT). It is declared next to
# `_train_model` and shares its `dev_config` / `verbose` parameters and its
# declared return type.
#
# Parameters:
#   model:        the trainable model handed to the backend.
#   trajectories: iterable of `Trajectory` objects to train on.
#   config:       public `TrainSFTConfig` options.
#   dev_config:   experimental `dev.TrainSFTConfig` options.
#   verbose:      defaults to False. NOTE(review): presumably enables extra
#                 logging - confirm against the concrete backends.
#
# Declared to return an async iterator of `dict[str, float]`.
# NOTE(review): presumably metric name -> value per training step - confirm.
# The body is `...`: this is a signature-only stub with no behaviour here.
def _train_sft(
    self,
    model: AnyTrainableModel,
    trajectories: Iterable[Trajectory],
    config: TrainSFTConfig,
    dev_config: dev.TrainSFTConfig,
    verbose: bool = False,
) -> AsyncIterator[dict[str, float]]: ...
3 changes: 2 additions & 1 deletion src/art/dev/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
TrainerArgs,
)
from .openai_server import OpenAIServerConfig, ServerArgs, get_openai_server_config
from .train import TrainConfig
from .train import TrainConfig, TrainSFTConfig

__all__ = [
"EngineArgs",
Expand All @@ -23,5 +23,6 @@
"get_openai_server_config",
"OpenAIServerConfig",
"ServerArgs",
"TrainSFTConfig",
"TrainConfig",
]
4 changes: 4 additions & 0 deletions src/art/dev/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,7 @@ class TrainConfig(TypedDict, total=False):
scale_learning_rate_by_reward_std_dev: bool
scale_rewards: bool
truncated_importance_sampling: float | None


class TrainSFTConfig(TypedDict, total=False):
    """Experimental SFT configuration options. Use at your own risk.

    No keys are declared in this class body yet. Because the class is
    declared with ``total=False``, any key added here is optional for
    callers.
    """
Loading
Loading