WIP: SFT (local backend) #530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

Kovbo wants to merge 60 commits into main from sft-local-backend

dev/sft/dataset.jsonl

Large diffs are not rendered by default.

dev/sft/distillation.py

-Original file line number
+Diff line change
@@ -0,0 +1,69 @@
+    """Distillation example: Train a small model using completions from a large model."""
+    import asyncio
+    import os
+    from dotenv import load_dotenv
+    from openai import AsyncOpenAI
+    import art
+    from art.local import LocalBackend
+    # Load variables from a local .env file (if present) before the check below.
+    load_dotenv()
+    # Fail fast at import time: main() reads this key when it builds the
+    # teacher client, so a missing key is reported before any work starts.
+    if not os.environ.get("OPENROUTER_API_KEY"):
+        raise ValueError("OPENROUTER_API_KEY environment variable is required")
+    # Model id sent to the OpenRouter endpoint for the teacher completion.
+    TEACHER_MODEL = "qwen/qwen3-235b-a22b-2507"
+    # Base model of the trainable student.
+    STUDENT_BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
+    # Single user prompt, used for both the teacher request and the training trajectory.
+    PROMPT = "Explain the concept of recursion in programming with a simple example."
+    async def main():
+        # Get completion from teacher model
+        teacher_client = AsyncOpenAI(
+            api_key=os.environ["OPENROUTER_API_KEY"],
+            base_url="https://openrouter.ai/api/v1",
+        )
+        print(f"Getting completion from teacher model ({TEACHER_MODEL})...")
+        completion = await teacher_client.chat.completions.create(
+            model=TEACHER_MODEL,
+            messages=[{"role": "user", "content": PROMPT}],
+        )
+        teacher_response = completion.choices[0].message.content
+        print(
+            f"Teacher response ({len(teacher_response)} chars):\n{teacher_response[:500]}..."
+        )
+        # Create trajectories from teacher completion
+        trajectories = [
+            art.Trajectory(
+                messages_and_choices=[
+                    {"role": "user", "content": PROMPT},
+                    {"role": "assistant", "content": teacher_response},
+                ],
+                reward=0.0,
+            )
+        ]
+        # Train student model
+        backend = LocalBackend()
+        student = art.TrainableModel(
+            name="sft-distillation-001",
+            project="sft-distillation",
+            base_model=STUDENT_BASE_MODEL,
+        )
+        await student.register(backend)
+        print(f"Training student model ({STUDENT_BASE_MODEL})...")
+        await student.train_sft(
+            trajectories,
+            config=art.TrainSFTConfig(learning_rate=2e-4),
+            verbose=True,
+        )
+        print("Training complete!")
+    if __name__ == "__main__":
+        asyncio.run(main())

dev/sft/sft-from-file.py

-Original file line number
+Diff line change
@@ -0,0 +1,35 @@
+    """Simple SFT training script using train_sft_from_file helper."""
+    import asyncio
+    import random
+    import art
+    from art.local import LocalBackend
+    from art.utils.sft import train_sft_from_file
+    async def main():
+        backend = LocalBackend()
+        model_name = "run-" + "".join(
+            random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8)
+        )
+        model = art.TrainableModel(
+            name=model_name,
+            project="sft-from-file",
+            base_model="meta-llama/Llama-3.1-8B-Instruct",
+        )
+        await model.register(backend)
+        await train_sft_from_file(
+            model=model,
+            file_path="dev/sft/dataset.jsonl",
+            epochs=1,
+            peak_lr=2e-4,
+        )
+        print("Training complete!")
+    if __name__ == "__main__":
+        asyncio.run(main())

dev/sft/sft-warmup.py

-Original file line number
+Diff line change
@@ -0,0 +1,127 @@
+    """Minimal example demonstrating SFT -> RL -> SFT switching."""
+    import asyncio
+    import os
+    import random
+    from dotenv import load_dotenv
+    import art
+    from art.local import LocalBackend
+    # Simple SFT trajectories - teach model to respond "maybe"
+    SFT_TRAJECTORIES = [
+        art.Trajectory(
+            messages_and_choices=[
+                {"role": "user", "content": "respond with yes, no, or maybe"},
+                {"role": "assistant", "content": "maybe"},
+            ],
+            reward=0.0,  # reward unused for SFT
+        ),
+    ] * 10
+    async def rl_rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
+        """Single RL rollout with reward based on response."""
+        messages: art.Messages = [{"role": "user", "content": prompt}]
+        client = model.openai_client()
+        completion = await client.chat.completions.create(
+            messages=messages,
+            model=model.get_inference_name(),
+            max_tokens=10,
+            timeout=30,
+        )
+        choice = completion.choices[0]
+        content = choice.message.content or ""
+        # Reward: "maybe" > "no" > "yes" > other
+        reward = {"maybe": 1.0, "no": 0.75, "yes": 0.5}.get(content.strip().lower(), 0.0)
+        return art.Trajectory(messages_and_choices=[*messages, choice], reward=reward)
+    async def main():
+        load_dotenv()
+        backend = LocalBackend()
+        model_name = "sft-warmup-" + "".join(
+            random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=8)
+        )
+        model = art.TrainableModel(
+            name=model_name,
+            project="sft-warmup",
+            base_model="Qwen/Qwen2.5-7B-Instruct",
+        )
+        await model.register(backend)
+        # ========================================================================
+        # Phase 1: SFT
+        # ========================================================================
+        print("\n[Phase 1] SFT training...")
+        await model.train_sft(
+            SFT_TRAJECTORIES,
+            config=art.TrainSFTConfig(learning_rate=1e-5, batch_size=1),
+        )
+        print("SFT phase 1 complete.")
+        # ========================================================================
+        # Phase 2: RL (GRPO)
+        # ========================================================================
+        print("\n[Phase 2] RL training...")
+        prompt = "respond with yes, no, or maybe"
+        for i in range(10):
+            print(f"  RL step {i + 1}")
+            train_groups = await art.gather_trajectory_groups(
+                [
+                    art.TrajectoryGroup(rl_rollout(model, prompt) for _ in range(6))
+                    for _ in range(12)
+                ]
+            )
+            await model.train(train_groups)
+        print("RL phase 2 complete.")
+        # ========================================================================
+        # Phase 3: SFT again
+        # ========================================================================
+        print("\n[Phase 3] SFT training again...")
+        await model.train_sft(
+            SFT_TRAJECTORIES,
+            config=art.TrainSFTConfig(batch_size=1, learning_rate=1e-5),
+        )
+        print("SFT phase 3 complete.")
+        # ========================================================================
+        # Phase 4: RL (GRPO) again
+        # ========================================================================
+        print("\n[Phase 4] RL training...")
+        prompt = "respond with yes, no, or maybe"
+        for i in range(10):
+            print(f"  RL step {i + 1}")
+            train_groups = await art.gather_trajectory_groups(
+                [
+                    art.TrajectoryGroup(rl_rollout(model, prompt) for _ in range(6))
+                    for _ in range(12)
+                ]
+            )
+            await model.train(train_groups)
+        print("RL phase 4 complete.")
+        # ========================================================================
+        # Test: Check model output
+        # ========================================================================
+        print("\n[Test] Model output after training:")
+        client = model.openai_client()
+        completion = await client.chat.completions.create(
+            messages=[{"role": "user", "content": "respond with yes, no, or maybe"}],
+            model=model.get_inference_name(),
+            max_tokens=10,
+        )
+        print(f"Response: {completion.choices[0].message.content}")
+        print("\nAll phases complete!")
+    if __name__ == "__main__":
+        asyncio.run(main())

pyproject.toml

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -8,7 +8,7 @@ dependencies = [
  
        "openai>=2.14.0",

        "typer>=0.15.2",

        "litellm>=1.71.1",

-       "weave>=0.52.23",

+       "weave>=0.52.24",

        "polars>=1.26.0",

        "tblib>=3.0.0",

        "nest-asyncio>=1.6.0",

    @@ -30,7 +30,7 @@ backend = [
  
        "accelerate==1.7.0",

        "awscli>=1.38.1",

        "setuptools>=78.1.0",

-       "wandb==0.23.1",

+       "wandb==0.24.0",

        "transformers>=4.55.2,<=4.57.3",

        "duckdb>=1.0.0",

        "pyarrow>=15.0.0",

src/art/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -65,6 +65,7 @@ def __init__(self, **kwargs): @@
         Tools,
         TrainConfig,
         TrainResult,
+        TrainSFTConfig,
     )
     from .utils import retry
     from .yield_trajectory import capture_yielded_trajectory, yield_trajectory
@@ Expand All / @@ -87,6 +88,7 @@ def __init__(self, **kwargs): @@
         "Model",
         "TrainableModel",
         "retry",
+        "TrainSFTConfig",
         "TrainConfig",
         "TrainResult",
         "Trajectory",
@@ Expand Down @@

src/art/backend.py

-Original file line number
+Diff line change
@@ -1,8 +1,8 @@
     from typing import TYPE_CHECKING, Any, AsyncIterator, Iterable, Protocol, TypeAlias
     from . import dev
-    from .trajectories import TrajectoryGroup
-    from .types import TrainConfig, TrainResult
+    from .trajectories import Trajectory, TrajectoryGroup
+    from .types import TrainConfig, TrainResult, TrainSFTConfig
     if TYPE_CHECKING:
         from .model import Model, TrainableModel
@@ Expand Down Expand Up / @@ -50,3 +50,12 @@ def _train_model( @@
             dev_config: dev.TrainConfig,
             verbose: bool = False,
         ) -> AsyncIterator[dict[str, float]]: ...
+        # Backend hook for supervised fine-tuning: takes the model, an iterable
+        # of `Trajectory` objects, and both the public and dev SFT configs.
+        # NOTE(review): presumably yields metric dicts as training progresses,
+        # like the `_train_model` stub's identical return type — confirm
+        # against the concrete backends.
+        def _train_sft(
+            self,
+            model: AnyTrainableModel,
+            trajectories: Iterable[Trajectory],
+            config: TrainSFTConfig,
+            dev_config: dev.TrainSFTConfig,
+            verbose: bool = False,
+        ) -> AsyncIterator[dict[str, float]]: ...

src/art/dev/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -9,7 +9,7 @@ @@
         TrainerArgs,
     )
     from .openai_server import OpenAIServerConfig, ServerArgs, get_openai_server_config
-    from .train import TrainConfig
+    from .train import TrainConfig, TrainSFTConfig
     __all__ = [
         "EngineArgs",
@@ Expand All / @@ -23,5 +23,6 @@ @@
         "get_openai_server_config",
         "OpenAIServerConfig",
         "ServerArgs",
+        "TrainSFTConfig",
         "TrainConfig",
     ]

src/art/dev/train.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -27,3 +27,7 @@ class TrainConfig(TypedDict, total=False): @@
         scale_learning_rate_by_reward_std_dev: bool
         scale_rewards: bool
         truncated_importance_sampling: float | None
+    # No keys are declared yet; with total=False any keys added later are optional.
+    class TrainSFTConfig(TypedDict, total=False):
+        """Experimental SFT configuration options. Use at your own risk."""

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

WIP: SFT (local backend) #530

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!

Uh oh!

WIP: SFT (local backend) #530

Are you sure you want to change the base?

Uh oh!

WIP: SFT (local backend) #530

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!

Uh oh!