google
diff --git a/‎examples/deepscaler/train_deepscaler_nb.py‎
Lines changed: 14 additions & 5 deletions b/‎examples/deepscaler/train_deepscaler_nb.py‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎tests/perf/experimental/export_test.py‎
Lines changed: 30 additions & 0 deletions b/‎tests/perf/experimental/export_test.py‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎tests/perf/experimental/perfetto_test.py‎
Lines changed: 84 additions & 0 deletions b/‎tests/perf/experimental/perfetto_test.py‎
Lines changed: 84 additions & 0 deletions
diff --git a/‎tests/perf/experimental/timeline_test.py‎
Lines changed: 220 additions & 0 deletions b/‎tests/perf/experimental/timeline_test.py‎
Lines changed: 220 additions & 0 deletions
@@ -70,6 +70,8 @@
   from tunix.utils import math_rewards
   from tunix.utils import compat
   from tunix.cli.utils import data as data_lib
+  from tunix import PerfMetricsConfig
+  from tunix.perf.experimental.export import PerfMetricsExport
 
 try:
   import pathwaysutils
@@ -109,7 +111,7 @@
 # The number of times the policy generates multiple responses for a given prompt
 # within a single training step. This corresponds to `G` in Algorithm 1 in the
 # paper. The "group" in GRPO comes from here.
-NUM_GENERATIONS = 8
+NUM_GENERATIONS = 2
 
 # === other GRPO configs ===
 # The number of iterations per batch (𝜇 in GRPO algo 1).
@@ -125,15 +127,15 @@
 
 # ====== Training ======
 ENABLE_REMAT = True
-BATCH_SIZE = 128
-MINI_BATCH_SIZE = 64
+BATCH_SIZE = 4
+MINI_BATCH_SIZE = 2
 NUM_BATCHES = 100
 # Keep `NUM_TEST_BATCHES` low so that evaluation runs quickly. It can be
 # increased to a max. of 330 (if batch size is 4).
 NUM_TEST_BATCHES = 50
 
-EVAL_EVERY_N_STEPS = 1000  # this doesn't matter if `TRAIN_FRACTION = 1.0`.
-NUM_EPOCHS = 100  # can potentially train for more epochs
+EVAL_EVERY_N_STEPS = 50  # this doesn't matter if `TRAIN_FRACTION = 1.0`.
+NUM_EPOCHS = 10  # can potentially train for more epochs
 
 # Number of training steps.
 MAX_STEPS = int(NUM_BATCHES * NUM_ITERATIONS * TRAIN_FRACTION * NUM_EPOCHS)
@@ -529,13 +531,20 @@ def get_lora_model(base_model, model_mesh):
     max_concurrency=MAX_CONCURRENCY,
 )
 
+# Perf metrics logging: export metrics via PerfMetricsExport.
+perf_metrics_config = PerfMetricsConfig()
+perf_metrics_config.custom_export_fn_v2 = PerfMetricsExport(
+    "/tmp/agentic_perf"
+).export_metrics
+
 # %%
 # RL cluster
 rl_cluster = rl_cluster_lib.RLCluster(
     actor=qwen2_actor,
     reference=qwen2_ref,
     tokenizer=tokenizer,
     cluster_config=cluster_config,
+    perf_config=perf_metrics_config,
 )
 
 show_hbm_usage("after RLCluster creation")
 
@@ -0,0 +1,30 @@
+"""Tests for export."""
+
+import os
+import pathlib
+import time
+from absl.testing import absltest
+from tunix.perf.experimental import export
+from tunix.perf.experimental import tracer
+
+
+class ExportTest(absltest.TestCase):
+  """Tests for `export.PerfMetricsExport`."""
+
+  def test_perf_metrics_export(self):
+    """Exporting one recorded span leaves exactly one trace file on disk."""
+    # Backward compatibility check
+    tmp_dir = pathlib.Path(self.create_tempdir().full_path)
+    exporter = export.PerfMetricsExport(trace_dir=tmp_dir)
+
+    # Record a short dummy span with a tracer wired to the exporter callback.
+    t = tracer.PerfTracer(export_fn=exporter.export_metrics)
+    with t.span("test_span"):
+      time.sleep(0.001)
+    t.export()
+
+    files = os.listdir(tmp_dir)
+    self.assertLen(files, 1)
+    # assertStartsWith reports the actual file name on failure, unlike
+    # assertTrue(...startswith(...)).
+    self.assertStartsWith(files[0], "perfetto_trace_v2_")
+
+
+if __name__ == "__main__":
+  absltest.main()
@@ -0,0 +1,84 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for perfetto."""
+
+import os
+import tempfile
+import time
+
+from absl.testing import absltest
+from tunix.perf.experimental import perfetto
+from tunix.perf.experimental import tracer
+
+
+class PerfettoTest(absltest.TestCase):
+  """Tests for span naming and trace-file writing in `perfetto`."""
+
+  def test_create_span_name(self):
+    """Checks the display name built for various span names and tag sets."""
+    # Test basic span name with global_step
+    name = perfetto._create_span_name("my_span", {"global_step": 10})
+    self.assertEqual(name, "my_span (step=10)")
+
+    # Test peft_train_step with role
+    name = perfetto._create_span_name(
+        "peft_train_step", {"global_step": 20, "role": "actor"}
+    )
+    self.assertEqual(name, "peft_train_step (step=20, role=actor)")
+
+    # Test rollout with group_id and pair_index
+    name = perfetto._create_span_name(
+        "rollout", {"group_id": 5, "pair_index": 3, "global_step": 100}
+    )
+    self.assertEqual(name, "rollout (step=100, group_id=5, pair_index=3)")
+
+    # Test rollout with missing pair_index
+    name = perfetto._create_span_name("rollout", {"group_id": 5})
+    self.assertEqual(name, "rollout (group_id=5)")
+
+    # Test unknown name with extra tags (should ignore specific logic but
+    # keep step)
+    name = perfetto._create_span_name(
+        "unknown_span", {"role": "actor", "global_step": 50}
+    )
+    self.assertEqual(name, "unknown_span (step=50)")
+
+    # Test no tags
+    name = perfetto._create_span_name("simple_span", {})
+    self.assertEqual(name, "simple_span")
+
+  # TODO(noghabi): Add more tests for PerfettoTraceWriter.
+  def test_perfetto_trace_writer(self):
+    """Writing one timeline produces exactly one `.pb` trace file."""
+    with tempfile.TemporaryDirectory() as tmp_dir:
+      writer = perfetto.PerfettoTraceWriter(trace_dir=tmp_dir)
+
+      # Create a dummy timeline holding a single completed span. The span
+      # object returned by start_span is not needed here.
+      t = tracer.Timeline("test_timeline", time.perf_counter())
+      t.start_span("test_span", time.perf_counter())
+      time.sleep(0.001)
+      t.stop_span(time.perf_counter())
+
+      timelines = {"test_timeline": t}
+
+      writer.write_timelines(timelines)
+
+      # Check if file was created
+      files = os.listdir(tmp_dir)
+      self.assertLen(files, 1)
+      self.assertStartsWith(files[0], "perfetto_trace_v2_")
+      self.assertEndsWith(files[0], ".pb")
+
+      # We could parse the proto back to verify content, but just existence
+      # is good for now.
+
+
+if __name__ == "__main__":
+  absltest.main()
@@ -0,0 +1,220 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import threading
+from unittest import mock
+from absl.testing import absltest
+from tunix.perf.experimental import constants
+from tunix.perf.experimental import timeline
+
+
+class SpanTest(absltest.TestCase):
+  """Tests for `timeline.Span` construction, tagging and repr."""
+
+  def test_span(self):
+    """A freshly created span is open-ended: end and duration are infinite."""
+    s = timeline.Span(name="test", begin=1.0, id=0)
+    self.assertEqual(s.name, "test")
+    self.assertEqual(s.begin, 1.0)
+    self.assertEqual(s.end, float("inf"))
+    self.assertFalse(s.ended)
+    self.assertEqual(s.duration, float("inf"))
+
+  def test_span_with_tags(self):
+    """Tags passed at construction are stored and show up in the repr."""
+    tags_dict = {constants.GLOBAL_STEP: 1, "custom_tag": "value"}
+    s = timeline.Span(name="test_tags", begin=1.0, id=0, tags=tags_dict)
+    self.assertEqual(s.tags, tags_dict)
+    self.assertIn("tags=", repr(s))
+    self.assertIn("global_step", repr(s))
+
+  def test_add_tag(self):
+    """`add_tag` accumulates tags on the span."""
+    s = timeline.Span(name="test_add_tag", begin=1.0, id=0)
+    s.add_tag("foo", "bar")
+    self.assertEqual(s.tags, {"foo": "bar"})
+    s.add_tag(constants.GLOBAL_STEP, 100)
+    self.assertEqual(s.tags, {"foo": "bar", "global_step": 100})
+
+  def test_add_tag_overwrite_warning(self):
+    """Re-adding an existing tag overwrites it and logs a warning."""
+    s = timeline.Span(name="test_add_tag_overwrite", begin=1.0, id=0)
+    s.add_tag("foo", "bar")
+    with self.assertLogs(level="WARNING") as cm:
+      s.add_tag("foo", "baz")
+    self.assertEqual(s.tags, {"foo": "baz"})
+    self.assertTrue(
+        any(
+            "Tag 'foo' already exists with value 'bar'. Overwriting with 'baz'."
+            in o
+            for o in cm.output
+        )
+    )
+
+  def test_repr_with_born_at(self):
+    """`__repr__` reports times relative to `born_at` when it is given."""
+    born_at = 100.0
+    s = timeline.Span(name="test_born_at", begin=101.0, id=0)
+    s.end = 105.0
+
+    # Check default repr (born_at=0.0)
+    expected_default = "[0] test_born_at: 101.000000, 105.000000"
+    self.assertEqual(repr(s), expected_default)
+
+    # Check repr with explicit born_at; repr() cannot pass the keyword, so
+    # the dunder is called directly.
+    expected_adjusted = "[0] test_born_at: 1.000000, 5.000000"
+    self.assertEqual(s.__repr__(born_at=born_at), expected_adjusted)
+
+
+class TimelineTest(absltest.TestCase):
+  """Tests for the synchronous `timeline.Timeline` span stack."""
+
+  def test_basic_span_lifecycle(self):
+    """A single span is opened as a root and closed by `stop_span`."""
+    tl = timeline.Timeline("test_tl", 100.0)
+    span = tl.start_span("span1", 101.0)
+
+    # Freshly started: first id, no parent, still open.
+    self.assertEqual(span.name, "span1")
+    self.assertEqual(span.begin, 101.0)
+    self.assertEqual(span.id, 0)
+    self.assertIsNone(span.parent_id)
+    self.assertFalse(span.ended)
+
+    tl.stop_span(102.0)
+    self.assertTrue(span.ended)
+    self.assertEqual(span.end, 102.0)
+
+  def test_nested_spans(self):
+    """A span started inside another one is parented to it; stops are LIFO."""
+    tl = timeline.Timeline("test_tl", 0.0)
+    outer = tl.start_span("root", 1.0)
+    inner = tl.start_span("child", 2.0)
+
+    self.assertEqual(inner.parent_id, outer.id)
+
+    # The first stop closes the innermost span.
+    tl.stop_span(3.0)
+    self.assertEqual(inner.end, 3.0)
+
+    # The second stop closes the enclosing span.
+    tl.stop_span(4.0)
+    self.assertEqual(outer.end, 4.0)
+
+  def test_stop_span_error_cases(self):
+    """Stopping with nothing open raises; end-before-begin only logs."""
+    tl = timeline.Timeline("test_tl", 0.0)
+    with self.assertRaisesRegex(ValueError, "no more spans to end"):
+      tl.stop_span(1.0)
+
+    span = tl.start_span("s1", 2.0)
+    # An end time earlier than the begin time is logged as an error, but the
+    # end value is still recorded.
+    with mock.patch("absl.logging.error") as error_log:
+      tl.stop_span(1.0)
+      error_log.assert_called_once()
+    self.assertEqual(span.end, 1.0)
+
+  def test_nested_timeline_with_tags_repr(self):
+    """The timeline repr lists spans relative to its birth time, with tags."""
+    born = 1000.0
+    tl = timeline.Timeline("test_tl", born)
+
+    # Open the root span, then a child inside it, tagging each one.
+    root_span = tl.start_span("root", born + 1.0)
+    root_span.add_tag("type", "root_span")
+    child_span = tl.start_span("child", born + 2.0)
+    child_span.add_tag("iter", 1)
+
+    # Close the child first, then the root.
+    tl.stop_span(born + 3.0)
+    tl.stop_span(born + 4.0)
+
+    # Tags live on the span they were added to.
+    self.assertEqual(root_span.tags, {"type": "root_span"})
+    self.assertEqual(child_span.tags, {"iter": 1})
+
+    # Full repr: header line, then one line per span.
+    expected_repr = (
+        f"Timeline(test_tl, {born:.6f})\n"
+        "[0] root: 1.000000, 4.000000, tags={'type': 'root_span'}\n"
+        "[1] child: 2.000000, 3.000000 (parent=0), tags={'iter': 1}\n"
+    )
+    self.assertEqual(repr(tl), expected_repr)
+
+
+class AsyncTimelineTest(absltest.TestCase):
+  """Tests for `timeline.AsyncTimeline` with `_async_wait` mocked out."""
+
+  def setUp(self):
+    """Patches `_async_wait` so that, by default, waits succeed immediately."""
+    super().setUp()
+    self.patcher = mock.patch("tunix.perf.experimental.timeline._async_wait")
+    self.mock_async_wait = self.patcher.start()
+    # Registered right after start() so the patch is undone even if the rest
+    # of setUp raises (tearDown would be skipped in that case).
+    self.addCleanup(self.patcher.stop)
+
+    # Setup mock behavior for _async_wait to immediately succeed by default
+    def default_wait(waitlist, success, failure):
+      success()
+      return mock.Mock(spec=threading.Thread)
+
+    self.mock_async_wait.side_effect = default_wait
+
+  def test_span_success(self):
+    """A span with a waitlist is recorded once the wait reports success."""
+    t = timeline.AsyncTimeline("dev", 0.0)
+    waitlist = ["thing"]
+
+    t.span("async_op", 1.0, waitlist)
+
+    self.mock_async_wait.assert_called_once()
+    self.assertLen(t.spans, 1)
+    s = t.spans[0]
+    self.assertEqual(s.name, "async_op")
+    self.assertEqual(s.begin, 1.0)
+    self.assertTrue(s.ended)  # Ended because mock calls success immediately
+
+  def test_span_with_no_waitlist(self):
+    """An empty waitlist records an ended span without calling the waiter."""
+    t = timeline.AsyncTimeline("dev", 0.0)
+    t.span("immediate", 1.0, [])
+    self.mock_async_wait.assert_not_called()
+    self.assertLen(t.spans, 1)
+    self.assertTrue(t.spans[0].ended)
+
+  def test_delayed_completion(self):
+    """The span is only recorded when the success callback eventually fires."""
+    t = timeline.AsyncTimeline("dev", 0.0)
+
+    # Capture callbacks
+    callbacks = {}
+
+    def capture_wait(waitlist, success, failure):
+      callbacks["success"] = success
+      callbacks["failure"] = failure
+      return mock.Mock(spec=threading.Thread)
+
+    self.mock_async_wait.side_effect = capture_wait
+
+    t.span("delayed", 1.0, ["wait"])
+
+    self.assertEmpty(t.spans)  # Not yet recorded
+
+    # Simulate completion
+    with mock.patch("time.perf_counter", return_value=5.0):
+      callbacks["success"]()
+
+    self.assertLen(t.spans, 1)
+    s = t.spans[0]
+    self.assertEqual(s.end, 5.0)
+
+  def test_failure(self):
+    """An error passed to the failure callback propagates out of `span`."""
+    t = timeline.AsyncTimeline("dev", 0.0)
+
+    def fail_wait(waitlist, success, failure):
+      failure(RuntimeError("failed"))
+      return mock.Mock(spec=threading.Thread)
+
+    self.mock_async_wait.side_effect = fail_wait
+
+    with self.assertRaisesRegex(RuntimeError, "failed"):
+      t.span("failed", 1.0, ["wait"])
+
+
+if __name__ == "__main__":
+  absltest.main()