Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/kimi_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ class Config(BaseModel):
default="dark",
description="Terminal color theme. Use 'light' for light terminal backgrounds.",
)
show_tps_meter: bool = Field(
default=False,
description="Show tokens-per-second (TPS) meter in the status bar",
)
models: dict[str, LLMModel] = Field(default_factory=dict, description="List of LLM models")
providers: dict[str, LLMProvider] = Field(
default_factory=dict, description="List of LLM providers"
Expand Down
2 changes: 2 additions & 0 deletions src/kimi_cli/soul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class StatusSnapshot:
"""The maximum number of tokens the context can hold."""
mcp_status: MCPStatusSnapshot | None = None
"""The current MCP startup snapshot, if MCP is configured."""
tps: float = 0.0
"""Current tokens-per-second rate during streaming. 0 when not streaming."""


@runtime_checkable
Expand Down
82 changes: 81 additions & 1 deletion src/kimi_cli/soul/kimisoul.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import asyncio
import time
import uuid
from collections import deque
from collections.abc import Awaitable, Callable, Sequence
from dataclasses import dataclass
from functools import partial
Expand All @@ -17,6 +19,7 @@
APIStatusError,
APITimeoutError,
RetryableChatProvider,
StreamedMessagePart,
)
from kosong.message import Message
from tenacity import RetryCallState, retry_if_exception, stop_after_attempt, wait_exponential_jitter
Expand Down Expand Up @@ -79,6 +82,7 @@
StepBegin,
StepInterrupted,
TextPart,
ThinkPart,
ToolResult,
TurnBegin,
TurnEnd,
Expand Down Expand Up @@ -148,6 +152,10 @@ def __init__(
self._steer_queue: asyncio.Queue[str | list[ContentPart]] = asyncio.Queue()
self._plan_mode: bool = self._runtime.session.state.plan_mode
self._plan_session_id: str | None = self._runtime.session.state.plan_session_id
# TPS tracking for streaming tokens
self._streaming_token_timestamps: deque[tuple[float, float]] = deque()
self._streaming_token_count: float = 0.0
self._tps_window_seconds: float = 3.0
# Pre-warm slug cache so the persisted slug survives process restarts
if self._plan_session_id is not None and self._runtime.session.state.plan_slug is not None:
from kimi_cli.tools.plan.heroes import seed_slug_cache
Expand Down Expand Up @@ -380,6 +388,7 @@ def status(self) -> StatusSnapshot:
context_tokens=token_count,
max_context_tokens=max_size,
mcp_status=self._mcp_status_snapshot(),
tps=self._calculate_tps(),
)

@property
Expand Down Expand Up @@ -428,6 +437,62 @@ def steer(self, content: str | list[ContentPart]) -> None:
"""Queue a steer message for injection into the current turn."""
self._steer_queue.put_nowait(content)

def _track_streaming_tokens(self, token_count: float) -> None:
"""Track tokens received during streaming for TPS calculation."""
now = time.monotonic()
self._streaming_token_count += token_count
self._streaming_token_timestamps.append((now, self._streaming_token_count))
# Prune old entries outside the rolling window
cutoff = now - self._tps_window_seconds
while self._streaming_token_timestamps and self._streaming_token_timestamps[0][0] < cutoff:
self._streaming_token_timestamps.popleft()

def _reset_streaming_tps(self) -> None:
"""Reset TPS tracking when streaming ends or a new step begins."""
self._streaming_token_timestamps.clear()
self._streaming_token_count = 0.0

def _calculate_tps(self) -> float:
"""Calculate current tokens-per-second over the rolling window."""
if len(self._streaming_token_timestamps) < 2:
return 0.0
first_time, first_tokens = self._streaming_token_timestamps[0]
last_time, last_tokens = self._streaming_token_timestamps[-1]
duration = last_time - first_time
if duration <= 0:
return 0.0
tokens = last_tokens - first_tokens
return tokens / duration

@staticmethod
def _estimate_tokens_for_tps(text: str) -> float:
"""Estimate token count for TPS calculation.

Uses simple heuristics for mixed CJK/Latin text:
- CJK characters: ~1.5 tokens each
- Other characters: ~1 token per 4 characters
"""
cjk_count = 0
other_count = 0
for ch in text:
cp = ord(ch)
if (
0x4E00 <= cp <= 0x9FFF # CJK Unified Ideographs
or 0x3400 <= cp <= 0x4DBF # CJK Extension A
or 0xF900 <= cp <= 0xFAFF # CJK Compatibility
or 0x3000 <= cp <= 0x303F # CJK Symbols
or 0xFF00 <= cp <= 0xFFEF # Fullwidth Forms
or 0x3040 <= cp <= 0x309F # Hiragana
or 0x30A0 <= cp <= 0x30FF # Katakana
or 0xAC00 <= cp <= 0xD7AF # Hangul Syllables
or 0x1100 <= cp <= 0x11FF # Hangul Jamo
or 0x3130 <= cp <= 0x318F # Hangul Compatibility Jamo
):
cjk_count += 1
else:
other_count += 1
return cjk_count * 1.5 + other_count / 4

async def _consume_pending_steers(self) -> bool:
"""Drain the steer queue and inject as follow-up user messages.

Expand Down Expand Up @@ -691,6 +756,8 @@ async def _agent_loop(self) -> TurnOutcome:
raise MaxStepsReached(self._loop_control.max_steps_per_turn)

wire_send(StepBegin(n=step_no))
# Reset TPS tracking at the start of each step
self._reset_streaming_tps()
Comment on lines 758 to +760
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reset TPS when streaming ends

_reset_streaming_tps() is only called at StepBegin, so once the model finishes emitting parts the old samples remain and status.tps can stay non-zero while the app is idle. That violates the StatusSnapshot.tps contract (“0 when not streaming”) and leaves a stale tok/s value visible between turns until the next step starts. Please clear TPS state at step/turn completion (or expire samples against current time in _calculate_tps).

Useful? React with 👍 / 👎.

back_to_the_future: BackToTheFuture | None = None
step_outcome: StepOutcome | None = None
try:
Expand Down Expand Up @@ -806,14 +873,26 @@ async def _append_notification(view: NotificationView) -> None:
# Normalize: merge adjacent user messages for clean API input
effective_history = normalize_history(self._context.history)

# Create a wrapped callback to track streaming tokens for TPS calculation
def _track_and_wire_send(part: StreamedMessagePart) -> None:
    """Track tokens from streaming content and send to wire.

    Wraps the plain wire_send callback for the streaming step: parts
    carrying text feed the TPS estimator first, then every part —
    text-bearing or not — is forwarded to wire_send unchanged.
    """
    match part:
        # NOTE(review): assumes TextPart exposes a `text` attribute and
        # ThinkPart a `think` attribute, both str — confirm against the
        # wire/kosong part definitions.
        case TextPart(text=text) | ThinkPart(think=text):
            if text:
                # Estimate tokens for TPS calculation
                self._track_streaming_tokens(self._estimate_tokens_for_tps(text))
        case _:
            pass  # Other parts don't contain tokens to track
    wire_send(part)
Comment on lines +883 to +886
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Send TPS status updates during streaming

This callback updates TPS counters but only forwards the content part, so clients that render TPS from StatusUpdate (for example the live status block in visualize.py) won’t see changing tok/s during generation and only get a value after the step completes. To actually provide in-stream TPS display, emit/throttle StatusUpdate(tps=...) while parts are arriving.

Useful? React with 👍 / 👎.


async def _run_step_once() -> StepResult:
# run an LLM step (may be interrupted)
return await kosong.step(
chat_provider,
self._agent.system_prompt,
self._agent.toolset,
effective_history,
on_message_part=wire_send,
on_message_part=_track_and_wire_send,
on_tool_result=wire_send,
)

Expand Down Expand Up @@ -843,6 +922,7 @@ async def _kosong_step_with_retry() -> StepResult:
status_update.context_usage = snap.context_usage
status_update.context_tokens = snap.context_tokens
status_update.max_context_tokens = snap.max_context_tokens
status_update.tps = snap.tps
wire_send(status_update)
Comment on lines 922 to 926
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 TPS value not sent in StatusUpdate when provider returns no token usage

In _step(), status_update.tps = snap.tps is placed inside the if result.usage is not None: block (src/kimi_cli/soul/kimisoul.py:918-925). TPS tracking is entirely independent of token usage — it's computed from streaming callback timestamps via _track_streaming_tokens / _calculate_tps. When a provider doesn't return usage info (e.g., some custom or external providers), the TPS value will remain None in the StatusUpdate, so the live view's _StatusBlock won't display TPS even though tokens were streamed and tracked. The prompt toolbar reads self.soul.status directly and would still show TPS, creating an inconsistency between the two display surfaces.

(Refers to lines 915-926)

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


# wait for all tool results (may be interrupted)
Expand Down
10 changes: 7 additions & 3 deletions src/kimi_cli/ui/shell/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,10 @@ async def run(self, command: str | None = None) -> bool:
# Initialize theme from config
if isinstance(self.soul, KimiSoul):
from kimi_cli.ui.theme import set_active_theme
from kimi_cli.ui.tps_meter import set_show_tps_meter

set_active_theme(self.soul.runtime.config.theme)
set_show_tps_meter(self.soul.runtime.config.show_tps_meter)

if command is not None:
# run single command and exit
Expand Down Expand Up @@ -984,9 +986,11 @@ def _activate_prompt_approval_modal(self) -> None:
current_request,
on_response=self._handle_prompt_approval_response,
buffer_text_provider=(
lambda: self._prompt_session._session.default_buffer.text # pyright: ignore[reportPrivateUsage]
if self._prompt_session is not None
else ""
lambda: (
self._prompt_session._session.default_buffer.text # pyright: ignore[reportPrivateUsage]
if self._prompt_session is not None
else ""
)
),
text_expander=self._prompt_session._get_placeholder_manager().serialize_for_history, # pyright: ignore[reportPrivateUsage]
)
Expand Down
6 changes: 5 additions & 1 deletion src/kimi_cli/ui/shell/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
sanitize_surrogates,
)
from kimi_cli.ui.theme import get_prompt_style, get_toolbar_colors
from kimi_cli.ui.tps_meter import get_show_tps_meter
from kimi_cli.utils.clipboard import (
grab_media_from_clipboard,
is_clipboard_available,
Expand Down Expand Up @@ -2152,9 +2153,12 @@ def _get_one_rotating_tip(self) -> str | None:
def _render_right_span(status: StatusSnapshot) -> str:
    """Render the right toolbar span: active toast, else context status (+TPS).

    The pasted diff left both the old `return format_context_status(` line
    and its replacement in place; this is the coherent post-change version.
    """
    current_toast = _current_toast("right")
    if current_toast is None:
        context_str = format_context_status(
            status.context_usage,
            status.context_tokens,
            status.max_context_tokens,
        )
        # Append the tokens-per-second meter only when enabled and streaming.
        if get_show_tps_meter() and status.tps > 0:
            return f"{context_str} · {status.tps:.1f} tok/s"
        return context_str
    return current_toast.message
51 changes: 51 additions & 0 deletions src/kimi_cli/ui/shell/slash.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,57 @@ def theme(app: Shell, args: str):
raise Reload(session_id=soul.runtime.session.id)


@registry.command
@shell_mode_registry.command
def tps(app: Shell, args: str):
    """Toggle TPS (tokens-per-second) meter display in status bar"""
    from kimi_cli.ui.tps_meter import get_show_tps_meter, set_show_tps_meter

    soul = ensure_kimi_soul(app)
    if soul is None:
        return

    enabled_now = get_show_tps_meter()
    requested = args.strip().lower()

    # No argument: report current state plus a usage hint.
    if not requested:
        console.print(f"TPS meter: [bold]{'on' if enabled_now else 'off'}[/bold]")
        console.print("[grey50]Usage: /tps on | /tps off[/grey50]")
        return

    if requested not in ("on", "off"):
        console.print(f"[red]Invalid argument: {requested}. Use 'on' or 'off'.[/red]")
        return

    want_enabled = requested == "on"
    if want_enabled == enabled_now:
        console.print(f"[yellow]TPS meter is already {requested}.[/yellow]")
        return

    config_path = soul.runtime.config.source_file
    if config_path is None:
        console.print(
            "[yellow]TPS toggle requires a config file; "
            "restart without --config to persist this setting.[/yellow]"
        )
        return

    # Persist to disk first — only update in-memory state after success
    try:
        persisted = load_config(config_path)
        persisted.show_tps_meter = want_enabled
        save_config(persisted, config_path)
    except (ConfigError, OSError) as exc:
        console.print(f"[red]Failed to save config: {exc}[/red]")
        return

    # Update in-memory state immediately (no reload needed for TPS)
    set_show_tps_meter(want_enabled)
    console.print(f"[green]TPS meter {'enabled' if want_enabled else 'disabled'}.[/green]")


@registry.command
def web(app: Shell, args: str):
"""Open Kimi Code Web UI in browser"""
Expand Down
24 changes: 18 additions & 6 deletions src/kimi_cli/ui/shell/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
prompt_other_input,
show_question_body_in_pager,
)
from kimi_cli.ui.tps_meter import get_show_tps_meter
from kimi_cli.utils.aioqueue import Queue, QueueShutDown
from kimi_cli.utils.logging import logger
from kimi_cli.utils.rich.columns import BulletColumns
Expand Down Expand Up @@ -634,6 +635,7 @@ def __init__(self, initial: StatusUpdate) -> None:
self._context_usage: float = 0.0
self._context_tokens: int = 0
self._max_context_tokens: int = 0
self._tps: float = 0.0
self.update(initial)

def render(self) -> RenderableType:
Expand All @@ -646,12 +648,22 @@ def update(self, status: StatusUpdate) -> None:
self._context_tokens = status.context_tokens
if status.max_context_tokens is not None:
self._max_context_tokens = status.max_context_tokens
if status.context_usage is not None:
self.text.plain = format_context_status(
self._context_usage,
self._context_tokens,
self._max_context_tokens,
)
if status.tps is not None:
self._tps = status.tps
# Only refresh if context_usage or tps is provided (fields that affect display)
if status.context_usage is not None or status.tps is not None:
self._refresh_text()

def _refresh_text(self) -> None:
    """Recompute the rendered status text from the cached context/TPS values."""
    base = format_context_status(
        self._context_usage,
        self._context_tokens,
        self._max_context_tokens,
    )
    # Append the TPS suffix only when the meter is enabled and streaming.
    show_tps = get_show_tps_meter() and self._tps > 0
    self.text.plain = f"{base} · {self._tps:.1f} tok/s" if show_tps else base


@asynccontextmanager
Expand Down
27 changes: 27 additions & 0 deletions src/kimi_cli/ui/tps_meter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""TPS meter display preference - mirrors the theme pattern.

This module provides a global state for the TPS meter display setting,
similar to how theme.py manages the active color theme.
"""

# Module-level private state
_show_tps_meter: bool = False


def set_show_tps_meter(enabled: bool) -> None:
"""Set whether the TPS meter should be displayed in the status bar.

Args:
enabled: True to show the TPS meter, False to hide it.
"""
global _show_tps_meter
_show_tps_meter = enabled


def get_show_tps_meter() -> bool:
"""Get whether the TPS meter should be displayed.

Returns:
True if the TPS meter should be shown, False otherwise.
"""
return _show_tps_meter
2 changes: 2 additions & 0 deletions src/kimi_cli/wire/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ class StatusUpdate(BaseModel):
"""Whether plan mode (read-only) is active. None means no change."""
mcp_status: MCPStatusSnapshot | None = None
"""The current MCP startup snapshot. None means no change."""
tps: float | None = None
"""Current tokens-per-second rate during streaming. None when not streaming."""


class Notification(BaseModel):
Expand Down
3 changes: 1 addition & 2 deletions tests/core/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ def test_default_config_dump():
"default_yolo": False,
"default_plan_mode": False,
"default_editor": "",
"theme": "dark",
"models": {},
"theme": "dark", "show_tps_meter": False, "models": {},
"providers": {},
"loop_control": {
"max_steps_per_turn": 100,
Expand Down
Loading
Loading