Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/kimi_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ class Config(BaseModel):
default="dark",
description="Terminal color theme. Use 'light' for light terminal backgrounds.",
)
show_tps_meter: bool = Field(
default=False,
description="Show tokens-per-second (TPS) meter in the status bar",
)
models: dict[str, LLMModel] = Field(default_factory=dict, description="List of LLM models")
providers: dict[str, LLMProvider] = Field(
default_factory=dict, description="List of LLM providers"
Expand Down
2 changes: 2 additions & 0 deletions src/kimi_cli/soul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class StatusSnapshot:
"""The maximum number of tokens the context can hold."""
mcp_status: MCPStatusSnapshot | None = None
"""The current MCP startup snapshot, if MCP is configured."""
tps: float = 0.0
"""Current tokens-per-second rate during streaming. 0 when not streaming."""


@runtime_checkable
Expand Down
82 changes: 81 additions & 1 deletion src/kimi_cli/soul/kimisoul.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import asyncio
import time
import uuid
from collections import deque
from collections.abc import Awaitable, Callable, Sequence
from dataclasses import dataclass
from functools import partial
Expand All @@ -17,6 +19,7 @@
APIStatusError,
APITimeoutError,
RetryableChatProvider,
StreamedMessagePart,
)
from kosong.message import Message
from tenacity import RetryCallState, retry_if_exception, stop_after_attempt, wait_exponential_jitter
Expand Down Expand Up @@ -79,6 +82,7 @@
StepBegin,
StepInterrupted,
TextPart,
ThinkPart,
ToolResult,
TurnBegin,
TurnEnd,
Expand Down Expand Up @@ -148,6 +152,10 @@ def __init__(
self._steer_queue: asyncio.Queue[str | list[ContentPart]] = asyncio.Queue()
self._plan_mode: bool = self._runtime.session.state.plan_mode
self._plan_session_id: str | None = self._runtime.session.state.plan_session_id
# TPS tracking for streaming tokens
self._streaming_token_timestamps: deque[tuple[float, float]] = deque()
self._streaming_token_count: float = 0.0
self._tps_window_seconds: float = 3.0
# Pre-warm slug cache so the persisted slug survives process restarts
if self._plan_session_id is not None and self._runtime.session.state.plan_slug is not None:
from kimi_cli.tools.plan.heroes import seed_slug_cache
Expand Down Expand Up @@ -380,6 +388,7 @@ def status(self) -> StatusSnapshot:
context_tokens=token_count,
max_context_tokens=max_size,
mcp_status=self._mcp_status_snapshot(),
tps=self._calculate_tps(),
)

@property
Expand Down Expand Up @@ -428,6 +437,62 @@ def steer(self, content: str | list[ContentPart]) -> None:
"""Queue a steer message for injection into the current turn."""
self._steer_queue.put_nowait(content)

def _track_streaming_tokens(self, token_count: float) -> None:
"""Track tokens received during streaming for TPS calculation."""
now = time.monotonic()
self._streaming_token_count += token_count
self._streaming_token_timestamps.append((now, self._streaming_token_count))
# Prune old entries outside the rolling window
cutoff = now - self._tps_window_seconds
while self._streaming_token_timestamps and self._streaming_token_timestamps[0][0] < cutoff:
self._streaming_token_timestamps.popleft()

def _reset_streaming_tps(self) -> None:
"""Reset TPS tracking when streaming ends or a new step begins."""
self._streaming_token_timestamps.clear()
self._streaming_token_count = 0.0

def _calculate_tps(self) -> float:
"""Calculate current tokens-per-second over the rolling window."""
if len(self._streaming_token_timestamps) < 2:
return 0.0
first_time, first_tokens = self._streaming_token_timestamps[0]
last_time, last_tokens = self._streaming_token_timestamps[-1]
duration = last_time - first_time
if duration <= 0:
return 0.0
tokens = last_tokens - first_tokens
return tokens / duration

@staticmethod
def _estimate_tokens_for_tps(text: str) -> float:
"""Estimate token count for TPS calculation.

Uses simple heuristics for mixed CJK/Latin text:
- CJK characters: ~1.5 tokens each
- Other characters: ~1 token per 4 characters
"""
cjk_count = 0
other_count = 0
for ch in text:
cp = ord(ch)
if (
0x4E00 <= cp <= 0x9FFF # CJK Unified Ideographs
or 0x3400 <= cp <= 0x4DBF # CJK Extension A
or 0xF900 <= cp <= 0xFAFF # CJK Compatibility
or 0x3000 <= cp <= 0x303F # CJK Symbols
or 0xFF00 <= cp <= 0xFFEF # Fullwidth Forms
or 0x3040 <= cp <= 0x309F # Hiragana
or 0x30A0 <= cp <= 0x30FF # Katakana
or 0xAC00 <= cp <= 0xD7AF # Hangul Syllables
or 0x1100 <= cp <= 0x11FF # Hangul Jamo
or 0x3130 <= cp <= 0x318F # Hangul Compatibility Jamo
):
cjk_count += 1
else:
other_count += 1
return cjk_count * 1.5 + other_count / 4

async def _consume_pending_steers(self) -> bool:
"""Drain the steer queue and inject as follow-up user messages.

Expand Down Expand Up @@ -691,6 +756,8 @@ async def _agent_loop(self) -> TurnOutcome:
raise MaxStepsReached(self._loop_control.max_steps_per_turn)

wire_send(StepBegin(n=step_no))
# Reset TPS tracking at the start of each step
self._reset_streaming_tps()
Comment on lines 758 to +760
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reset TPS when streaming ends

_reset_streaming_tps() is only called at StepBegin, so once the model finishes emitting parts the old samples remain and status.tps can stay non-zero while the app is idle. That violates the StatusSnapshot.tps contract (“0 when not streaming”) and leaves a stale tok/s value visible between turns until the next step starts. Please clear TPS state at step/turn completion (or expire samples against current time in _calculate_tps).

Useful? React with 👍 / 👎.

back_to_the_future: BackToTheFuture | None = None
step_outcome: StepOutcome | None = None
try:
Expand Down Expand Up @@ -806,14 +873,26 @@ async def _append_notification(view: NotificationView) -> None:
# Normalize: merge adjacent user messages for clean API input
effective_history = normalize_history(self._context.history)

# Create a wrapped callback to track streaming tokens for TPS calculation
def _track_and_wire_send(part: StreamedMessagePart) -> None:
    """Track tokens from streaming content and send to wire.

    Wraps the plain wire_send callback for the streaming step: parts
    carrying text feed the TPS estimator first, then every part —
    text-bearing or not — is forwarded to wire_send unchanged.
    """
    match part:
        # NOTE(review): assumes TextPart exposes a `text` attribute and
        # ThinkPart a `think` attribute, both str — confirm against the
        # wire/kosong part definitions.
        case TextPart(text=text) | ThinkPart(think=text):
            if text:
                # Estimate tokens for TPS calculation
                self._track_streaming_tokens(self._estimate_tokens_for_tps(text))
        case _:
            pass  # Other parts don't contain tokens to track
    wire_send(part)
Comment on lines +883 to +886
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Send TPS status updates during streaming

This callback updates TPS counters but only forwards the content part, so clients that render TPS from StatusUpdate (for example the live status block in visualize.py) won’t see changing tok/s during generation and only get a value after the step completes. To actually provide in-stream TPS display, emit/throttle StatusUpdate(tps=...) while parts are arriving.

Useful? React with 👍 / 👎.


async def _run_step_once() -> StepResult:
# run an LLM step (may be interrupted)
return await kosong.step(
chat_provider,
self._agent.system_prompt,
self._agent.toolset,
effective_history,
on_message_part=wire_send,
on_message_part=_track_and_wire_send,
on_tool_result=wire_send,
)

Expand Down Expand Up @@ -843,6 +922,7 @@ async def _kosong_step_with_retry() -> StepResult:
status_update.context_usage = snap.context_usage
status_update.context_tokens = snap.context_tokens
status_update.max_context_tokens = snap.max_context_tokens
status_update.tps = snap.tps
wire_send(status_update)
Comment on lines 922 to 926
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 TPS value not sent in StatusUpdate when provider returns no token usage

In _step(), status_update.tps = snap.tps is placed inside the if result.usage is not None: block (src/kimi_cli/soul/kimisoul.py:918-925). TPS tracking is entirely independent of token usage — it's computed from streaming callback timestamps via _track_streaming_tokens / _calculate_tps. When a provider doesn't return usage info (e.g., some custom or external providers), the TPS value will remain None in the StatusUpdate, so the live view's _StatusBlock won't display TPS even though tokens were streamed and tracked. The prompt toolbar reads self.soul.status directly and would still show TPS, creating an inconsistency between the two display surfaces.

(Refers to lines 915-926)

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


# wait for all tool results (may be interrupted)
Expand Down
10 changes: 7 additions & 3 deletions src/kimi_cli/ui/shell/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,10 @@ async def run(self, command: str | None = None) -> bool:
# Initialize theme from config
if isinstance(self.soul, KimiSoul):
from kimi_cli.ui.theme import set_active_theme
from kimi_cli.ui.tps_meter import set_show_tps_meter

set_active_theme(self.soul.runtime.config.theme)
set_show_tps_meter(self.soul.runtime.config.show_tps_meter)

if command is not None:
# run single command and exit
Expand Down Expand Up @@ -984,9 +986,11 @@ def _activate_prompt_approval_modal(self) -> None:
current_request,
on_response=self._handle_prompt_approval_response,
buffer_text_provider=(
lambda: self._prompt_session._session.default_buffer.text # pyright: ignore[reportPrivateUsage]
if self._prompt_session is not None
else ""
lambda: (
self._prompt_session._session.default_buffer.text # pyright: ignore[reportPrivateUsage]
if self._prompt_session is not None
else ""
)
),
text_expander=self._prompt_session._get_placeholder_manager().serialize_for_history, # pyright: ignore[reportPrivateUsage]
)
Expand Down
6 changes: 5 additions & 1 deletion src/kimi_cli/ui/shell/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
sanitize_surrogates,
)
from kimi_cli.ui.theme import get_prompt_style, get_toolbar_colors
from kimi_cli.ui.tps_meter import get_show_tps_meter
from kimi_cli.utils.clipboard import (
grab_media_from_clipboard,
is_clipboard_available,
Expand Down Expand Up @@ -2152,9 +2153,12 @@ def _get_one_rotating_tip(self) -> str | None:
def _render_right_span(status: StatusSnapshot) -> str:
    """Render the right toolbar span: active toast, else context status (+TPS).

    The pasted diff left both the old `return format_context_status(` line
    and its replacement in place; this is the coherent post-change version.
    """
    current_toast = _current_toast("right")
    if current_toast is None:
        context_str = format_context_status(
            status.context_usage,
            status.context_tokens,
            status.max_context_tokens,
        )
        # Append the tokens-per-second meter only when enabled and streaming.
        if get_show_tps_meter() and status.tps > 0:
            return f"{context_str} · {status.tps:.1f} tok/s"
        return context_str
    return current_toast.message
51 changes: 51 additions & 0 deletions src/kimi_cli/ui/shell/slash.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,57 @@ def theme(app: Shell, args: str):
raise Reload(session_id=soul.runtime.session.id)


@registry.command
@shell_mode_registry.command
def tps(app: Shell, args: str):
    """Toggle TPS (tokens-per-second) meter display in status bar"""
    from kimi_cli.ui.tps_meter import get_show_tps_meter, set_show_tps_meter

    soul = ensure_kimi_soul(app)
    if soul is None:
        return

    enabled_now = get_show_tps_meter()
    requested = args.strip().lower()

    # No argument: report current state plus a usage hint.
    if not requested:
        console.print(f"TPS meter: [bold]{'on' if enabled_now else 'off'}[/bold]")
        console.print("[grey50]Usage: /tps on | /tps off[/grey50]")
        return

    if requested not in ("on", "off"):
        console.print(f"[red]Invalid argument: {requested}. Use 'on' or 'off'.[/red]")
        return

    want_enabled = requested == "on"
    if want_enabled == enabled_now:
        console.print(f"[yellow]TPS meter is already {requested}.[/yellow]")
        return

    config_path = soul.runtime.config.source_file
    if config_path is None:
        console.print(
            "[yellow]TPS toggle requires a config file; "
            "restart without --config to persist this setting.[/yellow]"
        )
        return

    # Persist to disk first — only update in-memory state after success
    try:
        persisted = load_config(config_path)
        persisted.show_tps_meter = want_enabled
        save_config(persisted, config_path)
    except (ConfigError, OSError) as exc:
        console.print(f"[red]Failed to save config: {exc}[/red]")
        return

    # Update in-memory state immediately (no reload needed for TPS)
    set_show_tps_meter(want_enabled)
    console.print(f"[green]TPS meter {'enabled' if want_enabled else 'disabled'}.[/green]")


@registry.command
def web(app: Shell, args: str):
"""Open Kimi Code Web UI in browser"""
Expand Down
24 changes: 18 additions & 6 deletions src/kimi_cli/ui/shell/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
prompt_other_input,
show_question_body_in_pager,
)
from kimi_cli.ui.tps_meter import get_show_tps_meter
from kimi_cli.utils.aioqueue import Queue, QueueShutDown
from kimi_cli.utils.logging import logger
from kimi_cli.utils.rich.columns import BulletColumns
Expand Down Expand Up @@ -634,6 +635,7 @@ def __init__(self, initial: StatusUpdate) -> None:
self._context_usage: float = 0.0
self._context_tokens: int = 0
self._max_context_tokens: int = 0
self._tps: float = 0.0
self.update(initial)

def render(self) -> RenderableType:
Expand All @@ -646,12 +648,22 @@ def update(self, status: StatusUpdate) -> None:
self._context_tokens = status.context_tokens
if status.max_context_tokens is not None:
self._max_context_tokens = status.max_context_tokens
if status.context_usage is not None:
self.text.plain = format_context_status(
self._context_usage,
self._context_tokens,
self._max_context_tokens,
)
if status.tps is not None:
self._tps = status.tps
# Only refresh if context_usage or tps is provided (fields that affect display)
if status.context_usage is not None or status.tps is not None:
self._refresh_text()

def _refresh_text(self) -> None:
    """Recompute the rendered status text from the cached context/TPS values."""
    base = format_context_status(
        self._context_usage,
        self._context_tokens,
        self._max_context_tokens,
    )
    # Append the TPS suffix only when the meter is enabled and streaming.
    show_tps = get_show_tps_meter() and self._tps > 0
    self.text.plain = f"{base} · {self._tps:.1f} tok/s" if show_tps else base


@asynccontextmanager
Expand Down
27 changes: 27 additions & 0 deletions src/kimi_cli/ui/tps_meter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""TPS meter display preference - mirrors the theme pattern.

This module provides a global state for the TPS meter display setting,
similar to how theme.py manages the active color theme.
"""

# Module-level private state
_show_tps_meter: bool = False


def set_show_tps_meter(enabled: bool) -> None:
"""Set whether the TPS meter should be displayed in the status bar.

Args:
enabled: True to show the TPS meter, False to hide it.
"""
global _show_tps_meter
_show_tps_meter = enabled


def get_show_tps_meter() -> bool:
"""Get whether the TPS meter should be displayed.

Returns:
True if the TPS meter should be shown, False otherwise.
"""
return _show_tps_meter
2 changes: 2 additions & 0 deletions src/kimi_cli/wire/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ class StatusUpdate(BaseModel):
"""Whether plan mode (read-only) is active. None means no change."""
mcp_status: MCPStatusSnapshot | None = None
"""The current MCP startup snapshot. None means no change."""
tps: float | None = None
"""Current tokens-per-second rate during streaming. None when not streaming."""


class Notification(BaseModel):
Expand Down
3 changes: 1 addition & 2 deletions tests/core/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ def test_default_config_dump():
"default_yolo": False,
"default_plan_mode": False,
"default_editor": "",
"theme": "dark",
"models": {},
"theme": "dark", "show_tps_meter": False, "models": {},
"providers": {},
"loop_control": {
"max_steps_per_turn": 100,
Expand Down
Loading
Loading