Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Only write entries that are worth mentioning to users.

## Unreleased

- Core: Fix OpenAI Responses provider sending implicit `reasoning.effort=null` which breaks Responses-compatible endpoints that require reasoning — reasoning parameters are now omitted unless explicitly set
- Core: Fix context compaction failing when conversation contains media parts (images, audio, video) — switch from blacklist filtering (exclude `ThinkPart`) to whitelist filtering (only keep `TextPart`) to prevent unsupported content types from being sent to the compaction API
- Web: Fix `@` file mention index not refreshing after switching sessions or when workspace files change — reset index on session switch, auto-refresh after 30s staleness, and support path-prefix search beyond the 500-file limit

Expand Down
1 change: 1 addition & 0 deletions docs/en/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This page documents the changes in each Kimi Code CLI release.

## Unreleased

- Core: Fix OpenAI Responses provider sending implicit `reasoning.effort=null` which breaks Responses-compatible endpoints that require reasoning — reasoning parameters are now omitted unless explicitly set
- Core: Fix context compaction failing when conversation contains media parts (images, audio, video) — switch from blacklist filtering (exclude `ThinkPart`) to whitelist filtering (only keep `TextPart`) to prevent unsupported content types from being sent to the compaction API
- Web: Fix `@` file mention index not refreshing after switching sessions or when workspace files change — reset index on session switch, auto-refresh after 30s staleness, and support path-prefix search beyond the 500-file limit

Expand Down
1 change: 1 addition & 0 deletions docs/zh/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

## 未发布

- Core:修复 OpenAI Responses provider 隐式发送 `reasoning.effort=null` 导致需要推理的 Responses 兼容端点报错的问题——现在仅在显式设置时才发送推理参数
- Core:修复对话包含媒体内容(图片、音频、视频)时上下文压缩失败的问题——将过滤策略从黑名单(排除 `ThinkPart`)改为白名单(仅保留 `TextPart`),防止不支持的内容类型被发送到压缩 API
- Web:修复 `@` 文件提及索引在切换会话或工作区文件变更后不刷新的问题——切换会话时重置索引,30 秒过期自动刷新,输入路径前缀可查找超出 500 文件上限的文件

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,13 @@ async def generate(

generation_kwargs: dict[str, Any] = {}
generation_kwargs.update(self._generation_kwargs)
generation_kwargs["reasoning"] = Reasoning(
effort=generation_kwargs.pop("reasoning_effort", None),
summary="auto",
)
generation_kwargs["include"] = ["reasoning.encrypted_content"]
reasoning_effort = generation_kwargs.pop("reasoning_effort", None)
if reasoning_effort is not None:
generation_kwargs["reasoning"] = Reasoning(
effort=reasoning_effort,
summary="auto",
)
generation_kwargs["include"] = ["reasoning.encrypted_content"]

try:
response = await self._client.responses.create(
Expand Down
43 changes: 43 additions & 0 deletions packages/kosong/tests/api_snapshot_tests/test_openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,49 @@ async def test_openai_responses_generation_kwargs():
assert (body["temperature"], body["max_output_tokens"]) == snapshot((0.7, 2048))


async def test_openai_responses_omits_reasoning_by_default():
    """A provider created without any thinking configuration must not emit
    `reasoning` or `include` fields in the outgoing request body."""
    with respx.mock(base_url="https://api.openai.com") as mock:
        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
        chat_provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False)
        response_stream = await chat_provider.generate("", [], [Message(role="user", content="Hi")])
        async for _ in response_stream:
            pass
        # Inspect the raw request that was actually sent to the endpoint.
        sent_payload = json.loads(mock.calls.last.request.content.decode())
        assert "reasoning" not in sent_payload
        assert "include" not in sent_payload


async def test_openai_responses_with_thinking_off_omits_reasoning():
    """`with_thinking("off")` must behave like the default: no `reasoning`
    and no `include` in the request, because thinking_effort_to_reasoning_effort("off")
    yields None for the effort."""
    with respx.mock(base_url="https://api.openai.com") as mock:
        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
        chat_provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
            "off"
        )
        response_stream = await chat_provider.generate("", [], [Message(role="user", content="Hi")])
        async for _ in response_stream:
            pass
        # The serialized request must carry neither reasoning field.
        sent_payload = json.loads(mock.calls.last.request.content.decode())
        assert "reasoning" not in sent_payload
        assert "include" not in sent_payload


async def test_openai_responses_with_thinking_low():
    """An explicit `with_thinking("low")` must produce a request carrying
    `reasoning.effort="low"` plus the encrypted-content include flag."""
    with respx.mock(base_url="https://api.openai.com") as mock:
        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
        chat_provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
            "low"
        )
        response_stream = await chat_provider.generate("", [], [Message(role="user", content="Think")])
        async for _ in response_stream:
            pass
        # When effort is set, both reasoning and include must be present.
        sent_payload = json.loads(mock.calls.last.request.content.decode())
        assert sent_payload["reasoning"] == snapshot({"effort": "low", "summary": "auto"})
        assert sent_payload["include"] == snapshot(["reasoning.encrypted_content"])


async def test_openai_responses_with_thinking():
with respx.mock(base_url="https://api.openai.com") as mock:
mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
Expand Down
30 changes: 30 additions & 0 deletions tests/core/test_create_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from inline_snapshot import snapshot
from kosong.chat_provider.echo import EchoChatProvider
from kosong.chat_provider.kimi import Kimi
from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
from pydantic import SecretStr

from kimi_cli.config import LLMModel, LLMProvider
Expand Down Expand Up @@ -133,3 +134,32 @@ def test_create_llm_requires_base_url_for_kimi():
model = LLMModel(provider="kimi", model="kimi-base", max_context_size=4096)

assert create_llm(provider, model) is None


def test_create_llm_openai_responses_thinking_false_no_reasoning_in_params():
    """`thinking=False` maps to `with_thinking("off")`, which stores
    `reasoning_effort=None` in the provider's generation kwargs.

    The stored None is intentional: OpenAIResponses.generate() is the layer
    that later drops `reasoning` from the actual API request when the effort
    is None, so the model parameters still record it here.
    """
    llm_provider = LLMProvider(
        type="openai_responses",
        base_url="https://openrouter.ai/api/v1",
        api_key=SecretStr("test-key"),
    )
    llm_model = LLMModel(
        provider="openrouter_custom",
        model="minimax/minimax-m2.5",
        max_context_size=128000,
        capabilities=None,
    )

    llm = create_llm(llm_provider, llm_model, thinking=False)

    assert llm is not None
    assert isinstance(llm.chat_provider, OpenAIResponses)
    # The None effort survives into the stored parameters; omission happens
    # at request-build time, not here.
    assert llm.chat_provider.model_parameters == snapshot(
        {
            "base_url": "https://openrouter.ai/api/v1/",
            "reasoning_effort": None,
        }
    )
Loading