Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Only write entries that are worth mentioning to users.

## Unreleased

- Core: Fix OpenAI Responses provider sending implicit `reasoning.effort=null` which breaks Responses-compatible endpoints that require reasoning — reasoning parameters are now omitted unless explicitly set
- Core: Fix context compaction failing when conversation contains media parts (images, audio, video) — switch from blacklist filtering (exclude `ThinkPart`) to whitelist filtering (only keep `TextPart`) to prevent unsupported content types from being sent to the compaction API
- Web: Fix `@` file mention index not refreshing after switching sessions or when workspace files change — reset index on session switch, auto-refresh after 30s staleness, and support path-prefix search beyond the 500-file limit

Expand Down
1 change: 1 addition & 0 deletions docs/en/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This page documents the changes in each Kimi Code CLI release.

## Unreleased

- Core: Fix OpenAI Responses provider sending implicit `reasoning.effort=null` which breaks Responses-compatible endpoints that require reasoning — reasoning parameters are now omitted unless explicitly set
- Core: Fix context compaction failing when conversation contains media parts (images, audio, video) — switch from blacklist filtering (exclude `ThinkPart`) to whitelist filtering (only keep `TextPart`) to prevent unsupported content types from being sent to the compaction API
- Web: Fix `@` file mention index not refreshing after switching sessions or when workspace files change — reset index on session switch, auto-refresh after 30s staleness, and support path-prefix search beyond the 500-file limit

Expand Down
1 change: 1 addition & 0 deletions docs/zh/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

## 未发布

- Core:修复 OpenAI Responses provider 隐式发送 `reasoning.effort=null` 导致需要推理的 Responses 兼容端点报错的问题——现在仅在显式设置时才发送推理参数
- Core:修复对话包含媒体内容(图片、音频、视频)时上下文压缩失败的问题——将过滤策略从黑名单(排除 `ThinkPart`)改为白名单(仅保留 `TextPart`),防止不支持的内容类型被发送到压缩 API
- Web:修复 `@` 文件提及索引在切换会话或工作区文件变更后不刷新的问题——切换会话时重置索引,30 秒过期自动刷新,输入路径前缀可查找超出 500 文件上限的文件

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,13 @@ async def generate(

generation_kwargs: dict[str, Any] = {}
generation_kwargs.update(self._generation_kwargs)
generation_kwargs["reasoning"] = Reasoning(
effort=generation_kwargs.pop("reasoning_effort", None),
summary="auto",
)
generation_kwargs["include"] = ["reasoning.encrypted_content"]
reasoning_effort = generation_kwargs.pop("reasoning_effort", None)
if reasoning_effort is not None:
generation_kwargs["reasoning"] = Reasoning(
effort=reasoning_effort,
summary="auto",
)
generation_kwargs["include"] = ["reasoning.encrypted_content"]

try:
response = await self._client.responses.create(
Expand Down
43 changes: 43 additions & 0 deletions packages/kosong/tests/api_snapshot_tests/test_openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,49 @@ async def test_openai_responses_generation_kwargs():
assert (body["temperature"], body["max_output_tokens"]) == snapshot((0.7, 2048))


async def test_openai_responses_omits_reasoning_by_default():
    """A provider created without any thinking configuration must not emit
    `reasoning` or `include` fields in the outgoing request body."""
    with respx.mock(base_url="https://api.openai.com") as mock:
        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
        chat_provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False)
        response_stream = await chat_provider.generate("", [], [Message(role="user", content="Hi")])
        async for _ in response_stream:
            pass
        # Inspect the raw request that was actually sent to the endpoint.
        sent_payload = json.loads(mock.calls.last.request.content.decode())
        assert "reasoning" not in sent_payload
        assert "include" not in sent_payload


async def test_openai_responses_with_thinking_off_omits_reasoning():
    """`with_thinking("off")` must behave like the default: no `reasoning`
    and no `include` in the request, because thinking_effort_to_reasoning_effort("off")
    yields None for the effort."""
    with respx.mock(base_url="https://api.openai.com") as mock:
        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
        chat_provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
            "off"
        )
        response_stream = await chat_provider.generate("", [], [Message(role="user", content="Hi")])
        async for _ in response_stream:
            pass
        # The serialized request must carry neither reasoning field.
        sent_payload = json.loads(mock.calls.last.request.content.decode())
        assert "reasoning" not in sent_payload
        assert "include" not in sent_payload


async def test_openai_responses_with_thinking_low():
    """An explicit `with_thinking("low")` must produce a request carrying
    `reasoning.effort="low"` plus the encrypted-content include flag."""
    with respx.mock(base_url="https://api.openai.com") as mock:
        mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
        chat_provider = OpenAIResponses(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
            "low"
        )
        response_stream = await chat_provider.generate("", [], [Message(role="user", content="Think")])
        async for _ in response_stream:
            pass
        # When effort is set, both reasoning and include must be present.
        sent_payload = json.loads(mock.calls.last.request.content.decode())
        assert sent_payload["reasoning"] == snapshot({"effort": "low", "summary": "auto"})
        assert sent_payload["include"] == snapshot(["reasoning.encrypted_content"])


async def test_openai_responses_with_thinking():
with respx.mock(base_url="https://api.openai.com") as mock:
mock.post("/v1/responses").mock(return_value=Response(200, json=make_response()))
Expand Down
30 changes: 30 additions & 0 deletions tests/core/test_create_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from inline_snapshot import snapshot
from kosong.chat_provider.echo import EchoChatProvider
from kosong.chat_provider.kimi import Kimi
from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
from pydantic import SecretStr

from kimi_cli.config import LLMModel, LLMProvider
Expand Down Expand Up @@ -133,3 +134,32 @@ def test_create_llm_requires_base_url_for_kimi():
model = LLMModel(provider="kimi", model="kimi-base", max_context_size=4096)

assert create_llm(provider, model) is None


def test_create_llm_openai_responses_thinking_false_no_reasoning_in_params():
    """`thinking=False` maps to `with_thinking("off")`, which stores
    `reasoning_effort=None` in the provider's generation kwargs.

    The stored None is intentional: OpenAIResponses.generate() is the layer
    that later drops `reasoning` from the actual API request when the effort
    is None, so the model parameters still record it here.
    """
    llm_provider = LLMProvider(
        type="openai_responses",
        base_url="https://openrouter.ai/api/v1",
        api_key=SecretStr("test-key"),
    )
    llm_model = LLMModel(
        provider="openrouter_custom",
        model="minimax/minimax-m2.5",
        max_context_size=128000,
        capabilities=None,
    )

    llm = create_llm(llm_provider, llm_model, thinking=False)

    assert llm is not None
    assert isinstance(llm.chat_provider, OpenAIResponses)
    # The None effort survives into the stored parameters; omission happens
    # at request-build time, not here.
    assert llm.chat_provider.model_parameters == snapshot(
        {
            "base_url": "https://openrouter.ai/api/v1/",
            "reasoning_effort": None,
        }
    )
Loading