Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions docs/my-website/docs/reasoning_content.md
Original file line number Diff line number Diff line change
Expand Up @@ -594,9 +594,26 @@ Expected Response

:::tip gpt-5.4: reasoning_effort + function tools

LiteLLM drops `reasoning_effort` from `gpt-5.4` requests to `litellm.completion()` that include tools, since that combination is supported in the Responses API.
When `gpt-5.4+` requests to `litellm.completion()` include both `reasoning_effort` and `tools`, LiteLLM **automatically routes** the request through the Responses API bridge. This works for both **OpenAI** (`openai/gpt-5.4`) and **Azure** (`azure/gpt-5.4`) providers — no extra configuration needed.

If you need reasoning **and** tools together, use `openai/responses/gpt-5.4` to route through the Responses API instead. See [Responses API Bridge](/docs/providers/openai#openai-chat-completion-to-responses-api-bridge) for details.
You can also route explicitly via `openai/responses/gpt-5.4` or `azure/responses/gpt-5.4`. See [Responses API Bridge](/docs/providers/openai#openai-chat-completion-to-responses-api-bridge) for details.

**Azure custom deployment names:** Auto-routing relies on the deployment name matching the `gpt-5.4*` pattern. If you use a custom deployment name (e.g. `"my-reasoning-model"`), enable routing via:

**SDK:**
```python
litellm.completion(model="azure/responses/my-reasoning-model", ...)
```

**Proxy config:**
```yaml
model_list:
- model_name: my-reasoning-model
litellm_params:
model: azure/my-reasoning-model
model_info:
mode: responses
```

:::

Expand Down
11 changes: 3 additions & 8 deletions litellm/llms/azure/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,9 @@ def map_openai_params(
if result_effort == "none" and not supports_none:
result.pop("reasoning_effort")

# Azure Chat Completions: gpt-5.4+ does not support tools + reasoning together.
# Drop reasoning_effort when both are present (OpenAI routes to Responses API; Azure does not).
if self.is_model_gpt_5_4_plus_model(model):
has_tools = bool(
non_default_params.get("tools") or optional_params.get("tools")
)
if has_tools and result_effort not in (None, "none"):
result.pop("reasoning_effort", None)
# Azure gpt-5.4+ with tools + reasoning_effort is now routed to the
# Responses API bridge (same as OpenAI), so we no longer need to drop
# reasoning_effort here. See: responses_api_bridge_check() in main.py.

return result

Expand Down
23 changes: 13 additions & 10 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,16 +955,6 @@ def responses_api_bridge_check(
model_info["mode"] = "responses"
model = model.replace("responses/", "")

# OpenAI gpt-5.4+ chat-completions calls with both tools + reasoning_effort
# must be bridged to Responses API.
if (
custom_llm_provider == "openai"
and OpenAIGPT5Config.is_model_gpt_5_4_plus_model(model)
and tools
and reasoning_effort is not None
):
model_info["mode"] = "responses"
model = model.replace("responses/", "")
except Exception as e:
verbose_logger.debug("Error getting model info: {}".format(e))

Expand All @@ -974,6 +964,19 @@ def responses_api_bridge_check(
model = model.replace("responses/", "")
mode = "responses"
model_info["mode"] = mode

# OpenAI/Azure gpt-5.4+ chat-completions calls with both tools + reasoning_effort
# must be bridged to Responses API.
if (
custom_llm_provider in ("openai", "azure")
and OpenAIGPT5Config.is_model_gpt_5_4_plus_model(model)
and tools
and reasoning_effort is not None
and model_info.get("mode") != "responses"
):
model_info["mode"] = "responses"
model = model.replace("responses/", "")
Comment thread
Chesars marked this conversation as resolved.
Comment thread
Chesars marked this conversation as resolved.
Comment thread
Chesars marked this conversation as resolved.

Comment thread
Chesars marked this conversation as resolved.
return model_info, model


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,11 @@ def test_azure_gpt5_1_series_temperature_handling(config: AzureOpenAIGPT5Config)
assert params["temperature"] == 0.6


def test_azure_gpt5_4_drops_reasoning_effort_when_tools_present(config: AzureOpenAIGPT5Config):
"""Azure Chat Completions: gpt-5.4+ drops reasoning_effort when tools are present.
def test_azure_gpt5_4_preserves_reasoning_effort_when_tools_present(config: AzureOpenAIGPT5Config):
"""Azure GPT-5.4+ no longer drops reasoning_effort when tools are present.

OpenAI routes tools+reasoning to Responses API; Azure does not, so we drop reasoning_effort.
Both OpenAI and Azure now route tools+reasoning to the Responses API bridge,
so reasoning_effort must be preserved in map_openai_params.
"""
tools = [{"type": "function", "function": {"name": "test", "description": "test"}}]
params = config.map_openai_params(
Expand All @@ -205,7 +206,7 @@ def test_azure_gpt5_4_drops_reasoning_effort_when_tools_present(config: AzureOpe
drop_params=False,
api_version="2024-05-01-preview",
)
assert "reasoning_effort" not in params
assert params.get("reasoning_effort") == "high"
assert params["tools"] == tools


Expand Down
34 changes: 34 additions & 0 deletions tests/test_litellm/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,40 @@ def test_responses_api_bridge_check_gpt_5_5_tools_plus_reasoning_routes_to_respo
assert model_info.get("mode") == "responses"


def test_responses_api_bridge_check_azure_gpt_5_4_tools_plus_reasoning_routes_to_responses():
    """Azure gpt-5.4 with both tools and reasoning_effort should route to Responses API."""
    from litellm.main import responses_api_bridge_check

    # A tools + reasoning_effort combination on Azure gpt-5.4 must flip the
    # bridge into "responses" mode while leaving the model name untouched.
    tool_list = [{"type": "function", "function": {"name": "get_capital"}}]
    with patch("litellm.main._get_model_info_helper") as info_helper_mock:
        info_helper_mock.return_value = {"max_tokens": 128000}
        resolved_info, resolved_model = responses_api_bridge_check(
            model="gpt-5.4",
            custom_llm_provider="azure",
            tools=tool_list,
            reasoning_effort="high",
        )

        assert resolved_model == "gpt-5.4"
        assert resolved_info.get("mode") == "responses"


def test_responses_api_bridge_check_azure_gpt_5_4_tools_without_reasoning_stays_chat():
    """Azure gpt-5.4 with tools only should not be force-routed to Responses API."""
    from litellm.main import responses_api_bridge_check

    # Without reasoning_effort the bridge condition is not met, so the call
    # must stay on chat completions ("mode" never set to "responses").
    tool_list = [{"type": "function", "function": {"name": "get_capital"}}]
    with patch("litellm.main._get_model_info_helper") as info_helper_mock:
        info_helper_mock.return_value = {"max_tokens": 128000}
        resolved_info, resolved_model = responses_api_bridge_check(
            model="gpt-5.4",
            custom_llm_provider="azure",
            tools=tool_list,
            reasoning_effort=None,
        )

        assert resolved_model == "gpt-5.4"
        assert resolved_info.get("mode") != "responses"


def test_responses_api_bridge_check_gpt_5_4_tools_without_reasoning_stays_chat():
"""gpt-5.4 with tools only should not be force-routed to Responses API."""
from litellm.main import responses_api_bridge_check
Expand Down
Loading