Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions docs/my-website/docs/reasoning_content.md
Original file line number Diff line number Diff line change
Expand Up @@ -594,9 +594,26 @@ Expected Response

:::tip gpt-5.4: reasoning_effort + function tools

LiteLLM drops `reasoning_effort` from `gpt-5.4` requests to `litellm.completion()` that include tools, since that combination is supported in the Responses API.
When `gpt-5.4+` requests to `litellm.completion()` include both `reasoning_effort` and `tools`, LiteLLM **automatically routes** the request through the Responses API bridge. This works for both **OpenAI** (`openai/gpt-5.4`) and **Azure** (`azure/gpt-5.4`) providers — no extra configuration needed.

If you need reasoning **and** tools together, use `openai/responses/gpt-5.4` to route through the Responses API instead. See [Responses API Bridge](/docs/providers/openai#openai-chat-completion-to-responses-api-bridge) for details.
You can also route explicitly via `openai/responses/gpt-5.4` or `azure/responses/gpt-5.4`. See [Responses API Bridge](/docs/providers/openai#openai-chat-completion-to-responses-api-bridge) for details.

**Azure custom deployment names:** Auto-routing relies on the deployment name matching the `gpt-5.4*` pattern. If you use a custom deployment name (e.g. `"my-reasoning-model"`), enable routing via:

**SDK:**
```python
litellm.completion(model="azure/responses/my-reasoning-model", ...)
```

**Proxy config:**
```yaml
model_list:
- model_name: my-reasoning-model
litellm_params:
model: azure/my-reasoning-model
model_info:
mode: responses
```

:::

Expand Down
11 changes: 3 additions & 8 deletions litellm/llms/azure/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,9 @@ def map_openai_params(
if result_effort == "none" and not supports_none:
result.pop("reasoning_effort")

# Azure Chat Completions: gpt-5.4+ does not support tools + reasoning together.
# Drop reasoning_effort when both are present (OpenAI routes to Responses API; Azure does not).
if self.is_model_gpt_5_4_plus_model(model):
has_tools = bool(
non_default_params.get("tools") or optional_params.get("tools")
)
if has_tools and result_effort not in (None, "none"):
result.pop("reasoning_effort", None)
# Azure gpt-5.4+ with tools + reasoning_effort is now routed to the
# Responses API bridge (same as OpenAI), so we no longer need to drop
# reasoning_effort here. See: responses_api_bridge_check() in main.py.

return result

Expand Down
23 changes: 13 additions & 10 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,16 +955,6 @@ def responses_api_bridge_check(
model_info["mode"] = "responses"
model = model.replace("responses/", "")

# OpenAI gpt-5.4+ chat-completions calls with both tools + reasoning_effort
# must be bridged to Responses API.
if (
custom_llm_provider == "openai"
and OpenAIGPT5Config.is_model_gpt_5_4_plus_model(model)
and tools
and reasoning_effort is not None
):
model_info["mode"] = "responses"
model = model.replace("responses/", "")
except Exception as e:
verbose_logger.debug("Error getting model info: {}".format(e))

Expand All @@ -974,6 +964,19 @@ def responses_api_bridge_check(
model = model.replace("responses/", "")
mode = "responses"
model_info["mode"] = mode

# OpenAI/Azure gpt-5.4+ chat-completions calls with both tools + reasoning_effort
# must be bridged to Responses API.
if (
custom_llm_provider in ("openai", "azure")
and OpenAIGPT5Config.is_model_gpt_5_4_plus_model(model)
and tools
and reasoning_effort is not None
and model_info.get("mode") != "responses"
):
model_info["mode"] = "responses"
model = model.replace("responses/", "")
Comment thread
Chesars marked this conversation as resolved.
Comment thread
Chesars marked this conversation as resolved.
Comment thread
Chesars marked this conversation as resolved.

Comment thread
Chesars marked this conversation as resolved.
return model_info, model


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,11 @@ def test_azure_gpt5_1_series_temperature_handling(config: AzureOpenAIGPT5Config)
assert params["temperature"] == 0.6


def test_azure_gpt5_4_drops_reasoning_effort_when_tools_present(config: AzureOpenAIGPT5Config):
"""Azure Chat Completions: gpt-5.4+ drops reasoning_effort when tools are present.
def test_azure_gpt5_4_preserves_reasoning_effort_when_tools_present(config: AzureOpenAIGPT5Config):
"""Azure GPT-5.4+ no longer drops reasoning_effort when tools are present.

OpenAI routes tools+reasoning to Responses API; Azure does not, so we drop reasoning_effort.
Both OpenAI and Azure now route tools+reasoning to the Responses API bridge,
so reasoning_effort must be preserved in map_openai_params.
"""
tools = [{"type": "function", "function": {"name": "test", "description": "test"}}]
params = config.map_openai_params(
Expand All @@ -205,7 +206,7 @@ def test_azure_gpt5_4_drops_reasoning_effort_when_tools_present(config: AzureOpe
drop_params=False,
api_version="2024-05-01-preview",
)
assert "reasoning_effort" not in params
assert params.get("reasoning_effort") == "high"
assert params["tools"] == tools


Expand Down
34 changes: 34 additions & 0 deletions tests/test_litellm/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,40 @@ def test_responses_api_bridge_check_gpt_5_5_tools_plus_reasoning_routes_to_respo
assert model_info.get("mode") == "responses"


def test_responses_api_bridge_check_azure_gpt_5_4_tools_plus_reasoning_routes_to_responses():
    """Azure gpt-5.4 with both tools and reasoning_effort should route to Responses API."""
    from litellm.main import responses_api_bridge_check

    # A tools + reasoning_effort combination on Azure gpt-5.4 must flip the
    # bridge into "responses" mode while leaving the model name untouched.
    tool_list = [{"type": "function", "function": {"name": "get_capital"}}]
    with patch("litellm.main._get_model_info_helper") as info_helper_mock:
        info_helper_mock.return_value = {"max_tokens": 128000}
        resolved_info, resolved_model = responses_api_bridge_check(
            model="gpt-5.4",
            custom_llm_provider="azure",
            tools=tool_list,
            reasoning_effort="high",
        )

        assert resolved_model == "gpt-5.4"
        assert resolved_info.get("mode") == "responses"


def test_responses_api_bridge_check_azure_gpt_5_4_tools_without_reasoning_stays_chat():
    """Azure gpt-5.4 with tools only should not be force-routed to Responses API."""
    from litellm.main import responses_api_bridge_check

    # Without reasoning_effort the bridge condition is not met, so the call
    # must stay on chat completions ("mode" never set to "responses").
    tool_list = [{"type": "function", "function": {"name": "get_capital"}}]
    with patch("litellm.main._get_model_info_helper") as info_helper_mock:
        info_helper_mock.return_value = {"max_tokens": 128000}
        resolved_info, resolved_model = responses_api_bridge_check(
            model="gpt-5.4",
            custom_llm_provider="azure",
            tools=tool_list,
            reasoning_effort=None,
        )

        assert resolved_model == "gpt-5.4"
        assert resolved_info.get("mode") != "responses"


def test_responses_api_bridge_check_gpt_5_4_tools_without_reasoning_stays_chat():
"""gpt-5.4 with tools only should not be force-routed to Responses API."""
from litellm.main import responses_api_bridge_check
Expand Down
Loading