From 2d4a7518351c472fbe4fa65cc0b7584c604b5bc3 Mon Sep 17 00:00:00 2001
From: Alexander Vostres <alex.vostres@gmail.com>
Date: Sat, 18 Apr 2026 14:59:08 +0300
Subject: [PATCH 1/5] Implement per-model reasoning fixups for Anthropic models

---
 crates/forge_app/src/dto/anthropic/request.rs | 342 ++++++++++-----
 .../src/dto/anthropic/transforms/mod.rs       |   2 +
 .../transforms/model_specific_thinking.rs     | 388 ++++++++++++++++++
 .../transforms/reasoning_transform.rs         |  55 ++-
 crates/forge_domain/src/context.rs            |  37 ++
 crates/forge_repo/src/provider/anthropic.rs   |  24 +-
 6 files changed, 747 insertions(+), 101 deletions(-)
 create mode 100644 crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs
index 7527e5f3da..32a91f35d8 100644
--- a/crates/forge_app/src/dto/anthropic/request.rs
+++ b/crates/forge_app/src/dto/anthropic/request.rs
@@ -60,27 +60,40 @@ impl SystemMessage {
     }
 }
 
-#[derive(Serialize, Default, Debug, PartialEq, Eq)]
-pub struct Thinking {
-    pub r#type: ThinkingType,
-    pub budget_tokens: u64,
+/// Anthropic's `thinking` request field. Opus 4.7 rejects the `Enabled` shape
+/// with a 400 — the `ModelSpecificThinking` transformer handles the migration.
+#[derive(Serialize, Debug, PartialEq, Eq)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum Thinking {
+    Enabled {
+        budget_tokens: u64,
+    },
+    Adaptive {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        display: Option<ThinkingDisplay>,
+    },
+    Disabled,
+}
+
+/// On Opus 4.7 adaptive thinking content is omitted from responses unless
+/// `Summarized` is requested explicitly.
+#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum ThinkingDisplay {
+    Summarized,
+    Omitted,
 }
 
-/// Effort level for Anthropic's `output_config` API.
-///
-/// Only the variants officially supported by Anthropic's `output_config.effort`
-/// field. Mutually exclusive with the `thinking` object.
 #[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)]
 #[serde(rename_all = "lowercase")]
 pub enum OutputEffort {
     Low,
     Medium,
     High,
+    XHigh,
     Max,
 }
 
-/// Output configuration for newer Anthropic models that support effort-based
-/// reasoning (e.g. `claude-opus-4-6`).  Mutually exclusive with `thinking`.
 #[derive(Serialize, Debug, PartialEq, Eq)]
 pub struct OutputConfig {
     pub effort: OutputEffort,
@@ -93,14 +106,6 @@ pub enum OutputFormat {
     JsonSchema { schema: schemars::Schema },
 }
 
-#[derive(Serialize, Default, Debug, Clone, Copy, PartialEq, Eq)]
-#[serde(rename_all = "lowercase")]
-pub enum ThinkingType {
-    #[default]
-    Enabled,
-    Disabled,
-}
-
 impl TryFrom<forge_domain::Context> for Request {
     type Error = anyhow::Error;
     fn try_from(request: forge_domain::Context) -> std::result::Result<Self, Self::Error> {
@@ -119,55 +124,41 @@ impl TryFrom<forge_domain::Context> for Request {
             })
             .collect::<Vec<_>>();
 
-        // Route reasoning config to the correct Anthropic serialization.
-        // All paths require enabled == Some(true); without it nothing is emitted.
-        //
-        // • enabled + max_tokens → thinking object (older models, e.g.
-        // claude-3-7-sonnet).   An explicit reasoning budget unambiguously
-        // selects the extended-thinking API.   effort (which may arrive from
-        // embedded defaults) is ignored in this branch.
-        //
-        // • enabled + effort, no max_tokens → output_config.effort (newer models, e.g.
-        //   claude-opus-4-6).  No token budget means the caller chose the effort-based
-        // API.
-        //
-        // • enabled only (no effort, no max_tokens) → thinking with a default budget.
-        let (thinking, output_config) = if let Some(reasoning) = request.reasoning {
-            if reasoning.enabled == Some(true) {
-                if let Some(budget) = reasoning.max_tokens {
-                    // Explicit budget → thinking object regardless of effort.
-                    (
-                        Some(Thinking {
-                            r#type: ThinkingType::Enabled,
-                            budget_tokens: budget as u64,
-                        }),
-                        None,
-                    )
-                } else if let Some(effort) = reasoning.effort {
-                    // Effort without budget → newer output_config API.
-                    let output_effort = match effort {
-                        forge_domain::Effort::Low => OutputEffort::Low,
-                        forge_domain::Effort::High => OutputEffort::High,
-                        forge_domain::Effort::Max => OutputEffort::Max,
-                        // Map unsupported variants to the nearest Anthropic-valid effort.
-                        forge_domain::Effort::None | forge_domain::Effort::Minimal => {
-                            OutputEffort::Low
-                        }
-                        forge_domain::Effort::Medium => OutputEffort::Medium,
-                        forge_domain::Effort::XHigh => OutputEffort::Max,
-                    };
-                    (None, Some(OutputConfig { effort: output_effort }))
-                } else {
-                    // Enabled-only → thinking with default budget.
-                    (
-                        Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 10000 }),
-                        None,
-                    )
-                }
+        // Gate on the domain rule so inherited configs with `enabled: None` but
+        // a positive effort / `max_tokens` still emit reasoning on the wire.
+        let reasoning_on = request.is_reasoning_supported();
+        let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning
+        {
+            // Adaptive thinking on 4.7 hides reasoning content by default; opting
+            // into reasoning should surface it unless the caller set `exclude`.
+            let adaptive_display = if reasoning.exclude == Some(true) {
+                Some(ThinkingDisplay::Omitted)
             } else {
-                // enabled=false or enabled=None → no reasoning emitted.
-                (None, None)
-            }
+                Some(ThinkingDisplay::Summarized)
+            };
+
+            let thinking = if let Some(budget) = reasoning.max_tokens {
+                Thinking::Enabled { budget_tokens: budget as u64 }
+            } else {
+                Thinking::Adaptive { display: adaptive_display }
+            };
+
+            // `Effort::None` is an explicit opt-out; `is_reasoning_supported`
+            // already filters it, but guard here so it can never become a stray
+            // `output_config.effort`.
+            let output_config = reasoning.effort.and_then(|effort| {
+                let output_effort = match effort {
+                    forge_domain::Effort::None => return None,
+                    forge_domain::Effort::Minimal | forge_domain::Effort::Low => OutputEffort::Low,
+                    forge_domain::Effort::Medium => OutputEffort::Medium,
+                    forge_domain::Effort::High => OutputEffort::High,
+                    forge_domain::Effort::XHigh => OutputEffort::XHigh,
+                    forge_domain::Effort::Max => OutputEffort::Max,
+                };
+                Some(OutputConfig { effort: output_effort })
+            });
+
+            (Some(thinking), output_config)
         } else {
             (None, None)
         };
@@ -540,37 +531,49 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_thinking_type_serializes_to_enabled() {
-        let thinking_type = ThinkingType::Enabled;
-        let actual = serde_json::to_string(&thinking_type).unwrap();
-        let expected = r#""enabled""#;
+    fn test_thinking_enabled_serializes_with_budget() {
+        let thinking = Thinking::Enabled { budget_tokens: 5000 };
+        let actual = serde_json::to_value(&thinking).unwrap();
+        let expected = serde_json::json!({
+            "type": "enabled",
+            "budget_tokens": 5000
+        });
 
         assert_eq!(actual, expected);
     }
 
     #[test]
-    fn test_thinking_type_serializes_to_disabled() {
-        let thinking_type = ThinkingType::Disabled;
-        let actual = serde_json::to_string(&thinking_type).unwrap();
-        let expected = r#""disabled""#;
+    fn test_thinking_adaptive_serializes_without_display_when_none() {
+        let thinking = Thinking::Adaptive { display: None };
+        let actual = serde_json::to_value(&thinking).unwrap();
+        let expected = serde_json::json!({"type": "adaptive"});
 
         assert_eq!(actual, expected);
     }
 
     #[test]
-    fn test_thinking_struct_serializes_correctly() {
-        let thinking = Thinking { r#type: ThinkingType::Enabled, budget_tokens: 5000 };
+    fn test_thinking_adaptive_serializes_with_summarized_display() {
+        let thinking = Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) };
         let actual = serde_json::to_value(&thinking).unwrap();
         let expected = serde_json::json!({
-            "type": "enabled",
-            "budget_tokens": 5000
+            "type": "adaptive",
+            "display": "summarized"
         });
 
         assert_eq!(actual, expected);
     }
 
     #[test]
-    fn test_reasoning_enabled_with_max_tokens_creates_thinking() {
+    fn test_thinking_disabled_serializes() {
+        let thinking = Thinking::Disabled;
+        let actual = serde_json::to_value(&thinking).unwrap();
+        let expected = serde_json::json!({"type": "disabled"});
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_reasoning_enabled_with_max_tokens_creates_enabled_thinking() {
         let fixture = Context::default().reasoning(ReasoningConfig {
             enabled: Some(true),
             max_tokens: Some(8000),
@@ -580,17 +583,13 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 8000 })
-        );
+        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
         assert_eq!(actual.output_config, None);
     }
 
     #[test]
-    fn test_reasoning_max_tokens_takes_priority_over_effort() {
-        // When both max_tokens and effort are set, max_tokens triggers the thinking
-        // path because an explicit budget means the caller wants the older API.
+    fn test_reasoning_max_tokens_and_effort_emit_both() {
+        // Effort and budget are independent knobs — neither should hide the other.
         let fixture = Context::default().reasoning(ReasoningConfig {
             effort: Some(forge_domain::Effort::Low),
             enabled: Some(true),
@@ -600,16 +599,30 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
+        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
         assert_eq!(
-            actual.thinking,
-            Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 8000 })
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::Low })
         );
+    }
+
+    #[test]
+    fn test_reasoning_max_tokens_alone_emits_enabled_only() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            effort: None,
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
         assert_eq!(actual.output_config, None);
     }
 
     #[test]
-    fn test_reasoning_effort_without_budget_creates_output_config() {
-        // Effort with no max_tokens routes to output_config (newer model path).
+    fn test_reasoning_effort_without_budget_creates_output_config_and_adaptive() {
         let fixture = Context::default().reasoning(ReasoningConfig {
             effort: Some(forge_domain::Effort::Low),
             enabled: Some(true),
@@ -623,11 +636,48 @@ mod tests {
             actual.output_config,
             Some(OutputConfig { effort: OutputEffort::Low })
         );
-        assert_eq!(actual.thinking, None);
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+    }
+
+    #[test]
+    fn test_reasoning_effort_with_exclude_emits_adaptive_omitted() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            effort: Some(forge_domain::Effort::High),
+            enabled: Some(true),
+            max_tokens: None,
+            exclude: Some(true),
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) })
+        );
+    }
+
+    #[test]
+    fn test_reasoning_xhigh_effort_maps_to_xhigh() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            effort: Some(forge_domain::Effort::XHigh),
+            enabled: Some(true),
+            max_tokens: None,
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::XHigh })
+        );
     }
 
     #[test]
-    fn test_reasoning_enabled_without_max_tokens_uses_default_budget() {
+    fn test_reasoning_enabled_without_budget_or_effort_defaults_to_adaptive_summarized() {
         let fixture = Context::default().reasoning(ReasoningConfig {
             enabled: Some(true),
             max_tokens: None,
@@ -639,7 +689,24 @@ mod tests {
 
         assert_eq!(
             actual.thinking,
-            Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 10000 })
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+    }
+
+    #[test]
+    fn test_reasoning_enabled_with_exclude_uses_omitted_display() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: None,
+            exclude: Some(true),
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) })
         );
     }
 
@@ -658,7 +725,10 @@ mod tests {
     }
 
     #[test]
-    fn test_reasoning_enabled_none_does_not_create_thinking() {
+    fn test_reasoning_enabled_none_with_max_tokens_still_emits_thinking() {
+        // Matches the domain's `is_reasoning_supported` rule: enabled: None with a
+        // positive budget counts as on, so inherited/merged configs don't silently
+        // disable reasoning on the wire.
         let fixture = Context::default().reasoning(ReasoningConfig {
             enabled: None,
             max_tokens: Some(8000),
@@ -668,7 +738,91 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
+        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+    }
+
+    #[test]
+    fn test_reasoning_enabled_none_with_effort_still_emits_output_config() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: None,
+            max_tokens: None,
+            effort: Some(forge_domain::Effort::High),
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::High })
+        );
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+    }
+
+    #[test]
+    fn test_reasoning_enabled_none_with_zero_max_tokens_does_not_emit() {
+        // Matches `is_reasoning_supported`: max_tokens > 0 is required.
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: None,
+            max_tokens: Some(0),
+            effort: None,
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
         assert_eq!(actual.thinking, None);
+        assert_eq!(actual.output_config, None);
+    }
+
+    #[test]
+    fn test_reasoning_effort_none_does_not_emit_anything() {
+        // Effort::None is an explicit opt-out — no thinking, no output_config.
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: None,
+            max_tokens: None,
+            effort: Some(forge_domain::Effort::None),
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(actual.thinking, None);
+        assert_eq!(actual.output_config, None);
+    }
+
+    #[test]
+    fn test_reasoning_effort_none_overrides_enabled_and_max_tokens() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            effort: Some(forge_domain::Effort::None),
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(actual.thinking, None);
+        assert_eq!(actual.output_config, None);
+    }
+
+    #[test]
+    fn test_reasoning_enabled_false_overrides_effort() {
+        // Explicit opt-out beats inferred enablement.
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(false),
+            max_tokens: None,
+            effort: Some(forge_domain::Effort::High),
+            exclude: None,
+        });
+
+        let actual = Request::try_from(fixture).unwrap();
+
+        assert_eq!(actual.thinking, None);
+        assert_eq!(actual.output_config, None);
     }
 
     #[test]
diff --git a/crates/forge_app/src/dto/anthropic/transforms/mod.rs b/crates/forge_app/src/dto/anthropic/transforms/mod.rs
index 3ea6d5f183..b9a3b2f765 100644
--- a/crates/forge_app/src/dto/anthropic/transforms/mod.rs
+++ b/crates/forge_app/src/dto/anthropic/transforms/mod.rs
@@ -2,6 +2,7 @@ mod auth_system_message;
 mod capitalize_tool_names;
 mod drop_invalid_toolcalls;
 mod enforce_schema;
+mod model_specific_thinking;
 mod reasoning_transform;
 mod remove_output_format;
 mod sanitize_tool_ids;
@@ -11,6 +12,7 @@ pub use auth_system_message::AuthSystemMessage;
 pub use capitalize_tool_names::CapitalizeToolNames;
 pub use drop_invalid_toolcalls::DropInvalidToolUse;
 pub use enforce_schema::EnforceStrictObjectSchema;
+pub use model_specific_thinking::ModelSpecificThinking;
 pub use reasoning_transform::ReasoningTransform;
 pub use remove_output_format::RemoveOutputFormat;
 pub use sanitize_tool_ids::SanitizeToolIds;
diff --git a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
new file mode 100644
index 0000000000..f20bc95535
--- /dev/null
+++ b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
@@ -0,0 +1,388 @@
+use forge_domain::Transformer;
+use tracing::warn;
+
+use crate::dto::anthropic::{OutputConfig, OutputEffort, Request, Thinking, ThinkingDisplay};
+
+/// Default budget applied when converting `Adaptive` thinking to the legacy
+/// `Enabled` shape for pre-4.6 models that don't accept adaptive.
+const DEFAULT_LEGACY_BUDGET_TOKENS: u64 = 10000;
+
+/// Rewrites the `thinking` and `output_config` fields and strips rejected
+/// sampling parameters to match the target model's API contract. Classifies
+/// models into four tiers based on the official Anthropic docs:
+///
+/// | Tier               | Matching ids             | Thinking      | Effort | xhigh | max   |
+/// |--------------------|--------------------------|---------------|--------|-------|-------|
+/// | `AdaptiveOnly`     | `opus-4-7`               | adaptive only | ✓      | ✓     | ✓     |
+/// | `AdaptiveFriendly` | `opus-4-6`, `sonnet-4-6` | both shapes   | ✓      | →max  | ✓     |
+/// | `LegacyWithEffort` | `opus-4-5`               | enabled only  | ✓      | →high | →high |
+/// | `LegacyNoEffort`   | everything else          | enabled only  | drop   | drop  | drop  |
+///
+/// Additional behaviour:
+/// - `AdaptiveOnly` strips `temperature`/`top_p`/`top_k` (4.7 rejects
+///   non-defaults) and warns when a caller-supplied `budget_tokens` is dropped.
+/// - When `Enabled` is rewritten to `Adaptive` on 4.7, the configured `display`
+///   is applied (the caller's `exclude: true` is expected to map to `Omitted`).
+/// - Legacy tiers rewrite `Adaptive` to `Enabled` with `DEFAULT_LEGACY_BUDGET_TOKENS`.
+pub struct ModelSpecificThinking {
+    model_id: String,
+    /// Defaults to `Summarized` because the legacy `Enabled` shape always
+    /// produced visible reasoning; a 4.7 migration should preserve that unless
+    /// the caller opts out via `exclude`.
+    display: ThinkingDisplay,
+}
+
+impl ModelSpecificThinking {
+    /// Creates a transformer for `model_id` with the `Summarized` display.
+    pub fn new(model_id: impl Into<String>) -> Self {
+        Self { model_id: model_id.into(), display: ThinkingDisplay::Summarized }
+    }
+
+    /// Sets the display applied when `Enabled` is rewritten to `Adaptive`.
+    pub fn display(mut self, display: ThinkingDisplay) -> Self {
+        self.display = display;
+        self
+    }
+
+    /// Classifies the model id by case-insensitive substring match; dots are
+    /// normalized to dashes so `claude-opus-4.7` matches like `claude-opus-4-7`.
+    fn family(&self) -> ModelFamily {
+        let id = self.model_id.to_lowercase().replace('.', "-");
+        if id.contains("opus-4-7") {
+            ModelFamily::AdaptiveOnly
+        } else if id.contains("opus-4-6") || id.contains("sonnet-4-6") {
+            ModelFamily::AdaptiveFriendly
+        } else if id.contains("opus-4-5") {
+            ModelFamily::LegacyWithEffort
+        } else {
+            ModelFamily::LegacyNoEffort
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+enum ModelFamily {
+    AdaptiveOnly,
+    AdaptiveFriendly,
+    LegacyWithEffort,
+    LegacyNoEffort,
+}
+
+/// Rewrites an `xhigh` effort to `max` — the closest supported level, even
+/// though it spends more tokens than `xhigh` would have.
+fn replace_xhigh_with_max(output_config: &mut Option<OutputConfig>) {
+    // Nothing to rewrite when the request carries no `output_config`.
+    let Some(config) = output_config else { return };
+    if matches!(config.effort, OutputEffort::XHigh) {
+        config.effort = OutputEffort::Max;
+    }
+}
+
+/// Lowers `xhigh` and `max` efforts to `high`; other levels are unchanged.
+fn clamp_effort_to_high(output_config: &mut Option<OutputConfig>) {
+    let Some(config) = output_config else { return };
+    if let OutputEffort::XHigh | OutputEffort::Max = config.effort {
+        config.effort = OutputEffort::High;
+    }
+}
+
+impl Transformer for ModelSpecificThinking {
+    type Value = Request;
+
+    /// Reshapes `request` for the family that `self.model_id` belongs to.
+    fn transform(&mut self, mut request: Self::Value) -> Self::Value {
+        let family = self.family();
+
+        // Only the two legacy tiers rewrite `Adaptive` into the `Enabled` shape.
+        let is_legacy = matches!(
+            family,
+            ModelFamily::LegacyWithEffort | ModelFamily::LegacyNoEffort
+        );
+        if is_legacy && matches!(request.thinking, Some(Thinking::Adaptive { .. })) {
+            let budget_tokens = DEFAULT_LEGACY_BUDGET_TOKENS;
+            request.thinking = Some(Thinking::Enabled { budget_tokens });
+        }
+
+        match family {
+            ModelFamily::AdaptiveFriendly => replace_xhigh_with_max(&mut request.output_config),
+            ModelFamily::LegacyWithEffort => clamp_effort_to_high(&mut request.output_config),
+            ModelFamily::LegacyNoEffort => {
+                // `take` clears the field; the warning fires only if it was set.
+                if request.output_config.take().is_some() {
+                    warn!(
+                        model = %self.model_id,
+                        "Dropping `output_config.effort`: the effort parameter is only \
+                         supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7."
+                    );
+                }
+            }
+            ModelFamily::AdaptiveOnly => {
+                if let Some(Thinking::Enabled { budget_tokens }) = request.thinking {
+                    warn!(
+                        model = %self.model_id,
+                        dropped_budget_tokens = budget_tokens,
+                        "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking \
+                         budgets are unsupported. Use `reasoning.effort` to control \
+                         thinking depth instead."
+                    );
+                    let display = Some(self.display);
+                    request.thinking = Some(Thinking::Adaptive { display });
+                }
+                // Sampling parameters are cleared whether or not thinking was set.
+                request.temperature = None;
+                request.top_p = None;
+                request.top_k = None;
+            }
+        }
+
+        request
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use forge_domain::Transformer;
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+    use crate::dto::anthropic::{Request, Thinking};
+
+    fn fixture_request_with_thinking(thinking: Thinking) -> Request {
+        Request::default()
+            .thinking(thinking)
+            .temperature(0.5f32)
+            .top_p(0.9f32)
+            .top_k(40u64)
+    }
+
+    #[test]
+    fn test_opus_4_7_rewrites_enabled_to_adaptive_with_summarized_display() {
+        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
+        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_7_strips_sampling_params() {
+        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
+        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
+
+        assert_eq!(actual.temperature, None);
+        assert_eq!(actual.top_p, None);
+        assert_eq!(actual.top_k, None);
+    }
+
+    #[test]
+    fn test_opus_4_7_strips_sampling_params_even_without_thinking() {
+        let fixture = Request::default().temperature(0.5f32).top_p(0.9f32).top_k(40u64);
+        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
+
+        assert_eq!(actual.temperature, None);
+        assert_eq!(actual.top_p, None);
+        assert_eq!(actual.top_k, None);
+    }
+
+    #[test]
+    fn test_opus_4_7_bedrock_prefix_still_matches() {
+        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
+        let actual =
+            ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture);
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+        assert_eq!(actual.temperature, None);
+    }
+
+    #[test]
+    fn test_opus_4_7_passes_through_adaptive_with_existing_display() {
+        let fixture = fixture_request_with_thinking(Thinking::Adaptive {
+            display: Some(ThinkingDisplay::Omitted),
+        });
+        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_6_passes_both_shapes_through() {
+        let fixture1 = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
+        let actual1 = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture1);
+        assert_eq!(actual1.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(actual1.temperature, Some(0.5));
+
+        let fixture2 = fixture_request_with_thinking(Thinking::Adaptive {
+            display: Some(ThinkingDisplay::Summarized),
+        });
+        let actual2 = ModelSpecificThinking::new("claude-sonnet-4-6").transform(fixture2);
+        assert_eq!(
+            actual2.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_5_rewrites_adaptive_to_enabled() {
+        let fixture = fixture_request_with_thinking(Thinking::Adaptive {
+            display: Some(ThinkingDisplay::Summarized),
+        });
+        let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS })
+        );
+    }
+
+    #[test]
+    fn test_legacy_no_effort_model_passes_enabled_through() {
+        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
+        let actual = ModelSpecificThinking::new("claude-3-7-sonnet-20250219").transform(fixture);
+
+        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(actual.temperature, Some(0.5));
+    }
+
+    #[test]
+    fn test_no_thinking_is_preserved_everywhere() {
+        for model in [
+            "claude-opus-4-7",
+            "claude-opus-4-6",
+            "claude-opus-4-5-20251101",
+            "claude-3-7-sonnet-20250219",
+        ] {
+            let fixture = Request::default();
+            let actual = ModelSpecificThinking::new(model).transform(fixture);
+            assert_eq!(actual.thinking, None, "model {}", model);
+        }
+    }
+
+    #[test]
+    fn test_opus_4_7_uses_configured_display_when_rewriting_enabled() {
+        // The caller's `exclude: true` preference (→ `Omitted` display) must
+        // survive the 4.7 `Enabled → Adaptive` rewrite; otherwise hidden
+        // reasoning would silently become visible.
+        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
+        let actual = ModelSpecificThinking::new("claude-opus-4-7")
+            .display(ThinkingDisplay::Omitted)
+            .transform(fixture);
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_7_preserves_effort_when_dropping_budget() {
+        // When both `max_tokens` and `effort` are set on 4.7, dropping the
+        // budget must not take the effort signal with it — effort is the only
+        // remaining depth knob on 4.7.
+        let fixture = Request::default()
+            .thinking(Thinking::Enabled { budget_tokens: 8000 })
+            .output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
+
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
+        );
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::XHigh })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_7_preserves_xhigh_effort() {
+        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::XHigh })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_6_replaces_xhigh_with_max() {
+        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture);
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::Max })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_5_clamps_xhigh_to_high() {
+        // Opus 4.5 supports effort but not xhigh or max; clamp to high.
+        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::High })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_5_clamps_max_to_high() {
+        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::Max });
+        let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::High })
+        );
+    }
+
+    #[test]
+    fn test_opus_4_5_preserves_supported_effort_levels() {
+        for level in [OutputEffort::Low, OutputEffort::Medium, OutputEffort::High] {
+            let fixture = Request::default().output_config(OutputConfig { effort: level });
+            let actual =
+                ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
+            assert_eq!(
+                actual.output_config,
+                Some(OutputConfig { effort: level }),
+                "level {:?}",
+                level
+            );
+        }
+    }
+
+    #[test]
+    fn test_legacy_no_effort_models_drop_output_config() {
+        for model in [
+            "claude-sonnet-4-5-20250929",
+            "claude-haiku-4-5-20251001",
+            "claude-opus-4-1-20250805",
+            "claude-opus-4-20250514",
+            "claude-3-7-sonnet-20250219",
+        ] {
+            let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High });
+            let actual = ModelSpecificThinking::new(model).transform(fixture);
+            assert_eq!(actual.output_config, None, "model {}", model);
+        }
+    }
+
+    #[test]
+    fn test_adaptive_friendly_preserves_high_effort() {
+        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High });
+        let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture);
+
+        assert_eq!(
+            actual.output_config,
+            Some(OutputConfig { effort: OutputEffort::High })
+        );
+    }
+}
diff --git a/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs b/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs
index cfff041f18..1c82876a1c 100644
--- a/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs
+++ b/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs
@@ -5,10 +5,10 @@ pub struct ReasoningTransform;
 impl Transformer for ReasoningTransform {
     type Value = Context;
     fn transform(&mut self, mut context: Self::Value) -> Self::Value {
-        if let Some(reasoning) = context.reasoning.as_ref()
-            && reasoning.enabled.unwrap_or(false)
-        {
-            // if reasoning is enabled then we've to drop top_k and top_p
+        // Must stay in lockstep with the Anthropic request builder, which gates
+        // on the same predicate — otherwise `thinking`/`output_config` ship
+        // alongside sampling params that Anthropic rejects.
+        if context.is_reasoning_supported() {
             context.top_k = None;
             context.top_p = None;
         }
@@ -85,4 +85,51 @@ mod tests {
 
         assert_eq!(actual, expected);
     }
+
+    #[test]
+    fn test_enabled_none_with_effort_still_strips_top_k_and_top_p() {
+        // `enabled: None` + effort is treated as reasoning-on (domain rule).
+        let fixture = create_context_fixture().reasoning(ReasoningConfig {
+            enabled: None,
+            max_tokens: None,
+            effort: Some(forge_domain::Effort::High),
+            exclude: None,
+        });
+        let mut transformer = ReasoningTransform;
+        let actual = transformer.transform(fixture);
+
+        assert_eq!(actual.top_k, None);
+        assert_eq!(actual.top_p, None);
+    }
+
+    #[test]
+    fn test_enabled_none_with_positive_max_tokens_still_strips_top_k_and_top_p() {
+        let fixture = create_context_fixture().reasoning(ReasoningConfig {
+            enabled: None,
+            max_tokens: Some(8000),
+            effort: None,
+            exclude: None,
+        });
+        let mut transformer = ReasoningTransform;
+        let actual = transformer.transform(fixture);
+
+        assert_eq!(actual.top_k, None);
+        assert_eq!(actual.top_p, None);
+    }
+
+    #[test]
+    fn test_enabled_none_with_zero_max_tokens_preserves_top_k_and_top_p() {
+        // Matches `is_reasoning_supported`: max_tokens == 0 is treated as off.
+        let fixture = create_context_fixture().reasoning(ReasoningConfig {
+            enabled: None,
+            max_tokens: Some(0),
+            effort: None,
+            exclude: None,
+        });
+        let mut transformer = ReasoningTransform;
+        let actual = transformer.transform(fixture.clone());
+
+        assert_eq!(actual.top_k, fixture.top_k);
+        assert_eq!(actual.top_p, fixture.top_p);
+    }
 }
diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs
index 664000e1eb..13e6c3398b 100644
--- a/crates/forge_domain/src/context.rs
+++ b/crates/forge_domain/src/context.rs
@@ -640,6 +640,12 @@ impl Context {
     /// Checks if reasoning is enabled by user or not.
     pub fn is_reasoning_supported(&self) -> bool {
         self.reasoning.as_ref().is_some_and(|reasoning| {
+            // `Effort::None` is a strong opt-out that wins over `enabled` and
+            // `max_tokens`.
+            if matches!(reasoning.effort, Some(crate::Effort::None)) {
+                return false;
+            }
+
             // When enabled parameter is defined then return it's value directly.
             if reasoning.enabled.is_some() {
                 return reasoning.enabled.unwrap_or_default();
@@ -1193,6 +1199,37 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_context_is_reasoning_not_supported_when_effort_is_none() {
+        // `Effort::None` is documented as "skips the thinking step entirely" and
+        // must act as an explicit opt-out regardless of other fields.
+        let fixture = Context::default().reasoning(crate::ReasoningConfig {
+            effort: Some(crate::Effort::None),
+            ..Default::default()
+        });
+
+        let actual = fixture.is_reasoning_supported();
+
+        assert!(!actual);
+    }
+
+    #[test]
+    fn test_context_is_reasoning_not_supported_when_effort_none_overrides_enabled_true() {
+        let fixture = Context::default().reasoning(crate::ReasoningConfig {
+            enabled: Some(true),
+            effort: Some(crate::Effort::None),
+            max_tokens: Some(8000),
+            ..Default::default()
+        });
+
+        let actual = fixture.is_reasoning_supported();
+
+        assert!(
+            !actual,
+            "Effort::None must win over enabled: true and max_tokens"
+        );
+    }
+
     #[test]
     fn test_add_attachments_file_content_is_droppable() {
         let fixture_attachments = vec![Attachment {
diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs
index 3292f5ab9f..1c9924901d 100644
--- a/crates/forge_repo/src/provider/anthropic.rs
+++ b/crates/forge_repo/src/provider/anthropic.rs
@@ -7,8 +7,8 @@ use forge_app::domain::{
 };
 use forge_app::dto::anthropic::{
     AuthSystemMessage, CapitalizeToolNames, DropInvalidToolUse, EnforceStrictObjectSchema,
-    EventData, ListModelResponse, ReasoningTransform, RemoveOutputFormat, Request, SanitizeToolIds,
-    SetCache,
+    EventData, ListModelResponse, ModelSpecificThinking, ReasoningTransform, RemoveOutputFormat,
+    Request, SanitizeToolIds, SetCache, ThinkingDisplay,
 };
 use forge_app::{EnvironmentInfra, HttpInfra};
 use forge_domain::{ChatRepository, Provider, ProviderId};
@@ -101,6 +101,19 @@ impl<T: HttpInfra> Anthropic<T> {
         // transform the context to match the request format
         let context = ReasoningTransform.transform(context);
 
+        // Read out of `context` before `Request::try_from` consumes it.
+        let display_preference = context
+            .reasoning
+            .as_ref()
+            .and_then(|r| r.exclude)
+            .map(|exclude| {
+                if exclude {
+                    ThinkingDisplay::Omitted
+                } else {
+                    ThinkingDisplay::Summarized
+                }
+            });
+
         let mut request = Request::try_from(context)?.max_tokens(max_tokens as u64);
 
         // For Vertex AI Anthropic, model is in the URL path, not the request body
@@ -110,11 +123,16 @@ impl<T: HttpInfra> Anthropic<T> {
             request = request.model(model.as_str().to_string());
         }
 
+        let mut thinking_transform = ModelSpecificThinking::new(model.as_str());
+        if let Some(display) = display_preference {
+            thinking_transform = thinking_transform.display(display);
+        }
         let pipeline = AuthSystemMessage::default()
             .when(|_| self.use_oauth)
             .pipe(CapitalizeToolNames)
             .pipe(DropInvalidToolUse)
-            .pipe(SanitizeToolIds);
+            .pipe(SanitizeToolIds)
+            .pipe(thinking_transform);
 
         // Vertex AI does not support output_format, so we skip schema enforcement
         // and remove any output_format field

From 17cc84cb6733e82d47cbc0ef749b3af2a6c68aea Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 12:16:52 +0000
Subject: [PATCH 2/5] [autofix.ci] apply automated fixes

---
 crates/forge_app/src/dto/anthropic/request.rs | 23 ++++++---
 .../transforms/model_specific_thinking.rs     | 47 +++++++++++--------
 crates/forge_repo/src/provider/anthropic.rs   | 23 ++++-----
 3 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs
index 32a91f35d8..addd8421c8 100644
--- a/crates/forge_app/src/dto/anthropic/request.rs
+++ b/crates/forge_app/src/dto/anthropic/request.rs
@@ -127,8 +127,7 @@ impl TryFrom<forge_domain::Context> for Request {
         // Gate on the domain rule so inherited configs with `enabled: None` but
         // a positive effort / `max_tokens` still emit reasoning on the wire.
         let reasoning_on = request.is_reasoning_supported();
-        let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning
-        {
+        let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning {
             // Adaptive thinking on 4.7 hides reasoning content by default; opting
             // into reasoning should surface it unless the caller set `exclude`.
             let adaptive_display = if reasoning.exclude == Some(true) {
@@ -583,7 +582,10 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
-        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Enabled { budget_tokens: 8000 })
+        );
         assert_eq!(actual.output_config, None);
     }
 
@@ -599,7 +601,10 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
-        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Enabled { budget_tokens: 8000 })
+        );
         assert_eq!(
             actual.output_config,
             Some(OutputConfig { effort: OutputEffort::Low })
@@ -617,7 +622,10 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
-        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Enabled { budget_tokens: 8000 })
+        );
         assert_eq!(actual.output_config, None);
     }
 
@@ -738,7 +746,10 @@ mod tests {
 
         let actual = Request::try_from(fixture).unwrap();
 
-        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Enabled { budget_tokens: 8000 })
+        );
     }
 
     #[test]
diff --git a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
index f20bc95535..4c7157f3ca 100644
--- a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
+++ b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
@@ -99,9 +99,7 @@ impl Transformer for ModelSpecificThinking {
                          budgets are unsupported. Use `reasoning.effort` to control \
                          thinking depth instead."
                     );
-                    request.thinking = Some(Thinking::Adaptive {
-                        display: Some(self.display),
-                    });
+                    request.thinking = Some(Thinking::Adaptive { display: Some(self.display) });
                 }
                 request.temperature = None;
                 request.top_p = None;
@@ -109,17 +107,15 @@ impl Transformer for ModelSpecificThinking {
             }
             ModelFamily::LegacyWithEffort => {
                 if let Some(Thinking::Adaptive { .. }) = request.thinking {
-                    request.thinking = Some(Thinking::Enabled {
-                        budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS,
-                    });
+                    request.thinking =
+                        Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS });
                 }
                 clamp_effort_to_high(&mut request.output_config);
             }
             ModelFamily::LegacyNoEffort => {
                 if let Some(Thinking::Adaptive { .. }) = request.thinking {
-                    request.thinking = Some(Thinking::Enabled {
-                        budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS,
-                    });
+                    request.thinking =
+                        Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS });
                 }
                 if request.output_config.is_some() {
                     warn!(
@@ -177,7 +173,10 @@ mod tests {
 
     #[test]
     fn test_opus_4_7_strips_sampling_params_even_without_thinking() {
-        let fixture = Request::default().temperature(0.5f32).top_p(0.9f32).top_k(40u64);
+        let fixture = Request::default()
+            .temperature(0.5f32)
+            .top_p(0.9f32)
+            .top_k(40u64);
         let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
 
         assert_eq!(actual.temperature, None);
@@ -188,8 +187,7 @@ mod tests {
     #[test]
     fn test_opus_4_7_bedrock_prefix_still_matches() {
         let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual =
-            ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture);
+        let actual = ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture);
 
         assert_eq!(
             actual.thinking,
@@ -215,7 +213,10 @@ mod tests {
     fn test_opus_4_6_passes_both_shapes_through() {
         let fixture1 = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
         let actual1 = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture1);
-        assert_eq!(actual1.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(
+            actual1.thinking,
+            Some(Thinking::Enabled { budget_tokens: 8000 })
+        );
         assert_eq!(actual1.temperature, Some(0.5));
 
         let fixture2 = fixture_request_with_thinking(Thinking::Adaptive {
@@ -246,7 +247,10 @@ mod tests {
         let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
         let actual = ModelSpecificThinking::new("claude-3-7-sonnet-20250219").transform(fixture);
 
-        assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 }));
+        assert_eq!(
+            actual.thinking,
+            Some(Thinking::Enabled { budget_tokens: 8000 })
+        );
         assert_eq!(actual.temperature, Some(0.5));
     }
 
@@ -302,7 +306,8 @@ mod tests {
 
     #[test]
     fn test_opus_4_7_preserves_xhigh_effort() {
-        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let fixture =
+            Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
         let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
 
         assert_eq!(
@@ -313,7 +318,8 @@ mod tests {
 
     #[test]
     fn test_opus_4_6_replaces_xhigh_with_max() {
-        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let fixture =
+            Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
         let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture);
 
         assert_eq!(
@@ -325,7 +331,8 @@ mod tests {
     #[test]
     fn test_opus_4_5_clamps_xhigh_to_high() {
         // Opus 4.5 supports effort but not xhigh or max; clamp to high.
-        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
+        let fixture =
+            Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
         let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
 
         assert_eq!(
@@ -349,8 +356,7 @@ mod tests {
     fn test_opus_4_5_preserves_supported_effort_levels() {
         for level in [OutputEffort::Low, OutputEffort::Medium, OutputEffort::High] {
             let fixture = Request::default().output_config(OutputConfig { effort: level });
-            let actual =
-                ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
+            let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
             assert_eq!(
                 actual.output_config,
                 Some(OutputConfig { effort: level }),
@@ -369,7 +375,8 @@ mod tests {
             "claude-opus-4-20250514",
             "claude-3-7-sonnet-20250219",
         ] {
-            let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High });
+            let fixture =
+                Request::default().output_config(OutputConfig { effort: OutputEffort::High });
             let actual = ModelSpecificThinking::new(model).transform(fixture);
             assert_eq!(actual.output_config, None, "model {}", model);
         }
diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs
index 1c9924901d..8429aa11e0 100644
--- a/crates/forge_repo/src/provider/anthropic.rs
+++ b/crates/forge_repo/src/provider/anthropic.rs
@@ -102,17 +102,18 @@ impl<T: HttpInfra> Anthropic<T> {
         let context = ReasoningTransform.transform(context);
 
         // Read out of `context` before `Request::try_from` consumes it.
-        let display_preference = context
-            .reasoning
-            .as_ref()
-            .and_then(|r| r.exclude)
-            .map(|exclude| {
-                if exclude {
-                    ThinkingDisplay::Omitted
-                } else {
-                    ThinkingDisplay::Summarized
-                }
-            });
+        let display_preference =
+            context
+                .reasoning
+                .as_ref()
+                .and_then(|r| r.exclude)
+                .map(|exclude| {
+                    if exclude {
+                        ThinkingDisplay::Omitted
+                    } else {
+                        ThinkingDisplay::Summarized
+                    }
+                });
 
         let mut request = Request::try_from(context)?.max_tokens(max_tokens as u64);
 

From 0e18a098caa9ebe0293f85ff449f9a0c257f13be Mon Sep 17 00:00:00 2001
From: Amit Singh <amitksingh1490@gmail.com>
Date: Sat, 18 Apr 2026 21:39:43 +0530
Subject: [PATCH 3/5] refactor(anthropic): move model-specific reasoning
 normalization from dto to transformers

---
 crates/forge_app/src/dto/anthropic/request.rs |   3 +-
 .../src/dto/anthropic/transforms/mod.rs       |   2 -
 .../transforms/model_specific_thinking.rs     | 395 ------------------
 crates/forge_app/src/orch.rs                  |   8 +-
 crates/forge_app/src/transformers/mod.rs      |   2 +
 .../transformers/model_specific_reasoning.rs  | 392 +++++++++++++++++
 crates/forge_repo/src/provider/anthropic.rs   | 182 ++++++--
 crates/forge_repo/src/provider/bedrock.rs     | 304 ++++++++++++--
 8 files changed, 818 insertions(+), 470 deletions(-)
 delete mode 100644 crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
 create mode 100644 crates/forge_app/src/transformers/model_specific_reasoning.rs

diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs
index addd8421c8..72aa4496ec 100644
--- a/crates/forge_app/src/dto/anthropic/request.rs
+++ b/crates/forge_app/src/dto/anthropic/request.rs
@@ -61,7 +61,8 @@ impl SystemMessage {
 }
 
 /// Anthropic's `thinking` request field. Opus 4.7 rejects the `Enabled` shape
-/// with a 400 — the `ModelSpecificThinking` transformer handles the migration.
+/// with a 400, so the orchestrator applies model-specific reasoning
+/// normalization before request conversion.
 #[derive(Serialize, Debug, PartialEq, Eq)]
 #[serde(tag = "type", rename_all = "lowercase")]
 pub enum Thinking {
diff --git a/crates/forge_app/src/dto/anthropic/transforms/mod.rs b/crates/forge_app/src/dto/anthropic/transforms/mod.rs
index b9a3b2f765..3ea6d5f183 100644
--- a/crates/forge_app/src/dto/anthropic/transforms/mod.rs
+++ b/crates/forge_app/src/dto/anthropic/transforms/mod.rs
@@ -2,7 +2,6 @@ mod auth_system_message;
 mod capitalize_tool_names;
 mod drop_invalid_toolcalls;
 mod enforce_schema;
-mod model_specific_thinking;
 mod reasoning_transform;
 mod remove_output_format;
 mod sanitize_tool_ids;
@@ -12,7 +11,6 @@ pub use auth_system_message::AuthSystemMessage;
 pub use capitalize_tool_names::CapitalizeToolNames;
 pub use drop_invalid_toolcalls::DropInvalidToolUse;
 pub use enforce_schema::EnforceStrictObjectSchema;
-pub use model_specific_thinking::ModelSpecificThinking;
 pub use reasoning_transform::ReasoningTransform;
 pub use remove_output_format::RemoveOutputFormat;
 pub use sanitize_tool_ids::SanitizeToolIds;
diff --git a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
deleted file mode 100644
index 4c7157f3ca..0000000000
--- a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs
+++ /dev/null
@@ -1,395 +0,0 @@
-use forge_domain::Transformer;
-use tracing::warn;
-
-use crate::dto::anthropic::{OutputConfig, OutputEffort, Request, Thinking, ThinkingDisplay};
-
-/// Default budget applied when converting `Adaptive` thinking to the legacy
-/// `Enabled` shape for pre-4.6 models that don't accept adaptive.
-const DEFAULT_LEGACY_BUDGET_TOKENS: u64 = 10000;
-
-/// Rewrites the `thinking` and `output_config` fields and strips rejected
-/// sampling parameters to match the target model's API contract. Classifies
-/// models into four tiers based on the official Anthropic docs:
-///
-/// | Tier                | Matching ids                   | Thinking          | Effort     | xhigh | max |
-/// |---------------------|--------------------------------|-------------------|------------|-------|-----|
-/// | `AdaptiveOnly`      | `opus-4-7`                     | adaptive only     | ✓          | ✓     | ✓   |
-/// | `AdaptiveFriendly`  | `opus-4-6`, `sonnet-4-6`       | both shapes       | ✓          | →max  | ✓   |
-/// | `LegacyWithEffort`  | `opus-4-5`                     | enabled only      | ✓          | →high | →high |
-/// | `LegacyNoEffort`    | everything else                | enabled only      | drop       | drop  | drop |
-///
-/// Additional behaviour:
-/// - `AdaptiveOnly` strips `temperature`/`top_p`/`top_k` (4.7 rejects non-
-///   defaults) and warns when a caller-supplied `budget_tokens` is dropped.
-/// - The display preference carried from the caller's `ReasoningConfig`
-///   (`exclude: true` → `Omitted`) is applied when rewriting `Enabled` to
-///   `Adaptive` on 4.7.
-pub struct ModelSpecificThinking {
-    model_id: String,
-    /// Defaults to `Summarized` because the legacy `Enabled` shape always
-    /// produced visible reasoning; a 4.7 migration should preserve that unless
-    /// the caller opts out via `exclude`.
-    display: ThinkingDisplay,
-}
-
-impl ModelSpecificThinking {
-    pub fn new(model_id: impl Into<String>) -> Self {
-        Self {
-            model_id: model_id.into(),
-            display: ThinkingDisplay::Summarized,
-        }
-    }
-
-    pub fn display(mut self, display: ThinkingDisplay) -> Self {
-        self.display = display;
-        self
-    }
-
-    fn family(&self) -> ModelFamily {
-        let id = self.model_id.to_lowercase();
-        if id.contains("opus-4-7") {
-            ModelFamily::AdaptiveOnly
-        } else if id.contains("opus-4-6") || id.contains("sonnet-4-6") {
-            ModelFamily::AdaptiveFriendly
-        } else if id.contains("opus-4-5") {
-            ModelFamily::LegacyWithEffort
-        } else {
-            ModelFamily::LegacyNoEffort
-        }
-    }
-}
-
-#[derive(Debug, PartialEq, Eq)]
-enum ModelFamily {
-    AdaptiveOnly,
-    AdaptiveFriendly,
-    LegacyWithEffort,
-    LegacyNoEffort,
-}
-
-/// Falls back to `max` — the closest supported level, even though it spends
-/// more tokens than `xhigh` would have.
-fn replace_xhigh_with_max(output_config: &mut Option<OutputConfig>) {
-    if let Some(config) = output_config.as_mut()
-        && config.effort == OutputEffort::XHigh
-    {
-        config.effort = OutputEffort::Max;
-    }
-}
-
-fn clamp_effort_to_high(output_config: &mut Option<OutputConfig>) {
-    if let Some(config) = output_config.as_mut()
-        && matches!(config.effort, OutputEffort::XHigh | OutputEffort::Max)
-    {
-        config.effort = OutputEffort::High;
-    }
-}
-
-impl Transformer for ModelSpecificThinking {
-    type Value = Request;
-
-    fn transform(&mut self, mut request: Self::Value) -> Self::Value {
-        match self.family() {
-            ModelFamily::AdaptiveOnly => {
-                if let Some(Thinking::Enabled { budget_tokens }) = request.thinking {
-                    warn!(
-                        model = %self.model_id,
-                        dropped_budget_tokens = budget_tokens,
-                        "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking \
-                         budgets are unsupported. Use `reasoning.effort` to control \
-                         thinking depth instead."
-                    );
-                    request.thinking = Some(Thinking::Adaptive { display: Some(self.display) });
-                }
-                request.temperature = None;
-                request.top_p = None;
-                request.top_k = None;
-            }
-            ModelFamily::LegacyWithEffort => {
-                if let Some(Thinking::Adaptive { .. }) = request.thinking {
-                    request.thinking =
-                        Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS });
-                }
-                clamp_effort_to_high(&mut request.output_config);
-            }
-            ModelFamily::LegacyNoEffort => {
-                if let Some(Thinking::Adaptive { .. }) = request.thinking {
-                    request.thinking =
-                        Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS });
-                }
-                if request.output_config.is_some() {
-                    warn!(
-                        model = %self.model_id,
-                        "Dropping `output_config.effort`: the effort parameter is only \
-                         supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7."
-                    );
-                    request.output_config = None;
-                }
-            }
-            ModelFamily::AdaptiveFriendly => {
-                replace_xhigh_with_max(&mut request.output_config);
-            }
-        }
-        request
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use forge_domain::Transformer;
-    use pretty_assertions::assert_eq;
-
-    use super::*;
-    use crate::dto::anthropic::{Request, Thinking};
-
-    fn fixture_request_with_thinking(thinking: Thinking) -> Request {
-        Request::default()
-            .thinking(thinking)
-            .temperature(0.5f32)
-            .top_p(0.9f32)
-            .top_k(40u64)
-    }
-
-    #[test]
-    fn test_opus_4_7_rewrites_enabled_to_adaptive_with_summarized_display() {
-        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_7_strips_sampling_params() {
-        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
-
-        assert_eq!(actual.temperature, None);
-        assert_eq!(actual.top_p, None);
-        assert_eq!(actual.top_k, None);
-    }
-
-    #[test]
-    fn test_opus_4_7_strips_sampling_params_even_without_thinking() {
-        let fixture = Request::default()
-            .temperature(0.5f32)
-            .top_p(0.9f32)
-            .top_k(40u64);
-        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
-
-        assert_eq!(actual.temperature, None);
-        assert_eq!(actual.top_p, None);
-        assert_eq!(actual.top_k, None);
-    }
-
-    #[test]
-    fn test_opus_4_7_bedrock_prefix_still_matches() {
-        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual = ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
-        );
-        assert_eq!(actual.temperature, None);
-    }
-
-    #[test]
-    fn test_opus_4_7_passes_through_adaptive_with_existing_display() {
-        let fixture = fixture_request_with_thinking(Thinking::Adaptive {
-            display: Some(ThinkingDisplay::Omitted),
-        });
-        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_6_passes_both_shapes_through() {
-        let fixture1 = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual1 = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture1);
-        assert_eq!(
-            actual1.thinking,
-            Some(Thinking::Enabled { budget_tokens: 8000 })
-        );
-        assert_eq!(actual1.temperature, Some(0.5));
-
-        let fixture2 = fixture_request_with_thinking(Thinking::Adaptive {
-            display: Some(ThinkingDisplay::Summarized),
-        });
-        let actual2 = ModelSpecificThinking::new("claude-sonnet-4-6").transform(fixture2);
-        assert_eq!(
-            actual2.thinking,
-            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_5_rewrites_adaptive_to_enabled() {
-        let fixture = fixture_request_with_thinking(Thinking::Adaptive {
-            display: Some(ThinkingDisplay::Summarized),
-        });
-        let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS })
-        );
-    }
-
-    #[test]
-    fn test_legacy_no_effort_model_passes_enabled_through() {
-        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual = ModelSpecificThinking::new("claude-3-7-sonnet-20250219").transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Enabled { budget_tokens: 8000 })
-        );
-        assert_eq!(actual.temperature, Some(0.5));
-    }
-
-    #[test]
-    fn test_no_thinking_is_preserved_everywhere() {
-        for model in [
-            "claude-opus-4-7",
-            "claude-opus-4-6",
-            "claude-opus-4-5-20251101",
-            "claude-3-7-sonnet-20250219",
-        ] {
-            let fixture = Request::default();
-            let actual = ModelSpecificThinking::new(model).transform(fixture);
-            assert_eq!(actual.thinking, None, "model {}", model);
-        }
-    }
-
-    #[test]
-    fn test_opus_4_7_uses_configured_display_when_rewriting_enabled() {
-        // The caller's `exclude: true` preference (→ `Omitted` display) must
-        // survive the 4.7 `Enabled → Adaptive` rewrite; otherwise hidden
-        // reasoning would silently become visible.
-        let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 });
-        let actual = ModelSpecificThinking::new("claude-opus-4-7")
-            .display(ThinkingDisplay::Omitted)
-            .transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_7_preserves_effort_when_dropping_budget() {
-        // When both `max_tokens` and `effort` are set on 4.7, dropping the
-        // budget must not take the effort signal with it — effort is the only
-        // remaining depth knob on 4.7.
-        let fixture = Request::default()
-            .thinking(Thinking::Enabled { budget_tokens: 8000 })
-            .output_config(OutputConfig { effort: OutputEffort::XHigh });
-        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
-
-        assert_eq!(
-            actual.thinking,
-            Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) })
-        );
-        assert_eq!(
-            actual.output_config,
-            Some(OutputConfig { effort: OutputEffort::XHigh })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_7_preserves_xhigh_effort() {
-        let fixture =
-            Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
-        let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture);
-
-        assert_eq!(
-            actual.output_config,
-            Some(OutputConfig { effort: OutputEffort::XHigh })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_6_replaces_xhigh_with_max() {
-        let fixture =
-            Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
-        let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture);
-
-        assert_eq!(
-            actual.output_config,
-            Some(OutputConfig { effort: OutputEffort::Max })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_5_clamps_xhigh_to_high() {
-        // Opus 4.5 supports effort but not xhigh or max; clamp to high.
-        let fixture =
-            Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh });
-        let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
-
-        assert_eq!(
-            actual.output_config,
-            Some(OutputConfig { effort: OutputEffort::High })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_5_clamps_max_to_high() {
-        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::Max });
-        let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
-
-        assert_eq!(
-            actual.output_config,
-            Some(OutputConfig { effort: OutputEffort::High })
-        );
-    }
-
-    #[test]
-    fn test_opus_4_5_preserves_supported_effort_levels() {
-        for level in [OutputEffort::Low, OutputEffort::Medium, OutputEffort::High] {
-            let fixture = Request::default().output_config(OutputConfig { effort: level });
-            let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture);
-            assert_eq!(
-                actual.output_config,
-                Some(OutputConfig { effort: level }),
-                "level {:?}",
-                level
-            );
-        }
-    }
-
-    #[test]
-    fn test_legacy_no_effort_models_drop_output_config() {
-        for model in [
-            "claude-sonnet-4-5-20250929",
-            "claude-haiku-4-5-20251001",
-            "claude-opus-4-1-20250805",
-            "claude-opus-4-20250514",
-            "claude-3-7-sonnet-20250219",
-        ] {
-            let fixture =
-                Request::default().output_config(OutputConfig { effort: OutputEffort::High });
-            let actual = ModelSpecificThinking::new(model).transform(fixture);
-            assert_eq!(actual.output_config, None, "model {}", model);
-        }
-    }
-
-    #[test]
-    fn test_adaptive_friendly_preserves_high_effort() {
-        let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High });
-        let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture);
-
-        assert_eq!(
-            actual.output_config,
-            Some(OutputConfig { effort: OutputEffort::High })
-        );
-    }
-}
diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs
index 86157c24e2..a81145ac80 100644
--- a/crates/forge_app/src/orch.rs
+++ b/crates/forge_app/src/orch.rs
@@ -11,6 +11,7 @@ use tokio::sync::Notify;
 use tracing::warn;
 
 use crate::agent::AgentService;
+use crate::transformers::ModelSpecificReasoning;
 use crate::{EnvironmentInfra, TemplateEngine};
 
 #[derive(Clone, Setters)]
@@ -208,7 +209,12 @@ impl<S: AgentService + EnvironmentInfra<Config = forge_config::ForgeConfig>> Orc
             .pipe(DropReasoningDetails.when(|_| !reasoning_supported))
             // Strip all reasoning from messages when the model has changed (signatures are
             // model-specific and invalid across models). No-op when model is unchanged.
-            .pipe(ReasoningNormalizer::new(model_id.clone()));
+            .pipe(ReasoningNormalizer::new(model_id.clone()))
+            // Normalize Anthropic reasoning knobs per model family before provider conversion.
+            .pipe(
+                ModelSpecificReasoning::new(model_id.as_str())
+                    .when(|_| model_id.as_str().to_lowercase().contains("claude")),
+            );
         let response = self
             .services
             .chat_agent(
diff --git a/crates/forge_app/src/transformers/mod.rs b/crates/forge_app/src/transformers/mod.rs
index ef434b62dd..a8b84543ea 100644
--- a/crates/forge_app/src/transformers/mod.rs
+++ b/crates/forge_app/src/transformers/mod.rs
@@ -1,7 +1,9 @@
 mod compaction;
 mod dedupe_role;
 mod drop_role;
+mod model_specific_reasoning;
 mod strip_working_dir;
 mod trim_context_summary;
 
 pub use compaction::SummaryTransformer;
+pub(crate) use model_specific_reasoning::ModelSpecificReasoning;
diff --git a/crates/forge_app/src/transformers/model_specific_reasoning.rs b/crates/forge_app/src/transformers/model_specific_reasoning.rs
new file mode 100644
index 0000000000..d3d1788b54
--- /dev/null
+++ b/crates/forge_app/src/transformers/model_specific_reasoning.rs
@@ -0,0 +1,392 @@
+use forge_domain::{Context, Effort, ReasoningConfig, Transformer};
+use tracing::warn;
+
+/// Thinking budget, in tokens, backfilled for legacy (pre-4.6) Anthropic
+/// model families when a reasoning config arrives without `max_tokens`.
+const DEFAULT_LEGACY_BUDGET_TOKENS: usize = 10_000;
+
+#[derive(Debug, PartialEq, Eq)]
+enum AnthropicModelFamily {
+    AdaptiveOnly, // ids containing `opus-4-7`
+    AdaptiveFriendly, // ids containing `opus-4-6` or `sonnet-4-6`
+    LegacyWithEffort, // ids containing `opus-4-5`
+    LegacyNoEffort, // every other id
+}
+
+/// Normalizes a `Context`'s reasoning knobs (and, for the Opus 4.7 family, its
+/// sampling knobs) per Anthropic model family before provider conversion.
+pub(crate) struct ModelSpecificReasoning {
+    model_id: String, // matched by lower-cased substring in `family`
+}
+
+impl ModelSpecificReasoning {
+    /// Builds a normalizer that classifies requests by the given model id.
+    pub(crate) fn new(model_id: impl Into<String>) -> Self {
+        Self { model_id: model_id.into() }
+    }
+
+    /// Picks the family from substrings of the lower-cased model id.
+    fn family(&self) -> AnthropicModelFamily {
+        let id = self.model_id.to_lowercase();
+        let has = |needle: &str| id.contains(needle);
+        let adaptive_friendly = has("opus-4-6") || has("sonnet-4-6");
+        match (has("opus-4-7"), adaptive_friendly, has("opus-4-5")) {
+            (true, _, _) => AnthropicModelFamily::AdaptiveOnly,
+            (false, true, _) => AnthropicModelFamily::AdaptiveFriendly,
+            (false, false, true) => AnthropicModelFamily::LegacyWithEffort,
+            (false, false, false) => AnthropicModelFamily::LegacyNoEffort,
+        }
+    }
+}
+
+/// Upgrades an `XHigh` effort to `Max`; any other effort is left untouched.
+fn replace_xhigh_with_max(reasoning: &mut Option<ReasoningConfig>) {
+    let Some(config) = reasoning else { return };
+    if config.effort == Some(Effort::XHigh) {
+        config.effort = Some(Effort::Max);
+    }
+}
+
+/// Caps `XHigh` and `Max` efforts at `High`; other or unset efforts are kept.
+fn clamp_effort_to_high(reasoning: &mut Option<ReasoningConfig>) {
+    let Some(config) = reasoning else { return };
+    if matches!(config.effort, Some(Effort::XHigh | Effort::Max)) {
+        config.effort = Some(Effort::High);
+    }
+}
+
+/// Backfills `DEFAULT_LEGACY_BUDGET_TOKENS` if a config lacks `max_tokens`.
+fn set_default_legacy_budget(reasoning: &mut Option<ReasoningConfig>) {
+    let Some(config) = reasoning else { return };
+    if config.max_tokens.is_none() {
+        config.max_tokens = Some(DEFAULT_LEGACY_BUDGET_TOKENS);
+    }
+}
+
+impl Transformer for ModelSpecificReasoning {
+    type Value = Context;
+    /// Applies this model family's fixups to `context` and returns it.
+    fn transform(&mut self, mut context: Self::Value) -> Self::Value {
+        let reasoning_on = context.is_reasoning_supported();
+        // Reasoning fields are only rewritten when `reasoning_on` is true.
+        match self.family() {
+            AnthropicModelFamily::AdaptiveOnly => {
+                if reasoning_on
+                    && let Some(reasoning) = context.reasoning.as_mut()
+                    && let Some(max_tokens) = reasoning.max_tokens.take()
+                {
+                    warn!(
+                        model = %self.model_id,
+                        dropped_max_tokens = max_tokens,
+                        "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead."
+                    );
+                }
+                context.temperature = None; // sampling knobs: always cleared
+                context.top_p = None;
+                context.top_k = None;
+            }
+            AnthropicModelFamily::AdaptiveFriendly => {
+                if reasoning_on {
+                    replace_xhigh_with_max(&mut context.reasoning);
+                }
+            }
+            AnthropicModelFamily::LegacyWithEffort => {
+                if reasoning_on {
+                    set_default_legacy_budget(&mut context.reasoning);
+                    clamp_effort_to_high(&mut context.reasoning);
+                }
+            }
+            AnthropicModelFamily::LegacyNoEffort => {
+                if reasoning_on {
+                    set_default_legacy_budget(&mut context.reasoning);
+                    if let Some(reasoning) = context.reasoning.as_mut()
+                        && reasoning.effort.is_some()
+                    {
+                        warn!(
+                            model = %self.model_id,
+                            "Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7."
+                        );
+                        reasoning.effort = None;
+                    }
+                }
+            }
+        }
+
+        context
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use forge_domain::{Context, Effort, ReasoningConfig, Temperature, TopK, TopP, Transformer};
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    fn fixture_context_with_sampling() -> Context {
+        Context::default()
+            .temperature(Temperature::new(0.5).unwrap())
+            .top_p(TopP::new(0.9).unwrap())
+            .top_k(TopK::new(40).unwrap())
+    }
+
+    #[test]
+    fn test_opus_4_7_drops_max_tokens_and_sampling_params() {
+        let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            effort: Some(Effort::XHigh),
+            exclude: Some(true),
+        });
+
+        let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::XHigh),
+            exclude: Some(true),
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_opus_4_7_strips_sampling_even_without_reasoning() {
+        let fixture = fixture_context_with_sampling();
+
+        let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture);
+
+        let expected = Context::default();
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_adaptive_friendly_replaces_xhigh_with_max() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::XHigh),
+            exclude: None,
+        });
+
+        let actual = ModelSpecificReasoning::new("claude-opus-4-6").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::Max),
+            exclude: None,
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_legacy_with_effort_backfills_budget_and_clamps_effort() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::Max),
+            exclude: None,
+        });
+
+        let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(DEFAULT_LEGACY_BUDGET_TOKENS),
+            effort: Some(Effort::High),
+            exclude: None,
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_legacy_no_effort_backfills_budget_and_drops_effort() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::High),
+            exclude: None,
+        });
+
+        let actual = ModelSpecificReasoning::new("claude-3-7-sonnet-20250219").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(DEFAULT_LEGACY_BUDGET_TOKENS),
+            effort: None,
+            exclude: None,
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_opus_4_7_bedrock_prefix_still_matches() {
+        // Bedrock region prefixes (`us.anthropic.claude-...`) must still be
+        // classified as AdaptiveOnly so sampling params are stripped and
+        // `max_tokens` is dropped.
+        let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            effort: Some(Effort::XHigh),
+            exclude: None,
+        });
+
+        let actual =
+            ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::XHigh),
+            exclude: None,
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_opus_4_7_preserves_effort_when_dropping_max_tokens() {
+        // When both knobs are set on 4.7, only `max_tokens` should be dropped;
+        // `effort` is the remaining depth knob and must survive.
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            effort: Some(Effort::XHigh),
+            exclude: None,
+        });
+
+        let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: None,
+            effort: Some(Effort::XHigh),
+            exclude: None,
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_opus_4_5_clamps_max_to_high() {
+        let fixture = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            effort: Some(Effort::Max),
+            exclude: None,
+        });
+
+        let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture);
+
+        let expected = Context::default().reasoning(ReasoningConfig {
+            enabled: Some(true),
+            max_tokens: Some(8000),
+            effort: Some(Effort::High),
+            exclude: None,
+        });
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_opus_4_5_preserves_supported_effort_levels() {
+        for level in [Effort::Low, Effort::Medium, Effort::High] {
+            let fixture = Context::default().reasoning(ReasoningConfig {
+                enabled: Some(true),
+                max_tokens: Some(8000),
+                effort: Some(level.clone()),
+                exclude: None,
+            });
+
+            let actual =
+                ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture);
+
+            let expected = Context::default().reasoning(ReasoningConfig {
+                enabled: Some(true),
+                max_tokens: Some(8000),
+                effort: Some(level.clone()),
+                exclude: None,
+            });
+
+            assert_eq!(actual, expected, "effort level {:?}", level);
+        }
+    }
+
+    #[test]
+    fn test_legacy_no_effort_drops_effort_for_all_pre_4_5_ids() {
+        // All pre-Opus-4.5 Claude ids (plus the newer non-effort family members
+        // Sonnet 4.5 and Haiku 4.5) should land in LegacyNoEffort and have their
+        // effort stripped.
+        for model in [
+            "claude-sonnet-4-5-20250929",
+            "claude-haiku-4-5-20251001",
+            "claude-opus-4-1-20250805",
+            "claude-opus-4-20250514",
+            "claude-3-7-sonnet-20250219",
+        ] {
+            let fixture = Context::default().reasoning(ReasoningConfig {
+                enabled: Some(true),
+                max_tokens: Some(8000),
+                effort: Some(Effort::High),
+                exclude: None,
+            });
+
+            let actual = ModelSpecificReasoning::new(model).transform(fixture);
+
+            let expected = Context::default().reasoning(ReasoningConfig {
+                enabled: Some(true),
+                max_tokens: Some(8000),
+                effort: None,
+                exclude: None,
+            });
+
+            assert_eq!(actual, expected, "model {}", model);
+        }
+    }
+
+    #[test]
+    fn test_no_reasoning_is_preserved_everywhere() {
+        // A default context must pass through unchanged for every family.
+        for model in [
+            "claude-opus-4-7",
+            "claude-opus-4-6",
+            "claude-sonnet-4-6",
+            "claude-opus-4-5-20251101",
+            "claude-3-7-sonnet-20250219",
+        ] {
+            let fixture = Context::default();
+            let actual = ModelSpecificReasoning::new(model).transform(fixture);
+            let expected = Context::default();
+            assert_eq!(actual, expected, "model {}", model);
+        }
+    }
+
+    #[test]
+    fn test_adaptive_friendly_preserves_non_xhigh_effort() {
+        for level in [Effort::Low, Effort::Medium, Effort::High, Effort::Max] {
+            let fixture = Context::default().reasoning(ReasoningConfig {
+                enabled: Some(true),
+                max_tokens: None,
+                effort: Some(level.clone()),
+                exclude: None,
+            });
+
+            let actual = ModelSpecificReasoning::new("claude-opus-4-6").transform(fixture);
+
+            let expected = Context::default().reasoning(ReasoningConfig {
+                enabled: Some(true),
+                max_tokens: None,
+                effort: Some(level.clone()),
+                exclude: None,
+            });
+
+            assert_eq!(actual, expected, "effort level {:?}", level);
+        }
+    }
+}
diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs
index 8429aa11e0..236dc45b7e 100644
--- a/crates/forge_repo/src/provider/anthropic.rs
+++ b/crates/forge_repo/src/provider/anthropic.rs
@@ -7,8 +7,8 @@ use forge_app::domain::{
 };
 use forge_app::dto::anthropic::{
     AuthSystemMessage, CapitalizeToolNames, DropInvalidToolUse, EnforceStrictObjectSchema,
-    EventData, ListModelResponse, ModelSpecificThinking, ReasoningTransform, RemoveOutputFormat,
-    Request, SanitizeToolIds, SetCache, ThinkingDisplay,
+    EventData, ListModelResponse, ReasoningTransform, RemoveOutputFormat, Request, SanitizeToolIds,
+    SetCache,
 };
 use forge_app::{EnvironmentInfra, HttpInfra};
 use forge_domain::{ChatRepository, Provider, ProviderId};
@@ -34,7 +34,7 @@ impl<H: HttpInfra> Anthropic<H> {
         Self { http, provider, anthropic_version: version, use_oauth }
     }
 
-    fn get_headers(&self) -> Vec<(String, String)> {
+    fn get_headers(&self, model: Option<&ModelId>) -> Vec<(String, String)> {
         let mut headers = vec![(
             "anthropic-version".to_string(),
             self.anthropic_version.clone(),
@@ -66,25 +66,36 @@ impl<H: HttpInfra> Anthropic<H> {
 
         // Add beta flags (not needed for Vertex AI)
         if self.provider.id != ProviderId::VERTEX_AI_ANTHROPIC {
+            let mut betas: Vec<&'static str> = Vec::new();
             if self.use_oauth {
-                // OAuth requires multiple beta flags including structured outputs
-                headers.push((
-                    "anthropic-beta".to_string(),
-                    "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,structured-outputs-2025-11-13".to_string(),
-                ));
-            } else {
-                // API key auth also needs beta flags for structured outputs and thinking
-                headers.push((
-                    "anthropic-beta".to_string(),
-                    "interleaved-thinking-2025-05-14,structured-outputs-2025-11-13".to_string(),
-                ));
+                betas.push("claude-code-20250219");
+                betas.push("oauth-2025-04-20");
+            }
+            // Adaptive thinking auto-enables interleaved thinking on Opus 4.7,
+            // Opus 4.6, and Sonnet 4.6 — the beta header is redundant there per
+            // the Opus 4.7 migration guide. Keep it for older models so manual
+            // `extended-thinking` requests still get interleaved turns.
+            if interleaved_thinking_required(model) {
+                betas.push("interleaved-thinking-2025-05-14");
             }
+            betas.push("structured-outputs-2025-11-13");
+            headers.push(("anthropic-beta".to_string(), betas.join(",")));
         }
 
         headers
     }
 }
 
+/// Tells whether the `interleaved-thinking` beta flag must be sent. Models
+/// that enable it via adaptive thinking (Opus 4.7, Opus 4.6, Sonnet 4.6)
+/// return false. An unknown model (e.g., listing endpoints) returns true: the
+/// flag is harmless on non-chat endpoints and needed by older chat models.
+fn interleaved_thinking_required(model: Option<&ModelId>) -> bool {
+    const ADAPTIVE: [&str; 3] = ["opus-4-7", "opus-4-6", "sonnet-4-6"];
+    let id = model.map(|model| model.as_str().to_lowercase());
+    id.is_none_or(|id| !ADAPTIVE.iter().any(|m| id.contains(*m)))
+}
+
 impl<T: HttpInfra> Anthropic<T> {
     /// Determines whether this provider should bypass reqwest-eventsource
     /// content-type validation and parse SSE from raw bytes instead.
@@ -101,20 +112,6 @@ impl<T: HttpInfra> Anthropic<T> {
         // transform the context to match the request format
         let context = ReasoningTransform.transform(context);
 
-        // Read out of `context` before `Request::try_from` consumes it.
-        let display_preference =
-            context
-                .reasoning
-                .as_ref()
-                .and_then(|r| r.exclude)
-                .map(|exclude| {
-                    if exclude {
-                        ThinkingDisplay::Omitted
-                    } else {
-                        ThinkingDisplay::Summarized
-                    }
-                });
-
         let mut request = Request::try_from(context)?.max_tokens(max_tokens as u64);
 
         // For Vertex AI Anthropic, model is in the URL path, not the request body
@@ -124,16 +121,11 @@ impl<T: HttpInfra> Anthropic<T> {
             request = request.model(model.as_str().to_string());
         }
 
-        let mut thinking_transform = ModelSpecificThinking::new(model.as_str());
-        if let Some(display) = display_preference {
-            thinking_transform = thinking_transform.display(display);
-        }
         let pipeline = AuthSystemMessage::default()
             .when(|_| self.use_oauth)
             .pipe(CapitalizeToolNames)
             .pipe(DropInvalidToolUse)
-            .pipe(SanitizeToolIds)
-            .pipe(thinking_transform);
+            .pipe(SanitizeToolIds);
 
         // Vertex AI does not support output_format, so we skip schema enforcement
         // and remove any output_format field
@@ -164,7 +156,7 @@ impl<T: HttpInfra> Anthropic<T> {
             serde_json::to_vec(&request).with_context(|| "Failed to serialize request")?;
 
         let parsed_url = Url::parse(&url).with_context(|| format!("Invalid URL: {}", url))?;
-        let headers = create_headers(self.get_headers());
+        let headers = create_headers(self.get_headers(Some(model)));
 
         if self.should_use_raw_sse() {
             return self.chat_raw_sse(&parsed_url, headers, json_bytes).await;
@@ -255,7 +247,7 @@ impl<T: HttpInfra> Anthropic<T> {
 
                 let response = self
                     .http
-                    .http_get(url, Some(create_headers(self.get_headers())))
+                    .http_get(url, Some(create_headers(self.get_headers(None))))
                     .await
                     .with_context(|| format_http_context(None, "GET", url))
                     .with_context(|| "Failed to fetch models")?;
@@ -682,7 +674,7 @@ mod tests {
             false, // API key auth (not OAuth)
         );
 
-        let actual = fixture.get_headers();
+        let actual = fixture.get_headers(None);
 
         // Should contain anthropic-version header
         assert!(
@@ -710,9 +702,12 @@ mod tests {
             beta_value.contains("structured-outputs-2025-11-13"),
             "Beta header should include structured-outputs flag"
         );
+        // When the model is unknown (e.g., model listing), keep the
+        // interleaved-thinking header since it is harmless on non-chat
+        // endpoints and still required for older chat models.
         assert!(
             beta_value.contains("interleaved-thinking-2025-05-14"),
-            "Beta header should include interleaved-thinking flag"
+            "Beta header should include interleaved-thinking flag when model is unknown"
         );
     }
 
@@ -761,7 +756,7 @@ mod tests {
             true, // OAuth auth
         );
 
-        let actual = fixture.get_headers();
+        let actual = fixture.get_headers(None);
 
         // Should contain anthropic-version header
         assert!(
@@ -795,6 +790,115 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_get_headers_drops_interleaved_thinking_for_4_6_plus_models() {
+        // Adaptive thinking auto-enables interleaved thinking on Opus 4.7,
+        // Opus 4.6, and Sonnet 4.6; the beta header is redundant there.
+        let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap();
+        let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap();
+
+        let provider = Provider {
+            id: forge_app::domain::ProviderId::ANTHROPIC,
+            provider_type: forge_domain::ProviderType::Llm,
+            response: Some(forge_app::domain::ProviderResponse::Anthropic),
+            url: chat_url,
+            credential: Some(forge_domain::AuthCredential {
+                id: forge_app::domain::ProviderId::ANTHROPIC,
+                auth_details: forge_domain::AuthDetails::ApiKey(forge_domain::ApiKey::from(
+                    "sk-test-key".to_string(),
+                )),
+                url_params: std::collections::HashMap::new(),
+            }),
+            auth_methods: vec![forge_domain::AuthMethod::ApiKey],
+            url_params: vec![],
+            models: Some(forge_domain::ModelSource::Url(model_url)),
+            custom_headers: None,
+        };
+
+        let fixture = Anthropic::new(
+            Arc::new(MockHttpClient::new()),
+            provider,
+            "2023-06-01".to_string(),
+            false,
+        );
+
+        for model_id in [
+            "claude-opus-4-7",
+            "claude-opus-4-6",
+            "claude-sonnet-4-6",
+            "us.anthropic.claude-opus-4-7",
+            "global.anthropic.claude-sonnet-4-6",
+        ] {
+            let model = ModelId::new(model_id);
+            let actual = fixture.get_headers(Some(&model));
+            let (_, beta_value) = actual
+                .iter()
+                .find(|(k, _)| k == "anthropic-beta")
+                .expect("anthropic-beta header should be present");
+            assert!(
+                !beta_value.contains("interleaved-thinking-2025-05-14"),
+                "Beta header should NOT include interleaved-thinking flag for {} (auto-enabled by adaptive thinking)",
+                model_id
+            );
+            assert!(
+                beta_value.contains("structured-outputs-2025-11-13"),
+                "structured-outputs flag must still be present for {}",
+                model_id
+            );
+        }
+    }
+
+    #[test]
+    fn test_get_headers_keeps_interleaved_thinking_for_pre_4_6_models() {
+        let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap();
+        let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap();
+
+        let provider = Provider {
+            id: forge_app::domain::ProviderId::ANTHROPIC,
+            provider_type: forge_domain::ProviderType::Llm,
+            response: Some(forge_app::domain::ProviderResponse::Anthropic),
+            url: chat_url,
+            credential: Some(forge_domain::AuthCredential {
+                id: forge_app::domain::ProviderId::ANTHROPIC,
+                auth_details: forge_domain::AuthDetails::ApiKey(forge_domain::ApiKey::from(
+                    "sk-test-key".to_string(),
+                )),
+                url_params: std::collections::HashMap::new(),
+            }),
+            auth_methods: vec![forge_domain::AuthMethod::ApiKey],
+            url_params: vec![],
+            models: Some(forge_domain::ModelSource::Url(model_url)),
+            custom_headers: None,
+        };
+
+        let fixture = Anthropic::new(
+            Arc::new(MockHttpClient::new()),
+            provider,
+            "2023-06-01".to_string(),
+            false,
+        );
+
+        for model_id in [
+            "claude-opus-4-5-20251101",
+            "claude-sonnet-4-5-20250929",
+            "claude-haiku-4-5-20251001",
+            "claude-opus-4-1-20250805",
+            "claude-3-7-sonnet-20250219",
+        ] {
+            let model = ModelId::new(model_id);
+            let actual = fixture.get_headers(Some(&model));
+            let (_, beta_value) = actual
+                .iter()
+                .find(|(k, _)| k == "anthropic-beta")
+                .expect("anthropic-beta header should be present");
+            assert!(
+                beta_value.contains("interleaved-thinking-2025-05-14"),
+                "Beta header should include interleaved-thinking flag for pre-4.6 model {}",
+                model_id
+            );
+        }
+    }
+
     #[test]
     fn test_vertex_ai_removes_output_format() {
         use forge_domain::ResponseFormat;
diff --git a/crates/forge_repo/src/provider/bedrock.rs b/crates/forge_repo/src/provider/bedrock.rs
index c5e9653167..0bbe13dfe1 100644
--- a/crates/forge_repo/src/provider/bedrock.rs
+++ b/crates/forge_repo/src/provider/bedrock.rs
@@ -439,6 +439,20 @@ impl FromDomain<forge_domain::Context>
         use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;
         use aws_sdk_bedrockruntime::types::{InferenceConfiguration, Message, SystemContentBlock};
 
+        // Capture reasoning-related flags before `context.messages` / other fields
+        // are consumed below. `ModelSpecificReasoning` runs earlier in the pipeline
+        // and has already normalized `reasoning` per model family, so here we just
+        // branch on the shape it produced:
+        // - `max_tokens.is_some()` -> legacy `thinking.enabled` budget shape
+        // - otherwise              -> `thinking.adaptive` (Opus 4.7 / 4.6 / Sonnet 4.6)
+        let reasoning_on = context.is_reasoning_supported();
+        let emits_legacy_thinking = reasoning_on
+            && context
+                .reasoning
+                .as_ref()
+                .and_then(|r| r.max_tokens)
+                .is_some();
+
         // Convert system messages
         let system: Vec<SystemContentBlock> = context
             .messages
@@ -522,18 +536,17 @@ impl FromDomain<forge_domain::Context>
         };
 
         // Convert inference configuration
-        // When extended thinking is enabled, top_p must be >= 0.95 or unset
-        let has_thinking = context
-            .reasoning
-            .as_ref()
-            .and_then(|r| r.enabled)
-            .unwrap_or(false);
-        let adjusted_top_p = if has_thinking {
-            // If thinking is enabled and top_p is set, ensure it's at least 0.95
+        // When `thinking.enabled` (legacy budget shape) is being emitted below,
+        // Anthropic-on-Bedrock requires `top_p >= 0.95` or unset. `thinking.adaptive`
+        // (Opus 4.7 / Opus 4.6 / Sonnet 4.6) has no such constraint, and
+        // `ModelSpecificReasoning` already strips `top_p` entirely for Opus 4.7.
+        let adjusted_top_p = if emits_legacy_thinking {
+            // If legacy thinking is emitted and top_p is set, ensure it's at least 0.95
             context.top_p.map(|p| {
                 let value = p.value();
                 if value < 0.95 {
-                    forge_domain::TopP::new(0.95).unwrap()
+                    // 0.95 is always a valid TopP value (between 0.0 and 1.0)
+                    forge_domain::TopP::new(0.95).expect("0.95 is valid TopP")
                 } else {
                     p
                 }
@@ -558,29 +571,59 @@ impl FromDomain<forge_domain::Context>
             None
         };
 
-        // Convert reasoning configuration to additional model request fields
-        // For Claude models with extended thinking support
-        // Based on AWS Bedrock docs: additionalModelRequestFields for Claude extended
-        // thinking https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+        // Convert reasoning configuration to `additional_model_request_fields`
+        // for Anthropic-on-Bedrock. Two thinking shapes are emitted based on
+        // `reasoning.max_tokens`, which `ModelSpecificReasoning` has already
+        // normalized per family:
+        //
+        //   - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}`
+        //     (Opus 4.5 and older; budget is backfilled to 10k when absent.)
+        //   - `max_tokens: None`    → `{type: "adaptive", display: ...}`
+        //     (Opus 4.7 rejects the legacy shape with 400; Opus 4.6 /
+        //     Sonnet 4.6 accept adaptive natively.)
+        //
+        // When present, `reasoning.effort` is emitted as `output_config.effort`
+        // for families that support it (`ModelSpecificReasoning` drops effort
+        // on LegacyNoEffort, so the Option is already correctly shaped here).
+        //
+        // AWS Bedrock passes `additional_model_request_fields` through verbatim
+        // to Anthropic for Claude models. See
+        // https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
         let additional_model_fields = if let Some(reasoning_config) = &context.reasoning {
-            if reasoning_config.enabled.unwrap_or(false) {
+            if !reasoning_on {
+                None
+            } else {
                 let mut thinking_config = std::collections::HashMap::new();
-                thinking_config.insert(
-                    "type".to_string(),
-                    aws_smithy_types::Document::String("enabled".to_string()),
-                );
-
-                // Set budget_tokens (REQUIRED when thinking is enabled)
-                // The budget_tokens parameter determines the maximum number of tokens
-                // Claude is allowed to use for its internal reasoning process
-                // Default to 4000 if not specified (AWS recommendation for good quality)
-                let budget_tokens = reasoning_config.max_tokens.unwrap_or(4000);
-                thinking_config.insert(
-                    "budget_tokens".to_string(),
-                    aws_smithy_types::Document::Number(aws_smithy_types::Number::PosInt(
-                        budget_tokens as u64,
-                    )),
-                );
+                if let Some(budget) = reasoning_config.max_tokens {
+                    thinking_config.insert(
+                        "type".to_string(),
+                        aws_smithy_types::Document::String("enabled".to_string()),
+                    );
+                    thinking_config.insert(
+                        "budget_tokens".to_string(),
+                        aws_smithy_types::Document::Number(aws_smithy_types::Number::PosInt(
+                            budget as u64,
+                        )),
+                    );
+                } else {
+                    thinking_config.insert(
+                        "type".to_string(),
+                        aws_smithy_types::Document::String("adaptive".to_string()),
+                    );
+                    // Opus 4.7 changed the default to `omitted`; preserve the
+                    // caller's `exclude` preference so `exclude: true` stays
+                    // `omitted` and every other case surfaces `summarized`
+                    // (matching the legacy pre-4.7 visible-thinking behavior).
+                    let display = if reasoning_config.exclude == Some(true) {
+                        "omitted"
+                    } else {
+                        "summarized"
+                    };
+                    thinking_config.insert(
+                        "display".to_string(),
+                        aws_smithy_types::Document::String(display.to_string()),
+                    );
+                }
 
                 let mut fields = std::collections::HashMap::new();
                 fields.insert(
@@ -588,9 +631,29 @@ impl FromDomain<forge_domain::Context>
                     aws_smithy_types::Document::Object(thinking_config),
                 );
 
+                if let Some(effort) = reasoning_config.effort.as_ref() {
+                    let effort_str = match effort {
+                        forge_domain::Effort::None => None,
+                        forge_domain::Effort::Minimal | forge_domain::Effort::Low => Some("low"),
+                        forge_domain::Effort::Medium => Some("medium"),
+                        forge_domain::Effort::High => Some("high"),
+                        forge_domain::Effort::XHigh => Some("xhigh"),
+                        forge_domain::Effort::Max => Some("max"),
+                    };
+                    if let Some(effort_str) = effort_str {
+                        let mut output_config = std::collections::HashMap::new();
+                        output_config.insert(
+                            "effort".to_string(),
+                            aws_smithy_types::Document::String(effort_str.to_string()),
+                        );
+                        fields.insert(
+                            "output_config".to_string(),
+                            aws_smithy_types::Document::Object(output_config),
+                        );
+                    }
+                }
+
                 Some(aws_smithy_types::Document::Object(fields))
-            } else {
-                None
             }
         } else {
             None
@@ -1788,6 +1851,183 @@ mod tests {
         assert!(actual.additional_model_request_fields().is_some());
     }
 
+    /// Opus 4.7 / Opus 4.6 / Sonnet 4.6 path: `ModelSpecificReasoning` strips
+    /// `max_tokens`, so the request carries `thinking.adaptive` with
+    /// `display: summarized`, keeping thinking visible as it was before 4.7.
+    #[test]
+    fn test_from_domain_context_emits_adaptive_thinking_when_max_tokens_absent() {
+        use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;
+        use forge_domain::{Context, ReasoningConfig};
+
+        let fixture = Context {
+            conversation_id: None,
+            initiator: None,
+            messages: vec![],
+            tools: vec![],
+            tool_choice: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            max_tokens: None,
+            reasoning: Some(ReasoningConfig {
+                effort: None,
+                max_tokens: None, // normalized away by ModelSpecificReasoning for 4.7/4.6
+                exclude: None,
+                enabled: Some(true),
+            }),
+            stream: None,
+            response_format: None,
+        };
+
+        let actual = ConverseStreamInput::from_domain(fixture).unwrap();
+        let fields = actual
+            .additional_model_request_fields()
+            .expect("adaptive thinking should emit additional_model_request_fields");
+
+        let thinking = match fields {
+            aws_smithy_types::Document::Object(m) => m.get("thinking").expect("thinking present"),
+            _ => panic!("expected object"),
+        };
+        let thinking_map = match thinking {
+            aws_smithy_types::Document::Object(m) => m,
+            _ => panic!("expected thinking object"),
+        };
+        assert_eq!(
+            thinking_map.get("type"),
+            Some(&aws_smithy_types::Document::String("adaptive".to_string()))
+        );
+        assert_eq!(
+            thinking_map.get("display"),
+            Some(&aws_smithy_types::Document::String(
+                "summarized".to_string()
+            ))
+        );
+        assert!(
+            thinking_map.get("budget_tokens").is_none(),
+            "adaptive must not carry budget_tokens"
+        );
+    }
+
+    /// `exclude: true` preference maps to `display: omitted` on the adaptive shape.
+    #[test]
+    fn test_from_domain_context_adaptive_thinking_respects_exclude() {
+        use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;
+        use forge_domain::{Context, ReasoningConfig};
+
+        let fixture = Context {
+            conversation_id: None,
+            initiator: None,
+            messages: vec![],
+            tools: vec![],
+            tool_choice: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            max_tokens: None,
+            reasoning: Some(ReasoningConfig {
+                effort: None,
+                max_tokens: None,
+                exclude: Some(true),
+                enabled: Some(true),
+            }),
+            stream: None,
+            response_format: None,
+        };
+
+        let actual = ConverseStreamInput::from_domain(fixture).unwrap();
+        let fields = actual.additional_model_request_fields().unwrap();
+        let thinking = match fields {
+            aws_smithy_types::Document::Object(m) => m.get("thinking").unwrap(),
+            _ => panic!("expected object"),
+        };
+        let thinking_map = match thinking {
+            aws_smithy_types::Document::Object(m) => m,
+            _ => panic!("expected thinking object"),
+        };
+        assert_eq!(
+            thinking_map.get("display"),
+            Some(&aws_smithy_types::Document::String("omitted".to_string()))
+        );
+    }
+
+    /// Adaptive thinking must NOT trigger the legacy `top_p >= 0.95` clamp —
+    /// that constraint only applies to `thinking.enabled` (budget shape).
+    #[test]
+    fn test_from_domain_context_adaptive_thinking_does_not_clamp_top_p() {
+        use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;
+        use forge_domain::{Context, ReasoningConfig, TopP};
+
+        let fixture = Context {
+            conversation_id: None,
+            initiator: None,
+            messages: vec![],
+            tools: vec![],
+            tool_choice: None,
+            temperature: None,
+            top_p: Some(TopP::new(0.5).unwrap()),
+            top_k: None,
+            max_tokens: None,
+            reasoning: Some(ReasoningConfig {
+                effort: None,
+                max_tokens: None,
+                exclude: None,
+                enabled: Some(true),
+            }),
+            stream: None,
+            response_format: None,
+        };
+
+        let actual = ConverseStreamInput::from_domain(fixture).unwrap();
+        let top_p = actual.inference_config().unwrap().top_p().unwrap();
+        assert!(
+            (top_p - 0.5).abs() < f32::EPSILON,
+            "adaptive thinking must leave top_p untouched, got {top_p}"
+        );
+    }
+
+    /// When `reasoning.effort` survives normalization (i.e. 4.5+/4.6+/4.7
+    /// families), it must be emitted as `output_config.effort`.
+    #[test]
+    fn test_from_domain_context_emits_output_config_effort() {
+        use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;
+        use forge_domain::{Context, Effort, ReasoningConfig};
+
+        let fixture = Context {
+            conversation_id: None,
+            initiator: None,
+            messages: vec![],
+            tools: vec![],
+            tool_choice: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            max_tokens: None,
+            reasoning: Some(ReasoningConfig {
+                effort: Some(Effort::High),
+                max_tokens: None,
+                exclude: None,
+                enabled: Some(true),
+            }),
+            stream: None,
+            response_format: None,
+        };
+
+        let actual = ConverseStreamInput::from_domain(fixture).unwrap();
+        let fields = actual.additional_model_request_fields().unwrap();
+        let output_config = match fields {
+            aws_smithy_types::Document::Object(m) => m.get("output_config").unwrap(),
+            _ => panic!("expected object"),
+        };
+        let output_map = match output_config {
+            aws_smithy_types::Document::Object(m) => m,
+            _ => panic!("expected output_config object"),
+        };
+        assert_eq!(
+            output_map.get("effort"),
+            Some(&aws_smithy_types::Document::String("high".to_string()))
+        );
+    }
+
     #[test]
     fn test_json_value_to_document_empty_object() {
         let fixture = serde_json::json!({});

From ecceb066a531c09952c031f450528d1c7770bce4 Mon Sep 17 00:00:00 2001
From: Amit Singh <amitksingh1490@gmail.com>
Date: Sat, 18 Apr 2026 21:45:07 +0530
Subject: [PATCH 4/5] chore: apply cargo fmt fixes

Co-Authored-By: ForgeCode <noreply@forgecode.dev>
---
 .../forge_app/src/transformers/model_specific_reasoning.rs  | 6 ++----
 crates/forge_main/src/info.rs                               | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/crates/forge_app/src/transformers/model_specific_reasoning.rs b/crates/forge_app/src/transformers/model_specific_reasoning.rs
index d3d1788b54..7c39c2228c 100644
--- a/crates/forge_app/src/transformers/model_specific_reasoning.rs
+++ b/crates/forge_app/src/transformers/model_specific_reasoning.rs
@@ -237,8 +237,7 @@ mod tests {
             exclude: None,
         });
 
-        let actual =
-            ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture);
+        let actual = ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture);
 
         let expected = Context::default().reasoning(ReasoningConfig {
             enabled: Some(true),
@@ -304,8 +303,7 @@ mod tests {
                 exclude: None,
             });
 
-            let actual =
-                ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture);
+            let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture);
 
             let expected = Context::default().reasoning(ReasoningConfig {
                 enabled: Some(true),
diff --git a/crates/forge_main/src/info.rs b/crates/forge_main/src/info.rs
index b0815a8799..074e8e9711 100644
--- a/crates/forge_main/src/info.rs
+++ b/crates/forge_main/src/info.rs
@@ -75,7 +75,7 @@ impl Section {
 /// # Output Format
 ///
 /// ```text
-/// 
+///
 /// CONFIGURATION
 ///   model gpt-4
 /// provider openai

From 47c363635274c79b6f7f1d731bdcaf25f5103009 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 16:17:56 +0000
Subject: [PATCH 5/5] [autofix.ci] apply automated fixes

---
 crates/forge_main/src/info.rs             |  2 +-
 crates/forge_repo/src/provider/bedrock.rs | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/crates/forge_main/src/info.rs b/crates/forge_main/src/info.rs
index 074e8e9711..b0815a8799 100644
--- a/crates/forge_main/src/info.rs
+++ b/crates/forge_main/src/info.rs
@@ -75,7 +75,7 @@ impl Section {
 /// # Output Format
 ///
 /// ```text
-///
+/// 
 /// CONFIGURATION
 ///   model gpt-4
 /// provider openai
diff --git a/crates/forge_repo/src/provider/bedrock.rs b/crates/forge_repo/src/provider/bedrock.rs
index 0bbe13dfe1..1901044f77 100644
--- a/crates/forge_repo/src/provider/bedrock.rs
+++ b/crates/forge_repo/src/provider/bedrock.rs
@@ -576,11 +576,11 @@ impl FromDomain<forge_domain::Context>
         // `reasoning.max_tokens`, which `ModelSpecificReasoning` has already
         // normalized per family:
         //
-        //   - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}`
-        //     (Opus 4.5 and older; budget is backfilled to 10k when absent.)
-        //   - `max_tokens: None`    → `{type: "adaptive", display: ...}`
-        //     (Opus 4.7 rejects the legacy shape with 400; Opus 4.6 /
-        //     Sonnet 4.6 accept adaptive natively.)
+        //   - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}` (Opus 4.5
+        //     and older; budget is backfilled to 10k when absent.)
+        //   - `max_tokens: None`    → `{type: "adaptive", display: ...}` (Opus 4.7
+        //     rejects the legacy shape with 400; Opus 4.6 / Sonnet 4.6 accept adaptive
+        //     natively.)
         //
         // When present, `reasoning.effort` is emitted as `output_config.effort`
         // for families that support it (`ModelSpecificReasoning` drops effort
@@ -1908,7 +1908,8 @@ mod tests {
         );
     }
 
-    /// `exclude: true` preference maps to `display: omitted` on the adaptive shape.
+    /// `exclude: true` preference maps to `display: omitted` on the adaptive
+    /// shape.
     #[test]
     fn test_from_domain_context_adaptive_thinking_respects_exclude() {
         use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;