From 2d4a7518351c472fbe4fa65cc0b7584c604b5bc3 Mon Sep 17 00:00:00 2001 From: Alexander Vostres Date: Sat, 18 Apr 2026 14:59:08 +0300 Subject: [PATCH 1/5] Implement per-model reasoning fixups for Anthropic models --- crates/forge_app/src/dto/anthropic/request.rs | 342 ++++++++++----- .../src/dto/anthropic/transforms/mod.rs | 2 + .../transforms/model_specific_thinking.rs | 388 ++++++++++++++++++ .../transforms/reasoning_transform.rs | 55 ++- crates/forge_domain/src/context.rs | 37 ++ crates/forge_repo/src/provider/anthropic.rs | 24 +- 6 files changed, 747 insertions(+), 101 deletions(-) create mode 100644 crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs index 7527e5f3da..32a91f35d8 100644 --- a/crates/forge_app/src/dto/anthropic/request.rs +++ b/crates/forge_app/src/dto/anthropic/request.rs @@ -60,27 +60,40 @@ impl SystemMessage { } } -#[derive(Serialize, Default, Debug, PartialEq, Eq)] -pub struct Thinking { - pub r#type: ThinkingType, - pub budget_tokens: u64, +/// Anthropic's `thinking` request field. Opus 4.7 rejects the `Enabled` shape +/// with a 400 — the `ModelSpecificThinking` transformer handles the migration. +#[derive(Serialize, Debug, PartialEq, Eq)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum Thinking { + Enabled { + budget_tokens: u64, + }, + Adaptive { + #[serde(skip_serializing_if = "Option::is_none")] + display: Option, + }, + Disabled, +} + +/// On Opus 4.7 adaptive thinking content is omitted from responses unless +/// `Summarized` is requested explicitly. +#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum ThinkingDisplay { + Summarized, + Omitted, } -/// Effort level for Anthropic's `output_config` API. -/// -/// Only the variants officially supported by Anthropic's `output_config.effort` -/// field. Mutually exclusive with the `thinking` object. #[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum OutputEffort { Low, Medium, High, + XHigh, Max, } -/// Output configuration for newer Anthropic models that support effort-based -/// reasoning (e.g. `claude-opus-4-6`). Mutually exclusive with `thinking`. #[derive(Serialize, Debug, PartialEq, Eq)] pub struct OutputConfig { pub effort: OutputEffort, @@ -93,14 +106,6 @@ pub enum OutputFormat { JsonSchema { schema: schemars::Schema }, } -#[derive(Serialize, Default, Debug, Clone, Copy, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub enum ThinkingType { - #[default] - Enabled, - Disabled, -} - impl TryFrom for Request { type Error = anyhow::Error; fn try_from(request: forge_domain::Context) -> std::result::Result { @@ -119,55 +124,41 @@ impl TryFrom for Request { }) .collect::>(); - // Route reasoning config to the correct Anthropic serialization. - // All paths require enabled == Some(true); without it nothing is emitted. - // - // • enabled + max_tokens → thinking object (older models, e.g. - // claude-3-7-sonnet). An explicit reasoning budget unambiguously - // selects the extended-thinking API. effort (which may arrive from - // embedded defaults) is ignored in this branch. - // - // • enabled + effort, no max_tokens → output_config.effort (newer models, e.g. - // claude-opus-4-6). No token budget means the caller chose the effort-based - // API. - // - // • enabled only (no effort, no max_tokens) → thinking with a default budget. - let (thinking, output_config) = if let Some(reasoning) = request.reasoning { - if reasoning.enabled == Some(true) { - if let Some(budget) = reasoning.max_tokens { - // Explicit budget → thinking object regardless of effort. - ( - Some(Thinking { - r#type: ThinkingType::Enabled, - budget_tokens: budget as u64, - }), - None, - ) - } else if let Some(effort) = reasoning.effort { - // Effort without budget → newer output_config API. - let output_effort = match effort { - forge_domain::Effort::Low => OutputEffort::Low, - forge_domain::Effort::High => OutputEffort::High, - forge_domain::Effort::Max => OutputEffort::Max, - // Map unsupported variants to the nearest Anthropic-valid effort. - forge_domain::Effort::None | forge_domain::Effort::Minimal => { - OutputEffort::Low - } - forge_domain::Effort::Medium => OutputEffort::Medium, - forge_domain::Effort::XHigh => OutputEffort::Max, - }; - (None, Some(OutputConfig { effort: output_effort })) - } else { - // Enabled-only → thinking with default budget. - ( - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 10000 }), - None, - ) - } + // Gate on the domain rule so inherited configs with `enabled: None` but + // a positive effort / `max_tokens` still emit reasoning on the wire. + let reasoning_on = request.is_reasoning_supported(); + let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning + { + // Adaptive thinking on 4.7 hides reasoning content by default; opting + // into reasoning should surface it unless the caller set `exclude`. + let adaptive_display = if reasoning.exclude == Some(true) { + Some(ThinkingDisplay::Omitted) } else { - // enabled=false or enabled=None → no reasoning emitted. - (None, None) - } + Some(ThinkingDisplay::Summarized) + }; + + let thinking = if let Some(budget) = reasoning.max_tokens { + Thinking::Enabled { budget_tokens: budget as u64 } + } else { + Thinking::Adaptive { display: adaptive_display } + }; + + // `Effort::None` is an explicit opt-out; `is_reasoning_supported` + // already filters it, but guard here so it can never become a stray + // `output_config.effort`. + let output_config = reasoning.effort.and_then(|effort| { + let output_effort = match effort { + forge_domain::Effort::None => return None, + forge_domain::Effort::Minimal | forge_domain::Effort::Low => OutputEffort::Low, + forge_domain::Effort::Medium => OutputEffort::Medium, + forge_domain::Effort::High => OutputEffort::High, + forge_domain::Effort::XHigh => OutputEffort::XHigh, + forge_domain::Effort::Max => OutputEffort::Max, + }; + Some(OutputConfig { effort: output_effort }) + }); + + (Some(thinking), output_config) } else { (None, None) }; @@ -540,37 +531,49 @@ mod tests { use super::*; #[test] - fn test_thinking_type_serializes_to_enabled() { - let thinking_type = ThinkingType::Enabled; - let actual = serde_json::to_string(&thinking_type).unwrap(); - let expected = r#""enabled""#; + fn test_thinking_enabled_serializes_with_budget() { + let thinking = Thinking::Enabled { budget_tokens: 5000 }; + let actual = serde_json::to_value(&thinking).unwrap(); + let expected = serde_json::json!({ + "type": "enabled", + "budget_tokens": 5000 + }); assert_eq!(actual, expected); } #[test] - fn test_thinking_type_serializes_to_disabled() { - let thinking_type = ThinkingType::Disabled; - let actual = serde_json::to_string(&thinking_type).unwrap(); - let expected = r#""disabled""#; + fn test_thinking_adaptive_serializes_without_display_when_none() { + let thinking = Thinking::Adaptive { display: None }; + let actual = serde_json::to_value(&thinking).unwrap(); + let expected = serde_json::json!({"type": "adaptive"}); assert_eq!(actual, expected); } #[test] - fn test_thinking_struct_serializes_correctly() { - let thinking = Thinking { r#type: ThinkingType::Enabled, budget_tokens: 5000 }; + fn test_thinking_adaptive_serializes_with_summarized_display() { + let thinking = Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }; let actual = serde_json::to_value(&thinking).unwrap(); let expected = serde_json::json!({ - "type": "enabled", - "budget_tokens": 5000 + "type": "adaptive", + "display": "summarized" }); assert_eq!(actual, expected); } #[test] - fn test_reasoning_enabled_with_max_tokens_creates_thinking() { + fn test_thinking_disabled_serializes() { + let thinking = Thinking::Disabled; + let actual = serde_json::to_value(&thinking).unwrap(); + let expected = serde_json::json!({"type": "disabled"}); + + assert_eq!(actual, expected); + } + + #[test] + fn test_reasoning_enabled_with_max_tokens_creates_enabled_thinking() { let fixture = Context::default().reasoning(ReasoningConfig { enabled: Some(true), max_tokens: Some(8000), @@ -580,17 +583,13 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); - assert_eq!( - actual.thinking, - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 8000 }) - ); + assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); assert_eq!(actual.output_config, None); } #[test] - fn test_reasoning_max_tokens_takes_priority_over_effort() { - // When both max_tokens and effort are set, max_tokens triggers the thinking - // path because an explicit budget means the caller wants the older API. + fn test_reasoning_max_tokens_and_effort_emit_both() { + // Effort and budget are independent knobs — neither should hide the other. let fixture = Context::default().reasoning(ReasoningConfig { effort: Some(forge_domain::Effort::Low), enabled: Some(true), @@ -600,16 +599,30 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); + assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); assert_eq!( - actual.thinking, - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 8000 }) + actual.output_config, + Some(OutputConfig { effort: OutputEffort::Low }) ); + } + + #[test] + fn test_reasoning_max_tokens_alone_emits_enabled_only() { + let fixture = Context::default().reasoning(ReasoningConfig { + effort: None, + enabled: Some(true), + max_tokens: Some(8000), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); assert_eq!(actual.output_config, None); } #[test] - fn test_reasoning_effort_without_budget_creates_output_config() { - // Effort with no max_tokens routes to output_config (newer model path). + fn test_reasoning_effort_without_budget_creates_output_config_and_adaptive() { let fixture = Context::default().reasoning(ReasoningConfig { effort: Some(forge_domain::Effort::Low), enabled: Some(true), @@ -623,11 +636,48 @@ mod tests { actual.output_config, Some(OutputConfig { effort: OutputEffort::Low }) ); - assert_eq!(actual.thinking, None); + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_reasoning_effort_with_exclude_emits_adaptive_omitted() { + let fixture = Context::default().reasoning(ReasoningConfig { + effort: Some(forge_domain::Effort::High), + enabled: Some(true), + max_tokens: None, + exclude: Some(true), + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) + ); + } + + #[test] + fn test_reasoning_xhigh_effort_maps_to_xhigh() { + let fixture = Context::default().reasoning(ReasoningConfig { + effort: Some(forge_domain::Effort::XHigh), + enabled: Some(true), + max_tokens: None, + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::XHigh }) + ); } #[test] - fn test_reasoning_enabled_without_max_tokens_uses_default_budget() { + fn test_reasoning_enabled_without_budget_or_effort_defaults_to_adaptive_summarized() { let fixture = Context::default().reasoning(ReasoningConfig { enabled: Some(true), max_tokens: None, @@ -639,7 +689,24 @@ mod tests { assert_eq!( actual.thinking, - Some(Thinking { r#type: ThinkingType::Enabled, budget_tokens: 10000 }) + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_reasoning_enabled_with_exclude_uses_omitted_display() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: None, + exclude: Some(true), + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) ); } @@ -658,7 +725,10 @@ mod tests { } #[test] - fn test_reasoning_enabled_none_does_not_create_thinking() { + fn test_reasoning_enabled_none_with_max_tokens_still_emits_thinking() { + // Matches the domain's `is_reasoning_supported` rule: enabled: None with a + // positive budget counts as on, so inherited/merged configs don't silently + // disable reasoning on the wire. let fixture = Context::default().reasoning(ReasoningConfig { enabled: None, max_tokens: Some(8000), @@ -668,7 +738,91 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); + assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + } + + #[test] + fn test_reasoning_enabled_none_with_effort_still_emits_output_config() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: None, + max_tokens: None, + effort: Some(forge_domain::Effort::High), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::High }) + ); + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_reasoning_enabled_none_with_zero_max_tokens_does_not_emit() { + // Matches `is_reasoning_supported`: max_tokens > 0 is required. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: None, + max_tokens: Some(0), + effort: None, + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); + } + + #[test] + fn test_reasoning_effort_none_does_not_emit_anything() { + // Effort::None is an explicit opt-out — no thinking, no output_config. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: None, + max_tokens: None, + effort: Some(forge_domain::Effort::None), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); + } + + #[test] + fn test_reasoning_effort_none_overrides_enabled_and_max_tokens() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(forge_domain::Effort::None), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); + } + + #[test] + fn test_reasoning_enabled_false_overrides_effort() { + // Explicit opt-out beats inferred enablement. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(false), + max_tokens: None, + effort: Some(forge_domain::Effort::High), + exclude: None, + }); + + let actual = Request::try_from(fixture).unwrap(); + + assert_eq!(actual.thinking, None); + assert_eq!(actual.output_config, None); } #[test] diff --git a/crates/forge_app/src/dto/anthropic/transforms/mod.rs b/crates/forge_app/src/dto/anthropic/transforms/mod.rs index 3ea6d5f183..b9a3b2f765 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/mod.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/mod.rs @@ -2,6 +2,7 @@ mod auth_system_message; mod capitalize_tool_names; mod drop_invalid_toolcalls; mod enforce_schema; +mod model_specific_thinking; mod reasoning_transform; mod remove_output_format; mod sanitize_tool_ids; @@ -11,6 +12,7 @@ pub use auth_system_message::AuthSystemMessage; pub use capitalize_tool_names::CapitalizeToolNames; pub use drop_invalid_toolcalls::DropInvalidToolUse; pub use enforce_schema::EnforceStrictObjectSchema; +pub use model_specific_thinking::ModelSpecificThinking; pub use reasoning_transform::ReasoningTransform; pub use remove_output_format::RemoveOutputFormat; pub use sanitize_tool_ids::SanitizeToolIds; diff --git a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs new file mode 100644 index 0000000000..f20bc95535 --- /dev/null +++ b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs @@ -0,0 +1,388 @@ +use forge_domain::Transformer; +use tracing::warn; + +use crate::dto::anthropic::{OutputConfig, OutputEffort, Request, Thinking, ThinkingDisplay}; + +/// Default budget applied when converting `Adaptive` thinking to the legacy +/// `Enabled` shape for pre-4.6 models that don't accept adaptive. +const DEFAULT_LEGACY_BUDGET_TOKENS: u64 = 10000; + +/// Rewrites the `thinking` and `output_config` fields and strips rejected +/// sampling parameters to match the target model's API contract. Classifies +/// models into four tiers based on the official Anthropic docs: +/// +/// | Tier | Matching ids | Thinking | Effort | xhigh | max | +/// |---------------------|--------------------------------|-------------------|------------|-------|-----| +/// | `AdaptiveOnly` | `opus-4-7` | adaptive only | ✓ | ✓ | ✓ | +/// | `AdaptiveFriendly` | `opus-4-6`, `sonnet-4-6` | both shapes | ✓ | →max | ✓ | +/// | `LegacyWithEffort` | `opus-4-5` | enabled only | ✓ | →high | →high | +/// | `LegacyNoEffort` | everything else | enabled only | drop | drop | drop | +/// +/// Additional behaviour: +/// - `AdaptiveOnly` strips `temperature`/`top_p`/`top_k` (4.7 rejects non- +/// defaults) and warns when a caller-supplied `budget_tokens` is dropped. +/// - The display preference carried from the caller's `ReasoningConfig` +/// (`exclude: true` → `Omitted`) is applied when rewriting `Enabled` to +/// `Adaptive` on 4.7. +pub struct ModelSpecificThinking { + model_id: String, + /// Defaults to `Summarized` because the legacy `Enabled` shape always + /// produced visible reasoning; a 4.7 migration should preserve that unless + /// the caller opts out via `exclude`. + display: ThinkingDisplay, +} + +impl ModelSpecificThinking { + pub fn new(model_id: impl Into) -> Self { + Self { + model_id: model_id.into(), + display: ThinkingDisplay::Summarized, + } + } + + pub fn display(mut self, display: ThinkingDisplay) -> Self { + self.display = display; + self + } + + fn family(&self) -> ModelFamily { + let id = self.model_id.to_lowercase(); + if id.contains("opus-4-7") { + ModelFamily::AdaptiveOnly + } else if id.contains("opus-4-6") || id.contains("sonnet-4-6") { + ModelFamily::AdaptiveFriendly + } else if id.contains("opus-4-5") { + ModelFamily::LegacyWithEffort + } else { + ModelFamily::LegacyNoEffort + } + } +} + +#[derive(Debug, PartialEq, Eq)] +enum ModelFamily { + AdaptiveOnly, + AdaptiveFriendly, + LegacyWithEffort, + LegacyNoEffort, +} + +/// Falls back to `max` — the closest supported level, even though it spends +/// more tokens than `xhigh` would have. +fn replace_xhigh_with_max(output_config: &mut Option) { + if let Some(config) = output_config.as_mut() + && config.effort == OutputEffort::XHigh + { + config.effort = OutputEffort::Max; + } +} + +fn clamp_effort_to_high(output_config: &mut Option) { + if let Some(config) = output_config.as_mut() + && matches!(config.effort, OutputEffort::XHigh | OutputEffort::Max) + { + config.effort = OutputEffort::High; + } +} + +impl Transformer for ModelSpecificThinking { + type Value = Request; + + fn transform(&mut self, mut request: Self::Value) -> Self::Value { + match self.family() { + ModelFamily::AdaptiveOnly => { + if let Some(Thinking::Enabled { budget_tokens }) = request.thinking { + warn!( + model = %self.model_id, + dropped_budget_tokens = budget_tokens, + "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking \ + budgets are unsupported. Use `reasoning.effort` to control \ + thinking depth instead." + ); + request.thinking = Some(Thinking::Adaptive { + display: Some(self.display), + }); + } + request.temperature = None; + request.top_p = None; + request.top_k = None; + } + ModelFamily::LegacyWithEffort => { + if let Some(Thinking::Adaptive { .. }) = request.thinking { + request.thinking = Some(Thinking::Enabled { + budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS, + }); + } + clamp_effort_to_high(&mut request.output_config); + } + ModelFamily::LegacyNoEffort => { + if let Some(Thinking::Adaptive { .. }) = request.thinking { + request.thinking = Some(Thinking::Enabled { + budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS, + }); + } + if request.output_config.is_some() { + warn!( + model = %self.model_id, + "Dropping `output_config.effort`: the effort parameter is only \ + supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7." + ); + request.output_config = None; + } + } + ModelFamily::AdaptiveFriendly => { + replace_xhigh_with_max(&mut request.output_config); + } + } + request + } +} + +#[cfg(test)] +mod tests { + use forge_domain::Transformer; + use pretty_assertions::assert_eq; + + use super::*; + use crate::dto::anthropic::{Request, Thinking}; + + fn fixture_request_with_thinking(thinking: Thinking) -> Request { + Request::default() + .thinking(thinking) + .temperature(0.5f32) + .top_p(0.9f32) + .top_k(40u64) + } + + #[test] + fn test_opus_4_7_rewrites_enabled_to_adaptive_with_summarized_display() { + let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); + let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_opus_4_7_strips_sampling_params() { + let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); + let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); + + assert_eq!(actual.temperature, None); + assert_eq!(actual.top_p, None); + assert_eq!(actual.top_k, None); + } + + #[test] + fn test_opus_4_7_strips_sampling_params_even_without_thinking() { + let fixture = Request::default().temperature(0.5f32).top_p(0.9f32).top_k(40u64); + let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); + + assert_eq!(actual.temperature, None); + assert_eq!(actual.top_p, None); + assert_eq!(actual.top_k, None); + } + + #[test] + fn test_opus_4_7_bedrock_prefix_still_matches() { + let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); + let actual = + ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + assert_eq!(actual.temperature, None); + } + + #[test] + fn test_opus_4_7_passes_through_adaptive_with_existing_display() { + let fixture = fixture_request_with_thinking(Thinking::Adaptive { + display: Some(ThinkingDisplay::Omitted), + }); + let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) + ); + } + + #[test] + fn test_opus_4_6_passes_both_shapes_through() { + let fixture1 = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); + let actual1 = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture1); + assert_eq!(actual1.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!(actual1.temperature, Some(0.5)); + + let fixture2 = fixture_request_with_thinking(Thinking::Adaptive { + display: Some(ThinkingDisplay::Summarized), + }); + let actual2 = ModelSpecificThinking::new("claude-sonnet-4-6").transform(fixture2); + assert_eq!( + actual2.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + } + + #[test] + fn test_opus_4_5_rewrites_adaptive_to_enabled() { + let fixture = fixture_request_with_thinking(Thinking::Adaptive { + display: Some(ThinkingDisplay::Summarized), + }); + let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); + + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS }) + ); + } + + #[test] + fn test_legacy_no_effort_model_passes_enabled_through() { + let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); + let actual = ModelSpecificThinking::new("claude-3-7-sonnet-20250219").transform(fixture); + + assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!(actual.temperature, Some(0.5)); + } + + #[test] + fn test_no_thinking_is_preserved_everywhere() { + for model in [ + "claude-opus-4-7", + "claude-opus-4-6", + "claude-opus-4-5-20251101", + "claude-3-7-sonnet-20250219", + ] { + let fixture = Request::default(); + let actual = ModelSpecificThinking::new(model).transform(fixture); + assert_eq!(actual.thinking, None, "model {}", model); + } + } + + #[test] + fn test_opus_4_7_uses_configured_display_when_rewriting_enabled() { + // The caller's `exclude: true` preference (→ `Omitted` display) must + // survive the 4.7 `Enabled → Adaptive` rewrite; otherwise hidden + // reasoning would silently become visible. + let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); + let actual = ModelSpecificThinking::new("claude-opus-4-7") + .display(ThinkingDisplay::Omitted) + .transform(fixture); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) + ); + } + + #[test] + fn test_opus_4_7_preserves_effort_when_dropping_budget() { + // When both `max_tokens` and `effort` are set on 4.7, dropping the + // budget must not take the effort signal with it — effort is the only + // remaining depth knob on 4.7. + let fixture = Request::default() + .thinking(Thinking::Enabled { budget_tokens: 8000 }) + .output_config(OutputConfig { effort: OutputEffort::XHigh }); + let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); + + assert_eq!( + actual.thinking, + Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) + ); + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::XHigh }) + ); + } + + #[test] + fn test_opus_4_7_preserves_xhigh_effort() { + let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); + let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::XHigh }) + ); + } + + #[test] + fn test_opus_4_6_replaces_xhigh_with_max() { + let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); + let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::Max }) + ); + } + + #[test] + fn test_opus_4_5_clamps_xhigh_to_high() { + // Opus 4.5 supports effort but not xhigh or max; clamp to high. + let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); + let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::High }) + ); + } + + #[test] + fn test_opus_4_5_clamps_max_to_high() { + let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::Max }); + let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::High }) + ); + } + + #[test] + fn test_opus_4_5_preserves_supported_effort_levels() { + for level in [OutputEffort::Low, OutputEffort::Medium, OutputEffort::High] { + let fixture = Request::default().output_config(OutputConfig { effort: level }); + let actual = + ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: level }), + "level {:?}", + level + ); + } + } + + #[test] + fn test_legacy_no_effort_models_drop_output_config() { + for model in [ + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5-20251001", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514", + "claude-3-7-sonnet-20250219", + ] { + let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High }); + let actual = ModelSpecificThinking::new(model).transform(fixture); + assert_eq!(actual.output_config, None, "model {}", model); + } + } + + #[test] + fn test_adaptive_friendly_preserves_high_effort() { + let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High }); + let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture); + + assert_eq!( + actual.output_config, + Some(OutputConfig { effort: OutputEffort::High }) + ); + } +} diff --git a/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs b/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs index cfff041f18..1c82876a1c 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/reasoning_transform.rs @@ -5,10 +5,10 @@ pub struct ReasoningTransform; impl Transformer for ReasoningTransform { type Value = Context; fn transform(&mut self, mut context: Self::Value) -> Self::Value { - if let Some(reasoning) = context.reasoning.as_ref() - && reasoning.enabled.unwrap_or(false) - { - // if reasoning is enabled then we've to drop top_k and top_p + // Must stay in lockstep with the Anthropic request builder, which gates + // on the same predicate — otherwise `thinking`/`output_config` ship + // alongside sampling params that Anthropic rejects. + if context.is_reasoning_supported() { context.top_k = None; context.top_p = None; } @@ -85,4 +85,51 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn test_enabled_none_with_effort_still_strips_top_k_and_top_p() { + // `enabled: None` + effort is treated as reasoning-on (domain rule). + let fixture = create_context_fixture().reasoning(ReasoningConfig { + enabled: None, + max_tokens: None, + effort: Some(forge_domain::Effort::High), + exclude: None, + }); + let mut transformer = ReasoningTransform; + let actual = transformer.transform(fixture); + + assert_eq!(actual.top_k, None); + assert_eq!(actual.top_p, None); + } + + #[test] + fn test_enabled_none_with_positive_max_tokens_still_strips_top_k_and_top_p() { + let fixture = create_context_fixture().reasoning(ReasoningConfig { + enabled: None, + max_tokens: Some(8000), + effort: None, + exclude: None, + }); + let mut transformer = ReasoningTransform; + let actual = transformer.transform(fixture); + + assert_eq!(actual.top_k, None); + assert_eq!(actual.top_p, None); + } + + #[test] + fn test_enabled_none_with_zero_max_tokens_preserves_top_k_and_top_p() { + // Matches `is_reasoning_supported`: max_tokens == 0 is treated as off. + let fixture = create_context_fixture().reasoning(ReasoningConfig { + enabled: None, + max_tokens: Some(0), + effort: None, + exclude: None, + }); + let mut transformer = ReasoningTransform; + let actual = transformer.transform(fixture.clone()); + + assert_eq!(actual.top_k, fixture.top_k); + assert_eq!(actual.top_p, fixture.top_p); + } } diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs index 664000e1eb..13e6c3398b 100644 --- a/crates/forge_domain/src/context.rs +++ b/crates/forge_domain/src/context.rs @@ -640,6 +640,12 @@ impl Context { /// Checks if reasoning is enabled by user or not. pub fn is_reasoning_supported(&self) -> bool { self.reasoning.as_ref().is_some_and(|reasoning| { + // `Effort::None` is a strong opt-out that wins over `enabled` and + // `max_tokens`. + if matches!(reasoning.effort, Some(crate::Effort::None)) { + return false; + } + // When enabled parameter is defined then return it's value directly. if reasoning.enabled.is_some() { return reasoning.enabled.unwrap_or_default(); @@ -1193,6 +1199,37 @@ mod tests { ); } + #[test] + fn test_context_is_reasoning_not_supported_when_effort_is_none() { + // `Effort::None` is documented as "skips the thinking step entirely" and + // must act as an explicit opt-out regardless of other fields. + let fixture = Context::default().reasoning(crate::ReasoningConfig { + effort: Some(crate::Effort::None), + ..Default::default() + }); + + let actual = fixture.is_reasoning_supported(); + + assert!(!actual); + } + + #[test] + fn test_context_is_reasoning_not_supported_when_effort_none_overrides_enabled_true() { + let fixture = Context::default().reasoning(crate::ReasoningConfig { + enabled: Some(true), + effort: Some(crate::Effort::None), + max_tokens: Some(8000), + ..Default::default() + }); + + let actual = fixture.is_reasoning_supported(); + + assert!( + !actual, + "Effort::None must win over enabled: true and max_tokens" + ); + } + #[test] fn test_add_attachments_file_content_is_droppable() { let fixture_attachments = vec![Attachment { diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs index 3292f5ab9f..1c9924901d 100644 --- a/crates/forge_repo/src/provider/anthropic.rs +++ b/crates/forge_repo/src/provider/anthropic.rs @@ -7,8 +7,8 @@ use forge_app::domain::{ }; use forge_app::dto::anthropic::{ AuthSystemMessage, CapitalizeToolNames, DropInvalidToolUse, EnforceStrictObjectSchema, - EventData, ListModelResponse, ReasoningTransform, RemoveOutputFormat, Request, SanitizeToolIds, - SetCache, + EventData, ListModelResponse, ModelSpecificThinking, ReasoningTransform, RemoveOutputFormat, + Request, SanitizeToolIds, SetCache, ThinkingDisplay, }; use forge_app::{EnvironmentInfra, HttpInfra}; use forge_domain::{ChatRepository, Provider, ProviderId}; @@ -101,6 +101,19 @@ impl Anthropic { // transform the context to match the request format let context = ReasoningTransform.transform(context); + // Read out of `context` before `Request::try_from` consumes it. + let display_preference = context + .reasoning + .as_ref() + .and_then(|r| r.exclude) + .map(|exclude| { + if exclude { + ThinkingDisplay::Omitted + } else { + ThinkingDisplay::Summarized + } + }); + let mut request = Request::try_from(context)?.max_tokens(max_tokens as u64); // For Vertex AI Anthropic, model is in the URL path, not the request body @@ -110,11 +123,16 @@ impl Anthropic { request = request.model(model.as_str().to_string()); } + let mut thinking_transform = ModelSpecificThinking::new(model.as_str()); + if let Some(display) = display_preference { + thinking_transform = thinking_transform.display(display); + } let pipeline = AuthSystemMessage::default() .when(|_| self.use_oauth) .pipe(CapitalizeToolNames) .pipe(DropInvalidToolUse) - .pipe(SanitizeToolIds); + .pipe(SanitizeToolIds) + .pipe(thinking_transform); // Vertex AI does not support output_format, so we skip schema enforcement // and remove any output_format field From 17cc84cb6733e82d47cbc0ef749b3af2a6c68aea Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 12:16:52 +0000 Subject: [PATCH 2/5] [autofix.ci] apply automated fixes --- crates/forge_app/src/dto/anthropic/request.rs | 23 ++++++--- .../transforms/model_specific_thinking.rs | 47 +++++++++++-------- crates/forge_repo/src/provider/anthropic.rs | 23 ++++----- 3 files changed, 56 insertions(+), 37 deletions(-) diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs index 32a91f35d8..addd8421c8 100644 --- a/crates/forge_app/src/dto/anthropic/request.rs +++ b/crates/forge_app/src/dto/anthropic/request.rs @@ -127,8 +127,7 @@ impl TryFrom for Request { // Gate on the domain rule so inherited configs with `enabled: None` but // a positive effort / `max_tokens` still emit reasoning on the wire. let reasoning_on = request.is_reasoning_supported(); - let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning - { + let (thinking, output_config) = if reasoning_on && let Some(reasoning) = request.reasoning { // Adaptive thinking on 4.7 hides reasoning content by default; opting // into reasoning should surface it unless the caller set `exclude`. let adaptive_display = if reasoning.exclude == Some(true) { @@ -583,7 +582,10 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); - assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); assert_eq!(actual.output_config, None); } @@ -599,7 +601,10 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); - assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); assert_eq!( actual.output_config, Some(OutputConfig { effort: OutputEffort::Low }) @@ -617,7 +622,10 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); - assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); assert_eq!(actual.output_config, None); } @@ -738,7 +746,10 @@ mod tests { let actual = Request::try_from(fixture).unwrap(); - assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); } #[test] diff --git a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs index f20bc95535..4c7157f3ca 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs @@ -99,9 +99,7 @@ impl Transformer for ModelSpecificThinking { budgets are unsupported. Use `reasoning.effort` to control \ thinking depth instead." ); - request.thinking = Some(Thinking::Adaptive { - display: Some(self.display), - }); + request.thinking = Some(Thinking::Adaptive { display: Some(self.display) }); } request.temperature = None; request.top_p = None; @@ -109,17 +107,15 @@ impl Transformer for ModelSpecificThinking { } ModelFamily::LegacyWithEffort => { if let Some(Thinking::Adaptive { .. }) = request.thinking { - request.thinking = Some(Thinking::Enabled { - budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS, - }); + request.thinking = + Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS }); } clamp_effort_to_high(&mut request.output_config); } ModelFamily::LegacyNoEffort => { if let Some(Thinking::Adaptive { .. }) = request.thinking { - request.thinking = Some(Thinking::Enabled { - budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS, - }); + request.thinking = + Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS }); } if request.output_config.is_some() { warn!( @@ -177,7 +173,10 @@ mod tests { #[test] fn test_opus_4_7_strips_sampling_params_even_without_thinking() { - let fixture = Request::default().temperature(0.5f32).top_p(0.9f32).top_k(40u64); + let fixture = Request::default() + .temperature(0.5f32) + .top_p(0.9f32) + .top_k(40u64); let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); assert_eq!(actual.temperature, None); @@ -188,8 +187,7 @@ mod tests { #[test] fn test_opus_4_7_bedrock_prefix_still_matches() { let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual = - ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture); + let actual = ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture); assert_eq!( actual.thinking, @@ -215,7 +213,10 @@ mod tests { fn test_opus_4_6_passes_both_shapes_through() { let fixture1 = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); let actual1 = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture1); - assert_eq!(actual1.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!( + actual1.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); assert_eq!(actual1.temperature, Some(0.5)); let fixture2 = fixture_request_with_thinking(Thinking::Adaptive { @@ -246,7 +247,10 @@ mod tests { let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); let actual = ModelSpecificThinking::new("claude-3-7-sonnet-20250219").transform(fixture); - assert_eq!(actual.thinking, Some(Thinking::Enabled { budget_tokens: 8000 })); + assert_eq!( + actual.thinking, + Some(Thinking::Enabled { budget_tokens: 8000 }) + ); assert_eq!(actual.temperature, Some(0.5)); } @@ -302,7 +306,8 @@ mod tests { #[test] fn test_opus_4_7_preserves_xhigh_effort() { - let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); + let fixture = + Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); assert_eq!( @@ -313,7 +318,8 @@ mod tests { #[test] fn test_opus_4_6_replaces_xhigh_with_max() { - let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); + let fixture = + Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture); assert_eq!( @@ -325,7 +331,8 @@ mod tests { #[test] fn test_opus_4_5_clamps_xhigh_to_high() { // Opus 4.5 supports effort but not xhigh or max; clamp to high. - let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); + let fixture = + Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); assert_eq!( @@ -349,8 +356,7 @@ mod tests { fn test_opus_4_5_preserves_supported_effort_levels() { for level in [OutputEffort::Low, OutputEffort::Medium, OutputEffort::High] { let fixture = Request::default().output_config(OutputConfig { effort: level }); - let actual = - ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); + let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); assert_eq!( actual.output_config, Some(OutputConfig { effort: level }), @@ -369,7 +375,8 @@ mod tests { "claude-opus-4-20250514", "claude-3-7-sonnet-20250219", ] { - let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High }); + let fixture = + Request::default().output_config(OutputConfig { effort: OutputEffort::High }); let actual = ModelSpecificThinking::new(model).transform(fixture); assert_eq!(actual.output_config, None, "model {}", model); } diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs index 1c9924901d..8429aa11e0 100644 --- a/crates/forge_repo/src/provider/anthropic.rs +++ b/crates/forge_repo/src/provider/anthropic.rs @@ -102,17 +102,18 @@ impl Anthropic { let context = ReasoningTransform.transform(context); // Read out of `context` before `Request::try_from` consumes it. - let display_preference = context - .reasoning - .as_ref() - .and_then(|r| r.exclude) - .map(|exclude| { - if exclude { - ThinkingDisplay::Omitted - } else { - ThinkingDisplay::Summarized - } - }); + let display_preference = + context + .reasoning + .as_ref() + .and_then(|r| r.exclude) + .map(|exclude| { + if exclude { + ThinkingDisplay::Omitted + } else { + ThinkingDisplay::Summarized + } + }); let mut request = Request::try_from(context)?.max_tokens(max_tokens as u64); From 0e18a098caa9ebe0293f85ff449f9a0c257f13be Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Sat, 18 Apr 2026 21:39:43 +0530 Subject: [PATCH 3/5] refactor(anthropic): move model-specific reasoning normalization from dto to transformers --- crates/forge_app/src/dto/anthropic/request.rs | 3 +- .../src/dto/anthropic/transforms/mod.rs | 2 - .../transforms/model_specific_thinking.rs | 395 ------------------ crates/forge_app/src/orch.rs | 8 +- crates/forge_app/src/transformers/mod.rs | 2 + .../transformers/model_specific_reasoning.rs | 392 +++++++++++++++++ crates/forge_repo/src/provider/anthropic.rs | 182 ++++++-- crates/forge_repo/src/provider/bedrock.rs | 304 ++++++++++++-- 8 files changed, 818 insertions(+), 470 deletions(-) delete mode 100644 crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs create mode 100644 crates/forge_app/src/transformers/model_specific_reasoning.rs diff --git a/crates/forge_app/src/dto/anthropic/request.rs b/crates/forge_app/src/dto/anthropic/request.rs index addd8421c8..72aa4496ec 100644 --- a/crates/forge_app/src/dto/anthropic/request.rs +++ b/crates/forge_app/src/dto/anthropic/request.rs @@ -61,7 +61,8 @@ impl SystemMessage { } /// Anthropic's `thinking` request field. Opus 4.7 rejects the `Enabled` shape -/// with a 400 — the `ModelSpecificThinking` transformer handles the migration. +/// and the orchestrator applies model-specific reasoning normalization before +/// request conversion. #[derive(Serialize, Debug, PartialEq, Eq)] #[serde(tag = "type", rename_all = "lowercase")] pub enum Thinking { diff --git a/crates/forge_app/src/dto/anthropic/transforms/mod.rs b/crates/forge_app/src/dto/anthropic/transforms/mod.rs index b9a3b2f765..3ea6d5f183 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/mod.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/mod.rs @@ -2,7 +2,6 @@ mod auth_system_message; mod capitalize_tool_names; mod drop_invalid_toolcalls; mod enforce_schema; -mod model_specific_thinking; mod reasoning_transform; mod remove_output_format; mod sanitize_tool_ids; @@ -12,7 +11,6 @@ pub use auth_system_message::AuthSystemMessage; pub use capitalize_tool_names::CapitalizeToolNames; pub use drop_invalid_toolcalls::DropInvalidToolUse; pub use enforce_schema::EnforceStrictObjectSchema; -pub use model_specific_thinking::ModelSpecificThinking; pub use reasoning_transform::ReasoningTransform; pub use remove_output_format::RemoveOutputFormat; pub use sanitize_tool_ids::SanitizeToolIds; diff --git a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs b/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs deleted file mode 100644 index 4c7157f3ca..0000000000 --- a/crates/forge_app/src/dto/anthropic/transforms/model_specific_thinking.rs +++ /dev/null @@ -1,395 +0,0 @@ -use forge_domain::Transformer; -use tracing::warn; - -use crate::dto::anthropic::{OutputConfig, OutputEffort, Request, Thinking, ThinkingDisplay}; - -/// Default budget applied when converting `Adaptive` thinking to the legacy -/// `Enabled` shape for pre-4.6 models that don't accept adaptive. -const DEFAULT_LEGACY_BUDGET_TOKENS: u64 = 10000; - -/// Rewrites the `thinking` and `output_config` fields and strips rejected -/// sampling parameters to match the target model's API contract. Classifies -/// models into four tiers based on the official Anthropic docs: -/// -/// | Tier | Matching ids | Thinking | Effort | xhigh | max | -/// |---------------------|--------------------------------|-------------------|------------|-------|-----| -/// | `AdaptiveOnly` | `opus-4-7` | adaptive only | ✓ | ✓ | ✓ | -/// | `AdaptiveFriendly` | `opus-4-6`, `sonnet-4-6` | both shapes | ✓ | →max | ✓ | -/// | `LegacyWithEffort` | `opus-4-5` | enabled only | ✓ | →high | →high | -/// | `LegacyNoEffort` | everything else | enabled only | drop | drop | drop | -/// -/// Additional behaviour: -/// - `AdaptiveOnly` strips `temperature`/`top_p`/`top_k` (4.7 rejects non- -/// defaults) and warns when a caller-supplied `budget_tokens` is dropped. -/// - The display preference carried from the caller's `ReasoningConfig` -/// (`exclude: true` → `Omitted`) is applied when rewriting `Enabled` to -/// `Adaptive` on 4.7. -pub struct ModelSpecificThinking { - model_id: String, - /// Defaults to `Summarized` because the legacy `Enabled` shape always - /// produced visible reasoning; a 4.7 migration should preserve that unless - /// the caller opts out via `exclude`. - display: ThinkingDisplay, -} - -impl ModelSpecificThinking { - pub fn new(model_id: impl Into) -> Self { - Self { - model_id: model_id.into(), - display: ThinkingDisplay::Summarized, - } - } - - pub fn display(mut self, display: ThinkingDisplay) -> Self { - self.display = display; - self - } - - fn family(&self) -> ModelFamily { - let id = self.model_id.to_lowercase(); - if id.contains("opus-4-7") { - ModelFamily::AdaptiveOnly - } else if id.contains("opus-4-6") || id.contains("sonnet-4-6") { - ModelFamily::AdaptiveFriendly - } else if id.contains("opus-4-5") { - ModelFamily::LegacyWithEffort - } else { - ModelFamily::LegacyNoEffort - } - } -} - -#[derive(Debug, PartialEq, Eq)] -enum ModelFamily { - AdaptiveOnly, - AdaptiveFriendly, - LegacyWithEffort, - LegacyNoEffort, -} - -/// Falls back to `max` — the closest supported level, even though it spends -/// more tokens than `xhigh` would have. -fn replace_xhigh_with_max(output_config: &mut Option) { - if let Some(config) = output_config.as_mut() - && config.effort == OutputEffort::XHigh - { - config.effort = OutputEffort::Max; - } -} - -fn clamp_effort_to_high(output_config: &mut Option) { - if let Some(config) = output_config.as_mut() - && matches!(config.effort, OutputEffort::XHigh | OutputEffort::Max) - { - config.effort = OutputEffort::High; - } -} - -impl Transformer for ModelSpecificThinking { - type Value = Request; - - fn transform(&mut self, mut request: Self::Value) -> Self::Value { - match self.family() { - ModelFamily::AdaptiveOnly => { - if let Some(Thinking::Enabled { budget_tokens }) = request.thinking { - warn!( - model = %self.model_id, - dropped_budget_tokens = budget_tokens, - "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking \ - budgets are unsupported. Use `reasoning.effort` to control \ - thinking depth instead." - ); - request.thinking = Some(Thinking::Adaptive { display: Some(self.display) }); - } - request.temperature = None; - request.top_p = None; - request.top_k = None; - } - ModelFamily::LegacyWithEffort => { - if let Some(Thinking::Adaptive { .. }) = request.thinking { - request.thinking = - Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS }); - } - clamp_effort_to_high(&mut request.output_config); - } - ModelFamily::LegacyNoEffort => { - if let Some(Thinking::Adaptive { .. }) = request.thinking { - request.thinking = - Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS }); - } - if request.output_config.is_some() { - warn!( - model = %self.model_id, - "Dropping `output_config.effort`: the effort parameter is only \ - supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7." - ); - request.output_config = None; - } - } - ModelFamily::AdaptiveFriendly => { - replace_xhigh_with_max(&mut request.output_config); - } - } - request - } -} - -#[cfg(test)] -mod tests { - use forge_domain::Transformer; - use pretty_assertions::assert_eq; - - use super::*; - use crate::dto::anthropic::{Request, Thinking}; - - fn fixture_request_with_thinking(thinking: Thinking) -> Request { - Request::default() - .thinking(thinking) - .temperature(0.5f32) - .top_p(0.9f32) - .top_k(40u64) - } - - #[test] - fn test_opus_4_7_rewrites_enabled_to_adaptive_with_summarized_display() { - let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) - ); - } - - #[test] - fn test_opus_4_7_strips_sampling_params() { - let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); - - assert_eq!(actual.temperature, None); - assert_eq!(actual.top_p, None); - assert_eq!(actual.top_k, None); - } - - #[test] - fn test_opus_4_7_strips_sampling_params_even_without_thinking() { - let fixture = Request::default() - .temperature(0.5f32) - .top_p(0.9f32) - .top_k(40u64); - let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); - - assert_eq!(actual.temperature, None); - assert_eq!(actual.top_p, None); - assert_eq!(actual.top_k, None); - } - - #[test] - fn test_opus_4_7_bedrock_prefix_still_matches() { - let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual = ModelSpecificThinking::new("us.anthropic.claude-opus-4-7").transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) - ); - assert_eq!(actual.temperature, None); - } - - #[test] - fn test_opus_4_7_passes_through_adaptive_with_existing_display() { - let fixture = fixture_request_with_thinking(Thinking::Adaptive { - display: Some(ThinkingDisplay::Omitted), - }); - let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) - ); - } - - #[test] - fn test_opus_4_6_passes_both_shapes_through() { - let fixture1 = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual1 = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture1); - assert_eq!( - actual1.thinking, - Some(Thinking::Enabled { budget_tokens: 8000 }) - ); - assert_eq!(actual1.temperature, Some(0.5)); - - let fixture2 = fixture_request_with_thinking(Thinking::Adaptive { - display: Some(ThinkingDisplay::Summarized), - }); - let actual2 = ModelSpecificThinking::new("claude-sonnet-4-6").transform(fixture2); - assert_eq!( - actual2.thinking, - Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) - ); - } - - #[test] - fn test_opus_4_5_rewrites_adaptive_to_enabled() { - let fixture = fixture_request_with_thinking(Thinking::Adaptive { - display: Some(ThinkingDisplay::Summarized), - }); - let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Enabled { budget_tokens: DEFAULT_LEGACY_BUDGET_TOKENS }) - ); - } - - #[test] - fn test_legacy_no_effort_model_passes_enabled_through() { - let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual = ModelSpecificThinking::new("claude-3-7-sonnet-20250219").transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Enabled { budget_tokens: 8000 }) - ); - assert_eq!(actual.temperature, Some(0.5)); - } - - #[test] - fn test_no_thinking_is_preserved_everywhere() { - for model in [ - "claude-opus-4-7", - "claude-opus-4-6", - "claude-opus-4-5-20251101", - "claude-3-7-sonnet-20250219", - ] { - let fixture = Request::default(); - let actual = ModelSpecificThinking::new(model).transform(fixture); - assert_eq!(actual.thinking, None, "model {}", model); - } - } - - #[test] - fn test_opus_4_7_uses_configured_display_when_rewriting_enabled() { - // The caller's `exclude: true` preference (→ `Omitted` display) must - // survive the 4.7 `Enabled → Adaptive` rewrite; otherwise hidden - // reasoning would silently become visible. - let fixture = fixture_request_with_thinking(Thinking::Enabled { budget_tokens: 8000 }); - let actual = ModelSpecificThinking::new("claude-opus-4-7") - .display(ThinkingDisplay::Omitted) - .transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Omitted) }) - ); - } - - #[test] - fn test_opus_4_7_preserves_effort_when_dropping_budget() { - // When both `max_tokens` and `effort` are set on 4.7, dropping the - // budget must not take the effort signal with it — effort is the only - // remaining depth knob on 4.7. - let fixture = Request::default() - .thinking(Thinking::Enabled { budget_tokens: 8000 }) - .output_config(OutputConfig { effort: OutputEffort::XHigh }); - let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); - - assert_eq!( - actual.thinking, - Some(Thinking::Adaptive { display: Some(ThinkingDisplay::Summarized) }) - ); - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: OutputEffort::XHigh }) - ); - } - - #[test] - fn test_opus_4_7_preserves_xhigh_effort() { - let fixture = - Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); - let actual = ModelSpecificThinking::new("claude-opus-4-7").transform(fixture); - - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: OutputEffort::XHigh }) - ); - } - - #[test] - fn test_opus_4_6_replaces_xhigh_with_max() { - let fixture = - Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); - let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture); - - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: OutputEffort::Max }) - ); - } - - #[test] - fn test_opus_4_5_clamps_xhigh_to_high() { - // Opus 4.5 supports effort but not xhigh or max; clamp to high. - let fixture = - Request::default().output_config(OutputConfig { effort: OutputEffort::XHigh }); - let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); - - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: OutputEffort::High }) - ); - } - - #[test] - fn test_opus_4_5_clamps_max_to_high() { - let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::Max }); - let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); - - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: OutputEffort::High }) - ); - } - - #[test] - fn test_opus_4_5_preserves_supported_effort_levels() { - for level in [OutputEffort::Low, OutputEffort::Medium, OutputEffort::High] { - let fixture = Request::default().output_config(OutputConfig { effort: level }); - let actual = ModelSpecificThinking::new("claude-opus-4-5-20251101").transform(fixture); - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: level }), - "level {:?}", - level - ); - } - } - - #[test] - fn test_legacy_no_effort_models_drop_output_config() { - for model in [ - "claude-sonnet-4-5-20250929", - "claude-haiku-4-5-20251001", - "claude-opus-4-1-20250805", - "claude-opus-4-20250514", - "claude-3-7-sonnet-20250219", - ] { - let fixture = - Request::default().output_config(OutputConfig { effort: OutputEffort::High }); - let actual = ModelSpecificThinking::new(model).transform(fixture); - assert_eq!(actual.output_config, None, "model {}", model); - } - } - - #[test] - fn test_adaptive_friendly_preserves_high_effort() { - let fixture = Request::default().output_config(OutputConfig { effort: OutputEffort::High }); - let actual = ModelSpecificThinking::new("claude-opus-4-6").transform(fixture); - - assert_eq!( - actual.output_config, - Some(OutputConfig { effort: OutputEffort::High }) - ); - } -} diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs index 86157c24e2..a81145ac80 100644 --- a/crates/forge_app/src/orch.rs +++ b/crates/forge_app/src/orch.rs @@ -11,6 +11,7 @@ use tokio::sync::Notify; use tracing::warn; use crate::agent::AgentService; +use crate::transformers::ModelSpecificReasoning; use crate::{EnvironmentInfra, TemplateEngine}; #[derive(Clone, Setters)] @@ -208,7 +209,12 @@ impl> Orc .pipe(DropReasoningDetails.when(|_| !reasoning_supported)) // Strip all reasoning from messages when the model has changed (signatures are // model-specific and invalid across models). No-op when model is unchanged. - .pipe(ReasoningNormalizer::new(model_id.clone())); + .pipe(ReasoningNormalizer::new(model_id.clone())) + // Normalize Anthropic reasoning knobs per model family before provider conversion. + .pipe( + ModelSpecificReasoning::new(model_id.as_str()) + .when(|_| model_id.as_str().to_lowercase().contains("claude")), + ); let response = self .services .chat_agent( diff --git a/crates/forge_app/src/transformers/mod.rs b/crates/forge_app/src/transformers/mod.rs index ef434b62dd..a8b84543ea 100644 --- a/crates/forge_app/src/transformers/mod.rs +++ b/crates/forge_app/src/transformers/mod.rs @@ -1,7 +1,9 @@ mod compaction; mod dedupe_role; mod drop_role; +mod model_specific_reasoning; mod strip_working_dir; mod trim_context_summary; pub use compaction::SummaryTransformer; +pub(crate) use model_specific_reasoning::ModelSpecificReasoning; diff --git a/crates/forge_app/src/transformers/model_specific_reasoning.rs b/crates/forge_app/src/transformers/model_specific_reasoning.rs new file mode 100644 index 0000000000..d3d1788b54 --- /dev/null +++ b/crates/forge_app/src/transformers/model_specific_reasoning.rs @@ -0,0 +1,392 @@ +use forge_domain::{Context, Effort, ReasoningConfig, Transformer}; +use tracing::warn; + +/// Default budget applied when converting adaptive-style reasoning into legacy +/// budget-based reasoning for pre-4.6 Anthropic model families. +const DEFAULT_LEGACY_BUDGET_TOKENS: usize = 10000; + +#[derive(Debug, PartialEq, Eq)] +enum AnthropicModelFamily { + AdaptiveOnly, + AdaptiveFriendly, + LegacyWithEffort, + LegacyNoEffort, +} + +/// Normalizes reasoning knobs for Anthropic model families before provider +/// conversion. +pub(crate) struct ModelSpecificReasoning { + model_id: String, +} + +impl ModelSpecificReasoning { + /// Creates a model-specific reasoning normalizer for the given model id. + pub(crate) fn new(model_id: impl Into) -> Self { + Self { model_id: model_id.into() } + } + + fn family(&self) -> AnthropicModelFamily { + let id = self.model_id.to_lowercase(); + if id.contains("opus-4-7") { + AnthropicModelFamily::AdaptiveOnly + } else if id.contains("opus-4-6") || id.contains("sonnet-4-6") { + AnthropicModelFamily::AdaptiveFriendly + } else if id.contains("opus-4-5") { + AnthropicModelFamily::LegacyWithEffort + } else { + AnthropicModelFamily::LegacyNoEffort + } + } +} + +fn replace_xhigh_with_max(reasoning: &mut Option) { + if let Some(reasoning) = reasoning.as_mut() + && reasoning.effort == Some(Effort::XHigh) + { + reasoning.effort = Some(Effort::Max); + } +} + +fn clamp_effort_to_high(reasoning: &mut Option) { + if let Some(reasoning) = reasoning.as_mut() + && matches!(reasoning.effort, Some(Effort::XHigh | Effort::Max)) + { + reasoning.effort = Some(Effort::High); + } +} + +fn set_default_legacy_budget(reasoning: &mut Option) { + if let Some(reasoning) = reasoning.as_mut() + && reasoning.max_tokens.is_none() + { + reasoning.max_tokens = Some(DEFAULT_LEGACY_BUDGET_TOKENS); + } +} + +impl Transformer for ModelSpecificReasoning { + type Value = Context; + + fn transform(&mut self, mut context: Self::Value) -> Self::Value { + let reasoning_on = context.is_reasoning_supported(); + + match self.family() { + AnthropicModelFamily::AdaptiveOnly => { + if reasoning_on + && let Some(reasoning) = context.reasoning.as_mut() + && let Some(max_tokens) = reasoning.max_tokens.take() + { + warn!( + model = %self.model_id, + dropped_max_tokens = max_tokens, + "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead." + ); + } + context.temperature = None; + context.top_p = None; + context.top_k = None; + } + AnthropicModelFamily::AdaptiveFriendly => { + if reasoning_on { + replace_xhigh_with_max(&mut context.reasoning); + } + } + AnthropicModelFamily::LegacyWithEffort => { + if reasoning_on { + set_default_legacy_budget(&mut context.reasoning); + clamp_effort_to_high(&mut context.reasoning); + } + } + AnthropicModelFamily::LegacyNoEffort => { + if reasoning_on { + set_default_legacy_budget(&mut context.reasoning); + if let Some(reasoning) = context.reasoning.as_mut() + && reasoning.effort.is_some() + { + warn!( + model = %self.model_id, + "Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7." + ); + reasoning.effort = None; + } + } + } + } + + context + } +} + +#[cfg(test)] +mod tests { + use forge_domain::{Context, Effort, ReasoningConfig, Temperature, TopK, TopP, Transformer}; + use pretty_assertions::assert_eq; + + use super::*; + + fn fixture_context_with_sampling() -> Context { + Context::default() + .temperature(Temperature::new(0.5).unwrap()) + .top_p(TopP::new(0.9).unwrap()) + .top_k(TopK::new(40).unwrap()) + } + + #[test] + fn test_opus_4_7_drops_max_tokens_and_sampling_params() { + let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: Some(true), + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: Some(true), + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_7_strips_sampling_even_without_reasoning() { + let fixture = fixture_context_with_sampling(); + + let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture); + + let expected = Context::default(); + + assert_eq!(actual, expected); + } + + #[test] + fn test_adaptive_friendly_replaces_xhigh_with_max() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-6").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::Max), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_legacy_with_effort_backfills_budget_and_clamps_effort() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::Max), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(DEFAULT_LEGACY_BUDGET_TOKENS), + effort: Some(Effort::High), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_legacy_no_effort_backfills_budget_and_drops_effort() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::High), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-3-7-sonnet-20250219").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(DEFAULT_LEGACY_BUDGET_TOKENS), + effort: None, + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_7_bedrock_prefix_still_matches() { + // Bedrock region prefixes (`us.anthropic.claude-...`) must still be + // classified as AdaptiveOnly so sampling params are stripped and + // `max_tokens` is dropped. + let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = + ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_7_preserves_effort_when_dropping_max_tokens() { + // When both knobs are set on 4.7, only `max_tokens` should be dropped; + // `effort` is the remaining depth knob and must survive. + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-7").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_5_clamps_max_to_high() { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::Max), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::High), + exclude: None, + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_5_preserves_supported_effort_levels() { + for level in [Effort::Low, Effort::Medium, Effort::High] { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(level.clone()), + exclude: None, + }); + + let actual = + ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(level.clone()), + exclude: None, + }); + + assert_eq!(actual, expected, "effort level {:?}", level); + } + } + + #[test] + fn test_legacy_no_effort_drops_effort_for_all_pre_4_5_ids() { + // All pre-Opus-4.5 Claude ids (plus the newer non-effort family members + // Sonnet 4.5 and Haiku 4.5) should land in LegacyNoEffort and have their + // effort stripped. + for model in [ + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5-20251001", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514", + "claude-3-7-sonnet-20250219", + ] { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::High), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new(model).transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: None, + exclude: None, + }); + + assert_eq!(actual, expected, "model {}", model); + } + } + + #[test] + fn test_no_reasoning_is_preserved_everywhere() { + // A context without `reasoning` must pass through unchanged for every + // family except AdaptiveOnly, which still strips sampling params. + for model in [ + "claude-opus-4-6", + "claude-sonnet-4-6", + "claude-opus-4-5-20251101", + "claude-3-7-sonnet-20250219", + ] { + let fixture = Context::default(); + let actual = ModelSpecificReasoning::new(model).transform(fixture); + let expected = Context::default(); + assert_eq!(actual, expected, "model {}", model); + } + } + + #[test] + fn test_adaptive_friendly_preserves_non_xhigh_effort() { + for level in [Effort::Low, Effort::Medium, Effort::High, Effort::Max] { + let fixture = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(level.clone()), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-6").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(level.clone()), + exclude: None, + }); + + assert_eq!(actual, expected, "effort level {:?}", level); + } + } +} diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs index 8429aa11e0..236dc45b7e 100644 --- a/crates/forge_repo/src/provider/anthropic.rs +++ b/crates/forge_repo/src/provider/anthropic.rs @@ -7,8 +7,8 @@ use forge_app::domain::{ }; use forge_app::dto::anthropic::{ AuthSystemMessage, CapitalizeToolNames, DropInvalidToolUse, EnforceStrictObjectSchema, - EventData, ListModelResponse, ModelSpecificThinking, ReasoningTransform, RemoveOutputFormat, - Request, SanitizeToolIds, SetCache, ThinkingDisplay, + EventData, ListModelResponse, ReasoningTransform, RemoveOutputFormat, Request, SanitizeToolIds, + SetCache, }; use forge_app::{EnvironmentInfra, HttpInfra}; use forge_domain::{ChatRepository, Provider, ProviderId}; @@ -34,7 +34,7 @@ impl Anthropic { Self { http, provider, anthropic_version: version, use_oauth } } - fn get_headers(&self) -> Vec<(String, String)> { + fn get_headers(&self, model: Option<&ModelId>) -> Vec<(String, String)> { let mut headers = vec![( "anthropic-version".to_string(), self.anthropic_version.clone(), @@ -66,25 +66,36 @@ impl Anthropic { // Add beta flags (not needed for Vertex AI) if self.provider.id != ProviderId::VERTEX_AI_ANTHROPIC { + let mut betas: Vec<&'static str> = Vec::new(); if self.use_oauth { - // OAuth requires multiple beta flags including structured outputs - headers.push(( - "anthropic-beta".to_string(), - "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,structured-outputs-2025-11-13".to_string(), - )); - } else { - // API key auth also needs beta flags for structured outputs and thinking - headers.push(( - "anthropic-beta".to_string(), - "interleaved-thinking-2025-05-14,structured-outputs-2025-11-13".to_string(), - )); + betas.push("claude-code-20250219"); + betas.push("oauth-2025-04-20"); + } + // Adaptive thinking auto-enables interleaved thinking on Opus 4.7, + // Opus 4.6, and Sonnet 4.6 — the beta header is redundant there per + // the Opus 4.7 migration guide. Keep it for older models so manual + // `extended-thinking` requests still get interleaved turns. + if interleaved_thinking_required(model) { + betas.push("interleaved-thinking-2025-05-14"); } + betas.push("structured-outputs-2025-11-13"); + headers.push(("anthropic-beta".to_string(), betas.join(","))); } headers } } +/// Returns false when the model auto-enables interleaved thinking through +/// adaptive thinking (Opus 4.7, Opus 4.6, Sonnet 4.6). When the model is +/// unknown (e.g., listing endpoints), the flag is included because it is +/// harmless on non-chat endpoints and necessary on older chat models. +fn interleaved_thinking_required(model: Option<&ModelId>) -> bool { + let Some(model) = model else { return true }; + let id = model.as_str().to_lowercase(); + !(id.contains("opus-4-7") || id.contains("opus-4-6") || id.contains("sonnet-4-6")) +} + impl Anthropic { /// Determines whether this provider should bypass reqwest-eventsource /// content-type validation and parse SSE from raw bytes instead. @@ -101,20 +112,6 @@ impl Anthropic { // transform the context to match the request format let context = ReasoningTransform.transform(context); - // Read out of `context` before `Request::try_from` consumes it. - let display_preference = - context - .reasoning - .as_ref() - .and_then(|r| r.exclude) - .map(|exclude| { - if exclude { - ThinkingDisplay::Omitted - } else { - ThinkingDisplay::Summarized - } - }); - let mut request = Request::try_from(context)?.max_tokens(max_tokens as u64); // For Vertex AI Anthropic, model is in the URL path, not the request body @@ -124,16 +121,11 @@ impl Anthropic { request = request.model(model.as_str().to_string()); } - let mut thinking_transform = ModelSpecificThinking::new(model.as_str()); - if let Some(display) = display_preference { - thinking_transform = thinking_transform.display(display); - } let pipeline = AuthSystemMessage::default() .when(|_| self.use_oauth) .pipe(CapitalizeToolNames) .pipe(DropInvalidToolUse) - .pipe(SanitizeToolIds) - .pipe(thinking_transform); + .pipe(SanitizeToolIds); // Vertex AI does not support output_format, so we skip schema enforcement // and remove any output_format field @@ -164,7 +156,7 @@ impl Anthropic { serde_json::to_vec(&request).with_context(|| "Failed to serialize request")?; let parsed_url = Url::parse(&url).with_context(|| format!("Invalid URL: {}", url))?; - let headers = create_headers(self.get_headers()); + let headers = create_headers(self.get_headers(Some(model))); if self.should_use_raw_sse() { return self.chat_raw_sse(&parsed_url, headers, json_bytes).await; @@ -255,7 +247,7 @@ impl Anthropic { let response = self .http - .http_get(url, Some(create_headers(self.get_headers()))) + .http_get(url, Some(create_headers(self.get_headers(None)))) .await .with_context(|| format_http_context(None, "GET", url)) .with_context(|| "Failed to fetch models")?; @@ -682,7 +674,7 @@ mod tests { false, // API key auth (not OAuth) ); - let actual = fixture.get_headers(); + let actual = fixture.get_headers(None); // Should contain anthropic-version header assert!( @@ -710,9 +702,12 @@ mod tests { beta_value.contains("structured-outputs-2025-11-13"), "Beta header should include structured-outputs flag" ); + // When the model is unknown (e.g., model listing), keep the + // interleaved-thinking header since it is harmless on non-chat + // endpoints and still required for older chat models. assert!( beta_value.contains("interleaved-thinking-2025-05-14"), - "Beta header should include interleaved-thinking flag" + "Beta header should include interleaved-thinking flag when model is unknown" ); } @@ -761,7 +756,7 @@ mod tests { true, // OAuth auth ); - let actual = fixture.get_headers(); + let actual = fixture.get_headers(None); // Should contain anthropic-version header assert!( @@ -795,6 +790,115 @@ mod tests { ); } + #[test] + fn test_get_headers_drops_interleaved_thinking_for_4_6_plus_models() { + // Adaptive thinking auto-enables interleaved thinking on Opus 4.7, + // Opus 4.6, and Sonnet 4.6; the beta header is redundant there. + let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap(); + let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap(); + + let provider = Provider { + id: forge_app::domain::ProviderId::ANTHROPIC, + provider_type: forge_domain::ProviderType::Llm, + response: Some(forge_app::domain::ProviderResponse::Anthropic), + url: chat_url, + credential: Some(forge_domain::AuthCredential { + id: forge_app::domain::ProviderId::ANTHROPIC, + auth_details: forge_domain::AuthDetails::ApiKey(forge_domain::ApiKey::from( + "sk-test-key".to_string(), + )), + url_params: std::collections::HashMap::new(), + }), + auth_methods: vec![forge_domain::AuthMethod::ApiKey], + url_params: vec![], + models: Some(forge_domain::ModelSource::Url(model_url)), + custom_headers: None, + }; + + let fixture = Anthropic::new( + Arc::new(MockHttpClient::new()), + provider, + "2023-06-01".to_string(), + false, + ); + + for model_id in [ + "claude-opus-4-7", + "claude-opus-4-6", + "claude-sonnet-4-6", + "us.anthropic.claude-opus-4-7", + "global.anthropic.claude-sonnet-4-6", + ] { + let model = ModelId::new(model_id); + let actual = fixture.get_headers(Some(&model)); + let (_, beta_value) = actual + .iter() + .find(|(k, _)| k == "anthropic-beta") + .expect("anthropic-beta header should be present"); + assert!( + !beta_value.contains("interleaved-thinking-2025-05-14"), + "Beta header should NOT include interleaved-thinking flag for {} (auto-enabled by adaptive thinking)", + model_id + ); + assert!( + beta_value.contains("structured-outputs-2025-11-13"), + "structured-outputs flag must still be present for {}", + model_id + ); + } + } + + #[test] + fn test_get_headers_keeps_interleaved_thinking_for_pre_4_6_models() { + let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap(); + let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap(); + + let provider = Provider { + id: forge_app::domain::ProviderId::ANTHROPIC, + provider_type: forge_domain::ProviderType::Llm, + response: Some(forge_app::domain::ProviderResponse::Anthropic), + url: chat_url, + credential: Some(forge_domain::AuthCredential { + id: forge_app::domain::ProviderId::ANTHROPIC, + auth_details: forge_domain::AuthDetails::ApiKey(forge_domain::ApiKey::from( + "sk-test-key".to_string(), + )), + url_params: std::collections::HashMap::new(), + }), + auth_methods: vec![forge_domain::AuthMethod::ApiKey], + url_params: vec![], + models: Some(forge_domain::ModelSource::Url(model_url)), + custom_headers: None, + }; + + let fixture = Anthropic::new( + Arc::new(MockHttpClient::new()), + provider, + "2023-06-01".to_string(), + false, + ); + + for model_id in [ + "claude-opus-4-5-20251101", + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5-20251001", + "claude-opus-4-1-20250805", + "claude-3-7-sonnet-20250219", + ] { + let model = ModelId::new(model_id); + let actual = fixture.get_headers(Some(&model)); + let (_, beta_value) = actual + .iter() + .find(|(k, _)| k == "anthropic-beta") + .expect("anthropic-beta header should be present"); + assert!( + beta_value.contains("interleaved-thinking-2025-05-14"), + "Beta header should include interleaved-thinking flag for pre-4.6 model {}", + model_id + ); + } + } + #[test] fn test_vertex_ai_removes_output_format() { use forge_domain::ResponseFormat; diff --git a/crates/forge_repo/src/provider/bedrock.rs b/crates/forge_repo/src/provider/bedrock.rs index c5e9653167..0bbe13dfe1 100644 --- a/crates/forge_repo/src/provider/bedrock.rs +++ b/crates/forge_repo/src/provider/bedrock.rs @@ -439,6 +439,20 @@ impl FromDomain use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; use aws_sdk_bedrockruntime::types::{InferenceConfiguration, Message, SystemContentBlock}; + // Capture reasoning-related flags before `context.messages` / other fields + // are consumed below. `ModelSpecificReasoning` runs earlier in the pipeline + // and has already normalized `reasoning` per model family, so here we just + // branch on the shape it produced: + // - `max_tokens.is_some()` -> legacy `thinking.enabled` budget shape + // - otherwise -> `thinking.adaptive` (Opus 4.7 / 4.6 / Sonnet 4.6) + let reasoning_on = context.is_reasoning_supported(); + let emits_legacy_thinking = reasoning_on + && context + .reasoning + .as_ref() + .and_then(|r| r.max_tokens) + .is_some(); + // Convert system messages let system: Vec = context .messages @@ -522,18 +536,17 @@ impl FromDomain }; // Convert inference configuration - // When extended thinking is enabled, top_p must be >= 0.95 or unset - let has_thinking = context - .reasoning - .as_ref() - .and_then(|r| r.enabled) - .unwrap_or(false); - let adjusted_top_p = if has_thinking { - // If thinking is enabled and top_p is set, ensure it's at least 0.95 + // When `thinking.enabled` (legacy budget shape) is being emitted below, + // Anthropic-on-Bedrock requires `top_p >= 0.95` or unset. `thinking.adaptive` + // (Opus 4.7 / Opus 4.6 / Sonnet 4.6) has no such constraint, and + // `ModelSpecificReasoning` already strips `top_p` entirely for Opus 4.7. + let adjusted_top_p = if emits_legacy_thinking { + // If legacy thinking is emitted and top_p is set, ensure it's at least 0.95 context.top_p.map(|p| { let value = p.value(); if value < 0.95 { - forge_domain::TopP::new(0.95).unwrap() + // SAFETY: 0.95 is a valid TopP value (between 0.0 and 1.0) + forge_domain::TopP::new(0.95).expect("0.95 is valid TopP") } else { p } @@ -558,29 +571,59 @@ impl FromDomain None }; - // Convert reasoning configuration to additional model request fields - // For Claude models with extended thinking support - // Based on AWS Bedrock docs: additionalModelRequestFields for Claude extended - // thinking https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html + // Convert reasoning configuration to `additional_model_request_fields` + // for Anthropic-on-Bedrock. Two thinking shapes are emitted based on + // `reasoning.max_tokens`, which `ModelSpecificReasoning` has already + // normalized per family: + // + // - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}` + // (Opus 4.5 and older; budget is backfilled to 10k when absent.) + // - `max_tokens: None` → `{type: "adaptive", display: ...}` + // (Opus 4.7 rejects the legacy shape with 400; Opus 4.6 / + // Sonnet 4.6 accept adaptive natively.) + // + // When present, `reasoning.effort` is emitted as `output_config.effort` + // for families that support it (`ModelSpecificReasoning` drops effort + // on LegacyNoEffort, so the Option is already correctly shaped here). + // + // AWS Bedrock passes `additional_model_request_fields` through verbatim + // to Anthropic for Claude models. See + // https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html let additional_model_fields = if let Some(reasoning_config) = &context.reasoning { - if reasoning_config.enabled.unwrap_or(false) { + if !reasoning_on { + None + } else { let mut thinking_config = std::collections::HashMap::new(); - thinking_config.insert( - "type".to_string(), - aws_smithy_types::Document::String("enabled".to_string()), - ); - - // Set budget_tokens (REQUIRED when thinking is enabled) - // The budget_tokens parameter determines the maximum number of tokens - // Claude is allowed to use for its internal reasoning process - // Default to 4000 if not specified (AWS recommendation for good quality) - let budget_tokens = reasoning_config.max_tokens.unwrap_or(4000); - thinking_config.insert( - "budget_tokens".to_string(), - aws_smithy_types::Document::Number(aws_smithy_types::Number::PosInt( - budget_tokens as u64, - )), - ); + if let Some(budget) = reasoning_config.max_tokens { + thinking_config.insert( + "type".to_string(), + aws_smithy_types::Document::String("enabled".to_string()), + ); + thinking_config.insert( + "budget_tokens".to_string(), + aws_smithy_types::Document::Number(aws_smithy_types::Number::PosInt( + budget as u64, + )), + ); + } else { + thinking_config.insert( + "type".to_string(), + aws_smithy_types::Document::String("adaptive".to_string()), + ); + // Opus 4.7 changed the default to `omitted`; preserve the + // caller's `exclude` preference so `exclude: true` stays + // `omitted` and every other case surfaces `summarized` + // (matching the legacy pre-4.7 visible-thinking behavior). + let display = if reasoning_config.exclude == Some(true) { + "omitted" + } else { + "summarized" + }; + thinking_config.insert( + "display".to_string(), + aws_smithy_types::Document::String(display.to_string()), + ); + } let mut fields = std::collections::HashMap::new(); fields.insert( @@ -588,9 +631,29 @@ impl FromDomain aws_smithy_types::Document::Object(thinking_config), ); + if let Some(effort) = reasoning_config.effort.as_ref() { + let effort_str = match effort { + forge_domain::Effort::None => None, + forge_domain::Effort::Minimal | forge_domain::Effort::Low => Some("low"), + forge_domain::Effort::Medium => Some("medium"), + forge_domain::Effort::High => Some("high"), + forge_domain::Effort::XHigh => Some("xhigh"), + forge_domain::Effort::Max => Some("max"), + }; + if let Some(effort_str) = effort_str { + let mut output_config = std::collections::HashMap::new(); + output_config.insert( + "effort".to_string(), + aws_smithy_types::Document::String(effort_str.to_string()), + ); + fields.insert( + "output_config".to_string(), + aws_smithy_types::Document::Object(output_config), + ); + } + } + Some(aws_smithy_types::Document::Object(fields)) - } else { - None } } else { None @@ -1788,6 +1851,183 @@ mod tests { assert!(actual.additional_model_request_fields().is_some()); } + /// Opus 4.7 / Opus 4.6 / Sonnet 4.6 path: `ModelSpecificReasoning` strips + /// `max_tokens`, so Bedrock emits `thinking.adaptive` with the legacy + /// `display: summarized` default (visible thinking). + #[test] + fn test_from_domain_context_emits_adaptive_thinking_when_max_tokens_absent() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, ReasoningConfig}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: None, + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: None, + max_tokens: None, // normalized away by ModelSpecificReasoning for 4.7/4.6 + exclude: None, + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let fields = actual + .additional_model_request_fields() + .expect("adaptive thinking should emit additional_model_request_fields"); + + let thinking = match fields { + aws_smithy_types::Document::Object(m) => m.get("thinking").expect("thinking present"), + _ => panic!("expected object"), + }; + let thinking_map = match thinking { + aws_smithy_types::Document::Object(m) => m, + _ => panic!("expected thinking object"), + }; + assert_eq!( + thinking_map.get("type"), + Some(&aws_smithy_types::Document::String("adaptive".to_string())) + ); + assert_eq!( + thinking_map.get("display"), + Some(&aws_smithy_types::Document::String( + "summarized".to_string() + )) + ); + assert!( + thinking_map.get("budget_tokens").is_none(), + "adaptive must not carry budget_tokens" + ); + } + + /// `exclude: true` preference maps to `display: omitted` on the adaptive shape. + #[test] + fn test_from_domain_context_adaptive_thinking_respects_exclude() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, ReasoningConfig}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: None, + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: None, + max_tokens: None, + exclude: Some(true), + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let fields = actual.additional_model_request_fields().unwrap(); + let thinking = match fields { + aws_smithy_types::Document::Object(m) => m.get("thinking").unwrap(), + _ => panic!("expected object"), + }; + let thinking_map = match thinking { + aws_smithy_types::Document::Object(m) => m, + _ => panic!("expected thinking object"), + }; + assert_eq!( + thinking_map.get("display"), + Some(&aws_smithy_types::Document::String("omitted".to_string())) + ); + } + + /// Adaptive thinking must NOT trigger the legacy `top_p >= 0.95` clamp — + /// that constraint only applies to `thinking.enabled` (budget shape). + #[test] + fn test_from_domain_context_adaptive_thinking_does_not_clamp_top_p() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, ReasoningConfig, TopP}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: Some(TopP::new(0.5).unwrap()), + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: None, + max_tokens: None, + exclude: None, + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let top_p = actual.inference_config().unwrap().top_p().unwrap(); + assert!( + (top_p - 0.5).abs() < f32::EPSILON, + "adaptive thinking must leave top_p untouched, got {top_p}" + ); + } + + /// When `reasoning.effort` survives normalization (i.e. 4.5+/4.6+/4.7 + /// families), it must be emitted as `output_config.effort`. + #[test] + fn test_from_domain_context_emits_output_config_effort() { + use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; + use forge_domain::{Context, Effort, ReasoningConfig}; + + let fixture = Context { + conversation_id: None, + initiator: None, + messages: vec![], + tools: vec![], + tool_choice: None, + temperature: None, + top_p: None, + top_k: None, + max_tokens: None, + reasoning: Some(ReasoningConfig { + effort: Some(Effort::High), + max_tokens: None, + exclude: None, + enabled: Some(true), + }), + stream: None, + response_format: None, + }; + + let actual = ConverseStreamInput::from_domain(fixture).unwrap(); + let fields = actual.additional_model_request_fields().unwrap(); + let output_config = match fields { + aws_smithy_types::Document::Object(m) => m.get("output_config").unwrap(), + _ => panic!("expected object"), + }; + let output_map = match output_config { + aws_smithy_types::Document::Object(m) => m, + _ => panic!("expected output_config object"), + }; + assert_eq!( + output_map.get("effort"), + Some(&aws_smithy_types::Document::String("high".to_string())) + ); + } + #[test] fn test_json_value_to_document_empty_object() { let fixture = serde_json::json!({}); From ecceb066a531c09952c031f450528d1c7770bce4 Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Sat, 18 Apr 2026 21:45:07 +0530 Subject: [PATCH 4/5] chore: apply cargo fmt fixes Co-Authored-By: ForgeCode --- .../forge_app/src/transformers/model_specific_reasoning.rs | 6 ++---- crates/forge_main/src/info.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/forge_app/src/transformers/model_specific_reasoning.rs b/crates/forge_app/src/transformers/model_specific_reasoning.rs index d3d1788b54..7c39c2228c 100644 --- a/crates/forge_app/src/transformers/model_specific_reasoning.rs +++ b/crates/forge_app/src/transformers/model_specific_reasoning.rs @@ -237,8 +237,7 @@ mod tests { exclude: None, }); - let actual = - ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture); + let actual = ModelSpecificReasoning::new("us.anthropic.claude-opus-4-7").transform(fixture); let expected = Context::default().reasoning(ReasoningConfig { enabled: Some(true), @@ -304,8 +303,7 @@ mod tests { exclude: None, }); - let actual = - ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); + let actual = ModelSpecificReasoning::new("claude-opus-4-5-20251101").transform(fixture); let expected = Context::default().reasoning(ReasoningConfig { enabled: Some(true), diff --git a/crates/forge_main/src/info.rs b/crates/forge_main/src/info.rs index b0815a8799..074e8e9711 100644 --- a/crates/forge_main/src/info.rs +++ b/crates/forge_main/src/info.rs @@ -75,7 +75,7 @@ impl Section { /// # Output Format /// /// ```text -/// +/// /// CONFIGURATION /// model gpt-4 /// provider openai From 47c363635274c79b6f7f1d731bdcaf25f5103009 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 16:17:56 +0000 Subject: [PATCH 5/5] [autofix.ci] apply automated fixes --- crates/forge_main/src/info.rs | 2 +- crates/forge_repo/src/provider/bedrock.rs | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/crates/forge_main/src/info.rs b/crates/forge_main/src/info.rs index 074e8e9711..b0815a8799 100644 --- a/crates/forge_main/src/info.rs +++ b/crates/forge_main/src/info.rs @@ -75,7 +75,7 @@ impl Section { /// # Output Format /// /// ```text -/// +/// /// CONFIGURATION /// model gpt-4 /// provider openai diff --git a/crates/forge_repo/src/provider/bedrock.rs b/crates/forge_repo/src/provider/bedrock.rs index 0bbe13dfe1..1901044f77 100644 --- a/crates/forge_repo/src/provider/bedrock.rs +++ b/crates/forge_repo/src/provider/bedrock.rs @@ -576,11 +576,11 @@ impl FromDomain // `reasoning.max_tokens`, which `ModelSpecificReasoning` has already // normalized per family: // - // - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}` - // (Opus 4.5 and older; budget is backfilled to 10k when absent.) - // - `max_tokens: None` → `{type: "adaptive", display: ...}` - // (Opus 4.7 rejects the legacy shape with 400; Opus 4.6 / - // Sonnet 4.6 accept adaptive natively.) + // - `max_tokens: Some(N)` → `{type: "enabled", budget_tokens: N}` (Opus 4.5 + // and older; budget is backfilled to 10k when absent.) + // - `max_tokens: None` → `{type: "adaptive", display: ...}` (Opus 4.7 + // rejects the legacy shape with 400; Opus 4.6 / Sonnet 4.6 accept adaptive + // natively.) // // When present, `reasoning.effort` is emitted as `output_config.effort` // for families that support it (`ModelSpecificReasoning` drops effort @@ -1908,7 +1908,8 @@ mod tests { ); } - /// `exclude: true` preference maps to `display: omitted` on the adaptive shape. + /// `exclude: true` preference maps to `display: omitted` on the adaptive + /// shape. #[test] fn test_from_domain_context_adaptive_thinking_respects_exclude() { use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput;