Skip to content
Draft

Meai #4376

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
</Choose>

<ItemGroup>
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.9.1" />
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="10.4.0" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
</Choose>

<ItemGroup>
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.9.1" />
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="10.4.0" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@
<group targetFramework="net472">
<dependency id="AWSSDK.Core" version="4.0.3.17" />
<dependency id="AWSSDK.BedrockRuntime" version="4.0.16.3" />
<dependency id="Microsoft.Extensions.AI.Abstractions" version="9.9.1" />
<dependency id="Microsoft.Extensions.AI.Abstractions" version="10.4.0" />
</group>
<group targetFramework="netstandard2.0">
<dependency id="AWSSDK.Core" version="4.0.3.17" />
<dependency id="AWSSDK.BedrockRuntime" version="4.0.16.3" />
<dependency id="Microsoft.Extensions.AI.Abstractions" version="9.9.1" />
<dependency id="Microsoft.Extensions.AI.Abstractions" version="10.4.0" />
</group>
<group targetFramework="net8.0">
<dependency id="AWSSDK.Core" version="4.0.3.17" />
<dependency id="AWSSDK.BedrockRuntime" version="4.0.16.3" />
<dependency id="Microsoft.Extensions.AI.Abstractions" version="9.9.1" />
<dependency id="Microsoft.Extensions.AI.Abstractions" version="10.4.0" />
</group>
</dependencies>
</metadata>
Expand Down
165 changes: 142 additions & 23 deletions extensions/src/AWSSDK.Extensions.Bedrock.MEAI/BedrockChatClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,18 @@ public void Dispose()

/// <inheritdoc />
/// <remarks>
/// <para>
/// When <see cref="ChatOptions.ResponseFormat"/> is specified, the model must support
/// the ToolChoice feature. Models without this support will return an error from the Bedrock API
/// (typically <see cref="Amazon.BedrockRuntime.AmazonBedrockRuntimeException"/> with ErrorCode "ValidationException").
/// If the model fails to return the expected structured output, <see cref="InvalidOperationException"/>
/// is thrown.
/// </para>
/// <para>
/// When <see cref="ChatOptions.Reasoning"/> is specified with a non-<see cref="ReasoningEffort.None"/> effort,
/// the model must support extended thinking (e.g. Anthropic Claude). Models without this support will return
/// an error from the Bedrock API.
/// </para>
/// </remarks>
public async Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default)
Expand All @@ -90,6 +97,7 @@ public async Task<ChatResponse> GetResponseAsync(
request.System = CreateSystem(request.System, messages, options);
request.ToolConfig = CreateToolConfig(request.ToolConfig, options);
request.InferenceConfig = CreateInferenceConfiguration(request.InferenceConfig, options);
request.AdditionalModelRequestFields = ApplyReasoningConfig(request.AdditionalModelRequestFields, request.InferenceConfig, options);

ConverseResponse response = await _runtime.ConverseAsync(request, cancellationToken).ConfigureAwait(false);

Expand Down Expand Up @@ -162,8 +170,9 @@ public async Task<ChatResponse> GetResponseAsync(
TextContent tc = new(citations.Content[i]?.Text) { RawRepresentation = citations.Content[i] };
tc.Annotations = [new CitationAnnotation()
{
Snippet = citations.Citations[i].SourceContent?.Select(c => c.Text).FirstOrDefault() ?? citations.Citations[i].Source,
Title = citations.Citations[i].Title,
Snippet = citations.Citations[i].SourceContent?.Select(c => c.Text).FirstOrDefault(),
Url = Uri.TryCreate(citations.Citations[i].Location?.Web?.Url, UriKind.Absolute, out Uri? uri) ? uri : null,
}];
result.Contents.Add(tc);
}
Expand Down Expand Up @@ -228,6 +237,13 @@ public async Task<ChatResponse> GetResponseAsync(
}

/// <inheritdoc />
/// <remarks>
/// <para>
/// When <see cref="ChatOptions.Reasoning"/> is specified with a non-<see cref="ReasoningEffort.None"/> effort,
/// the model must support extended thinking (e.g. Anthropic Claude). Models without this support will return
/// an error from the Bedrock API.
/// </para>
/// </remarks>
public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
IEnumerable<ChatMessage> messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
Expand Down Expand Up @@ -256,6 +272,7 @@ public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
request.System = CreateSystem(request.System, messages, options);
request.ToolConfig = CreateToolConfig(request.ToolConfig, options);
request.InferenceConfig = CreateInferenceConfiguration(request.InferenceConfig, options);
request.AdditionalModelRequestFields = ApplyReasoningConfig(request.AdditionalModelRequestFields, request.InferenceConfig, options);

var result = await _runtime.ConverseStreamAsync(request, cancellationToken).ConfigureAwait(false);

Expand Down Expand Up @@ -422,15 +439,11 @@ private static UsageDetails CreateUsageDetails(TokenUsage usage)
UsageDetails ud = new()
{
InputTokenCount = usage.InputTokens,
CachedInputTokenCount = usage.CacheReadInputTokens,
OutputTokenCount = usage.OutputTokens,
TotalTokenCount = usage.TotalTokens,
};

if (usage.CacheReadInputTokens is int cacheReadTokens)
{
(ud.AdditionalCounts ??= []).Add(nameof(usage.CacheReadInputTokens), cacheReadTokens);
}

if (usage.CacheWriteInputTokens is int cacheWriteTokens)
{
(ud.AdditionalCounts ??= []).Add(nameof(usage.CacheWriteInputTokens), cacheWriteTokens);
Expand Down Expand Up @@ -465,8 +478,7 @@ private static List<SystemContentBlock> CreateSystem(List<SystemContentBlock>? r
});
}

foreach (var message in messages
.Where(m => m.Role == ChatRole.System && m.Contents.Any(c => c is TextContent)))
foreach (var message in messages.Where(m => m.Role == ChatRole.System && m.Contents.Any(c => c is TextContent)))
{
system.Add(new SystemContentBlock()
{
Expand Down Expand Up @@ -567,6 +579,10 @@ private static List<ContentBlock> CreateContents(ChatMessage message)
{
switch (content)
{
case AIContent when content.RawRepresentation is ContentBlock cb:
contents.Add(cb);
break;

case TextContent tc:
if (message.Role == ChatRole.Assistant)
{
Expand Down Expand Up @@ -649,32 +665,54 @@ private static List<ContentBlock> CreateContents(ChatMessage message)
break;

case FunctionResultContent frc:
Document result = frc.Result switch
{
int i => i,
long l => l,
float f => f,
double d => d,
string s => s,
bool b => b,
JsonElement json => ToDocument(json),
{ } other => ToDocument(JsonSerializer.SerializeToElement(other, BedrockJsonContext.DefaultOptions.GetTypeInfo(other.GetType()))),
_ => default,
};

contents.Add(new()
{
ToolResult = new()
{
ToolUseId = frc.CallId,
Content = [new() { Json = new Document(new Dictionary<string, Document>() { ["result"] = result }) }],
Content = ToToolResultContentBlocks(frc.Result),
},
});
break;
}

// Converts a FunctionResultContent.Result into the Bedrock tool-result content blocks.
// Dispatch order matters: AIContent and collections of AIContent are handled natively,
// a plain string becomes a text block, and anything else is wrapped as JSON under a
// "result" key (matching the shape Bedrock expects for structured tool output).
static List<ToolResultContentBlock> ToToolResultContentBlocks(object? result) =>
    result switch
    {
        // A single AIContent maps to one block; a sequence maps element-wise.
        AIContent aic => [ToolResultContentBlockFromAIContent(aic)],
        IEnumerable<AIContent> aics => [.. aics.Select(ToolResultContentBlockFromAIContent)],
        // Raw strings are sent as text rather than JSON-wrapped.
        string s => [new () { Text = s }],
        // Everything else is serialized into a JSON document: { "result": <value> }.
        _ => [new()
        {
            Json = new Document(new Dictionary<string, Document>()
            {
                ["result"] = result switch
                {
                    // Primitive values convert directly to Document.
                    int i => i,
                    long l => l,
                    float f => f,
                    double d => d,
                    bool b => b,
                    JsonElement json => ToDocument(json),
                    // Any other non-null object is serialized via the source-generated
                    // JSON context, then converted to a Document.
                    { } other => ToDocument(JsonSerializer.SerializeToElement(other, BedrockJsonContext.DefaultOptions.GetTypeInfo(other.GetType()))),
                    // null result -> default (empty) Document.
                    _ => default,
                }
            })
        }],
    };

// Maps a single AIContent to a ToolResultContentBlock.
// Text and reasoning become text blocks; DataContent is routed to image/video/document
// by media type (first matching format wins); anything unrecognized falls back to the
// JSON path above. NOTE(review): the fallback serializes via typeof(object) type info —
// presumably intentional for polymorphic AIContent; confirm the JSON context supports it.
static ToolResultContentBlock ToolResultContentBlockFromAIContent(AIContent aic) =>
    aic switch
    {
        TextContent tc => new() { Text = tc.Text },
        TextReasoningContent trc => new() { Text = trc.Text },
        DataContent dc when GetImageFormat(dc.MediaType) is { } imageFormat => new() { Image = new() { Source = new() { Bytes = new(dc.Data.ToArray()) }, Format = imageFormat } },
        DataContent dc when GetVideoFormat(dc.MediaType) is { } videoFormat => new() { Video = new() { Source = new() { Bytes = new(dc.Data.ToArray()) }, Format = videoFormat } },
        DataContent dc when GetDocumentFormat(dc.MediaType) is { } docFormat => new() { Document = new() { Source = new() { Bytes = new(dc.Data.ToArray()) }, Format = docFormat, Name = dc.Name ?? "file" } },
        _ => ToToolResultContentBlocks(JsonSerializer.SerializeToElement(aic, BedrockJsonContext.DefaultOptions.GetTypeInfo(typeof(object)))).First(),
    };

if (content.AdditionalProperties?.TryGetValue(nameof(ContentBlock.CachePoint), out var maybeCachePoint) == true)
if (content.AdditionalProperties?.TryGetValue(nameof(ContentBlock.CachePoint), out var maybeCachePoint) is true)
{
if (maybeCachePoint is CachePointBlock cachePointBlock)
{
Expand Down Expand Up @@ -1098,4 +1136,85 @@ private static InferenceConfiguration CreateInferenceConfiguration(InferenceConf

return config;
}

/// <summary>Applies reasoning configuration from ChatOptions to the AdditionalModelRequestFields.</summary>
/// <remarks>
/// Maps <see cref="ChatOptions.Reasoning"/> to Bedrock's extended thinking configuration
/// via the <c>thinking</c> key in <c>AdditionalModelRequestFields</c>.
/// Budget tokens are computed as a ratio of <c>MaxTokens</c> when available, following the
/// approach used by the AWS bedrock-access-gateway. The constraint <c>budget_tokens &lt; max_tokens</c>
/// is always enforced. Note that the service additionally requires <c>budget_tokens &gt;= 1024</c>;
/// when the caller's <c>MaxTokens</c> is too small to satisfy both constraints, the request is
/// still sent and the service rejects it with a validation error.
/// See https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html
/// </remarks>
/// <param name="additionalModelRequestFields">Existing additional request fields; returned unchanged if reasoning is off or already configured.</param>
/// <param name="inferenceConfig">Inference configuration; <c>MaxTokens</c> is read, and written when it was unset.</param>
/// <param name="options">Chat options supplying the requested <see cref="ChatOptions.Reasoning"/> effort, if any.</param>
/// <returns>The (possibly new) <see cref="Document"/> to assign to <c>AdditionalModelRequestFields</c>.</returns>
private static Document ApplyReasoningConfig(Document additionalModelRequestFields, InferenceConfiguration inferenceConfig, ChatOptions? options)
{
    // If Effort is not set or is None, there's nothing to configure.
    if (options?.Reasoning is not { } reasoning ||
        reasoning.Effort is not { } effort ||
        effort is ReasoningEffort.None)
    {
        return additionalModelRequestFields;
    }

    // Don't override if the user already configured thinking via AdditionalModelRequestFields.
    if (additionalModelRequestFields.IsDictionary() &&
        additionalModelRequestFields.AsDictionary().ContainsKey("thinking"))
    {
        return additionalModelRequestFields;
    }

    // budget_tokens must be >= 1024 and < max_tokens.
    // When max_tokens is known, compute budget_tokens as a ratio (similar to
    // https://github.com/aws-samples/bedrock-access-gateway). When it isn't,
    // pick fixed budget values and set max_tokens to satisfy the constraint,
    // since the model-specific default for max_tokens is unspecified.
    int budgetTokens;
    if (inferenceConfig.MaxTokens is int maxTokens)
    {
        double ratio = effort switch
        {
            ReasoningEffort.Low => 0.25,
            ReasoningEffort.Medium => 0.5,
            ReasoningEffort.High => 0.75,
            _ => 1.0, // ExtraHigh
        };

        budgetTokens = Math.Max(1024, (int)(maxTokens * ratio));
        if (budgetTokens >= maxTokens)
        {
            // Enforce budget_tokens < max_tokens. When max_tokens <= 1024 the 1024
            // minimum cannot also be satisfied; clamp to a positive value (never send
            // zero or negative budget_tokens) and let the service report the
            // validation error for the caller's too-small MaxTokens.
            budgetTokens = Math.Max(1, maxTokens - 1);
        }
    }
    else
    {
        budgetTokens = effort switch
        {
            ReasoningEffort.Low => 1024,
            ReasoningEffort.Medium => 8192,
            ReasoningEffort.High => 16384,
            _ => 32768, // ExtraHigh
        };

        // MaxTokens was unset: pick one large enough that budget_tokens < max_tokens holds.
        inferenceConfig.MaxTokens = budgetTokens * 4;
    }

    Document thinkingConfig = new(new Dictionary<string, Document>
    {
        ["type"] = new("enabled"),
        ["budget_tokens"] = new(budgetTokens),
    });

    // Merge into an existing dictionary document, or create a fresh one.
    if (additionalModelRequestFields.IsDictionary())
    {
        additionalModelRequestFields.AsDictionary()["thinking"] = thinkingConfig;
    }
    else
    {
        additionalModelRequestFields = new(new Dictionary<string, Document>
        {
            ["thinking"] = thinkingConfig,
        });
    }

    return additionalModelRequestFields;
}
}
Loading
Loading