Skip to content

Commit b38ea33

Browse files
committed
feat(meai): Add prompt caching support via WithCacheControl extension
Add AIContentCacheExtensions with WithCacheControl() methods that allow setting Anthropic prompt caching on AIContent instances via AdditionalProperties. Changes: - New AIContentCacheExtensions.cs with WithCacheControl(CacheControlEphemeral) and WithCacheControl(TTL) extension methods - Update CreateMessageParams in both AnthropicClientExtensions and AnthropicBetaClientExtensions to apply cache control when mapping AIContent to Anthropic content block params (TextBlockParam, ImageBlockParam, DocumentBlockParam, ToolUseBlockParam, ToolResultBlockParam) - Fix system message handling in both clients to support cache control - Add unit tests for extension methods and request serialization The Beta client converts from CacheControlEphemeral to BetaCacheControlEphemeral internally, so users can use the same extension methods with both clients. Usage: ```csharp var systemContent = new TextContent(prompt).WithCacheControl(TTL.TTL1h); var lastContent = messages[^1].Contents.Last().WithCacheControl(TTL.TTL5m); ``` Note: ThinkingBlockParam/BetaThinkingBlockParam and RedactedThinkingBlockParam/ BetaRedactedThinkingBlockParam do not support cache control in the Anthropic API.
1 parent a4a5882 commit b38ea33

File tree

4 files changed

+614
-142
lines changed

4 files changed

+614
-142
lines changed

src/Anthropic.Tests/AnthropicClientExtensionsTestsBase.cs

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4292,6 +4292,221 @@ public async Task GetResponseAsync_WithFunctionResultContent_UriContent_PDF()
42924292
Assert.NotNull(response);
42934293
}
42944294

[Fact]
public void WithCacheControl_SetsAdditionalProperty()
{
    // Setting cache control via the TTL overload should materialize
    // AdditionalProperties and store a retrievable CacheControlEphemeral.
    var content = new TextContent("Hello, world!");

    content.WithCacheControl(Anthropic.Models.Messages.TTL.TTL5m);

    Assert.NotNull(content.AdditionalProperties);
    var cacheControl = content.GetCacheControl();
    Assert.NotNull(cacheControl);
    // Assert.Equal reports expected vs. actual on failure, unlike
    // Assert.True(a == b) which only reports "expected True".
    Assert.Equal(Anthropic.Models.Messages.TTL.TTL5m, cacheControl.TTL);
}
4308+
[Fact]
public void WithCacheControl_CacheControlEphemeral_SetsAdditionalProperty()
{
    // Setting cache control via an explicit CacheControlEphemeral instance
    // should round-trip through GetCacheControl with its TTL intact.
    var content = new TextContent("Hello, world!");
    var cacheControl = new Anthropic.Models.Messages.CacheControlEphemeral
    {
        TTL = Anthropic.Models.Messages.TTL.TTL1h,
    };

    content.WithCacheControl(cacheControl);

    var retrieved = content.GetCacheControl();
    Assert.NotNull(retrieved);
    // Assert.Equal reports expected vs. actual on failure, unlike
    // Assert.True(a == b) which only reports "expected True".
    Assert.Equal(Anthropic.Models.Messages.TTL.TTL1h, retrieved.TTL);
}
[Fact]
public void WithCacheControl_Null_RemovesCacheControl()
{
    // Arrange: attach a 5-minute cache control so there is something to remove.
    var textContent = new TextContent("Hello, world!");
    textContent.WithCacheControl(Anthropic.Models.Messages.TTL.TTL5m);
    Assert.NotNull(textContent.GetCacheControl());

    // Act: a null CacheControlEphemeral clears any previously set cache control.
    // The cast disambiguates between the CacheControlEphemeral? and TTL? overloads.
    textContent.WithCacheControl((Anthropic.Models.Messages.CacheControlEphemeral?)null);

    // Assert: the cache-control entry is gone.
    Assert.Null(textContent.GetCacheControl());
}
[Fact]
public async Task GetResponseAsync_WithCacheControlOnSystemMessage()
{
    // Verifies that cache control attached to a system-message TextContent is
    // serialized as a "cache_control" object (type "ephemeral", ttl "1h") on the
    // corresponding entry of the top-level "system" array in the request body.
    // NOTE(review): VerbatimHttpHandler presumably matches the outgoing request
    // against expectedRequest and replies with actualResponse — confirm against
    // its definition at the bottom of this file.
    VerbatimHttpHandler handler = new(
        expectedRequest: """
        {
          "model": "claude-haiku-4-5",
          "messages": [{
            "role": "user",
            "content": [{
              "type": "text",
              "text": "Hello"
            }]
          }],
          "max_tokens": 1024,
          "system": [{
            "type": "text",
            "text": "You are a helpful assistant.",
            "cache_control": {
              "type": "ephemeral",
              "ttl": "1h"
            }
          }]
        }
        """,
        actualResponse: """
        {
          "id": "msg_cache_01",
          "type": "message",
          "role": "assistant",
          "model": "claude-haiku-4-5",
          "content": [{
            "type": "text",
            "text": "Hello!"
          }],
          "stop_reason": "end_turn",
          "usage": {
            "input_tokens": 10,
            "output_tokens": 5
          }
        }
        """
    );

    IChatClient chatClient = CreateChatClient(handler, "claude-haiku-4-5");

    // 1-hour TTL is the documented choice for stable content such as system prompts.
    var systemContent = new TextContent("You are a helpful assistant.")
        .WithCacheControl(Anthropic.Models.Messages.TTL.TTL1h);

    List<ChatMessage> messages =
    [
        new(ChatRole.System, [systemContent]),
        new(ChatRole.User, "Hello"),
    ];

    ChatResponse response = await chatClient.GetResponseAsync(messages);
    Assert.NotNull(response);
}
[Fact]
public async Task GetResponseAsync_WithCacheControlOnUserMessage()
{
    // Verifies that cache control attached to a user-message TextContent is
    // serialized as a "cache_control" object (type "ephemeral", ttl "5m") on the
    // text content block inside "messages" — not on the "system" array.
    // NOTE(review): VerbatimHttpHandler presumably matches the outgoing request
    // against expectedRequest and replies with actualResponse — confirm against
    // its definition at the bottom of this file.
    VerbatimHttpHandler handler = new(
        expectedRequest: """
        {
          "model": "claude-haiku-4-5",
          "messages": [{
            "role": "user",
            "content": [{
              "type": "text",
              "text": "What is the meaning of life?",
              "cache_control": {
                "type": "ephemeral",
                "ttl": "5m"
              }
            }]
          }],
          "max_tokens": 1024
        }
        """,
        actualResponse: """
        {
          "id": "msg_cache_02",
          "type": "message",
          "role": "assistant",
          "model": "claude-haiku-4-5",
          "content": [{
            "type": "text",
            "text": "42"
          }],
          "stop_reason": "end_turn",
          "usage": {
            "input_tokens": 15,
            "output_tokens": 3
          }
        }
        """
    );

    IChatClient chatClient = CreateChatClient(handler, "claude-haiku-4-5");

    // 5-minute TTL is the documented choice for dynamic conversation content.
    var userContent = new TextContent("What is the meaning of life?")
        .WithCacheControl(Anthropic.Models.Messages.TTL.TTL5m);

    List<ChatMessage> messages = [new(ChatRole.User, [userContent])];

    ChatResponse response = await chatClient.GetResponseAsync(messages);
    Assert.NotNull(response);
}
[Fact]
public async Task GetResponseAsync_WithCacheControlOnImage()
{
    // Verifies that cache control attached to an image DataContent is serialized
    // as a "cache_control" object on the image block (alongside its base64
    // "source"), while the sibling text block gets no cache_control.
    // NOTE(review): VerbatimHttpHandler presumably matches the outgoing request
    // against expectedRequest and replies with actualResponse — confirm against
    // its definition at the bottom of this file.
    VerbatimHttpHandler handler = new(
        expectedRequest: """
        {
          "model": "claude-haiku-4-5",
          "messages": [{
            "role": "user",
            "content": [{
              "type": "image",
              "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
              },
              "cache_control": {
                "type": "ephemeral",
                "ttl": "1h"
              }
            }, {
              "type": "text",
              "text": "What do you see?"
            }]
          }],
          "max_tokens": 1024
        }
        """,
        actualResponse: """
        {
          "id": "msg_cache_03",
          "type": "message",
          "role": "assistant",
          "model": "claude-haiku-4-5",
          "content": [{
            "type": "text",
            "text": "I see a small image."
          }],
          "stop_reason": "end_turn",
          "usage": {
            "input_tokens": 100,
            "output_tokens": 10
          }
        }
        """
    );

    IChatClient chatClient = CreateChatClient(handler, "claude-haiku-4-5");

    // A 1x1 PNG encoded as a data URI; the base64 payload must match the
    // "data" field in expectedRequest above byte-for-byte.
    var imageContent = new DataContent(
        "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
        "image/png"
    ).WithCacheControl(Anthropic.Models.Messages.TTL.TTL1h);

    List<ChatMessage> messages =
    [
        new(ChatRole.User, [imageContent, new TextContent("What do you see?")]),
    ];

    ChatResponse response = await chatClient.GetResponseAsync(messages);
    Assert.NotNull(response);
}
4509+
42954510
protected sealed class VerbatimHttpHandler(string expectedRequest, string actualResponse)
42964511
: HttpMessageHandler
42974512
{
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
using Anthropic.Models.Messages;

#pragma warning disable IDE0130 // Namespace does not match folder structure

namespace Microsoft.Extensions.AI;

/// <summary>
/// Extension methods for configuring Anthropic prompt caching on <see cref="AIContent"/> instances.
/// </summary>
/// <remarks>
/// <para>
/// Prompt caching lets frequently reused context be cached between API calls, cutting latency and
/// cost for repetitive workloads. A cache breakpoint is placed at the END of any content block
/// that carries cache control.
/// </para>
/// <para>
/// These extensions only take effect with the <see cref="IChatClient"/> produced by
/// <see cref="AnthropicClientExtensions.AsIChatClient"/>; other implementations ignore the setting.
/// </para>
/// </remarks>
public static class AIContentCacheExtensions
{
    // AdditionalProperties key under which the CacheControlEphemeral is stored.
    private const string CacheControlKey = "anthropic:cache_control";

    /// <summary>
    /// Configures Anthropic prompt caching on this content block.
    /// </summary>
    /// <typeparam name="T">The type of <see cref="AIContent"/>.</typeparam>
    /// <param name="content">The content to configure caching for.</param>
    /// <param name="cacheControl">
    /// The cache control configuration. Pass <see langword="null"/> to remove any existing cache control.
    /// </param>
    /// <returns>The same <paramref name="content"/> instance for method chaining.</returns>
    /// <remarks>
    /// <para>
    /// The cache breakpoint sits at the END of this content block; everything up to and including
    /// this block is cached as one unit.
    /// </para>
    /// <para>
    /// For agentic loops, good breakpoint placements are:
    /// <list type="bullet">
    /// <item>System prompts (use <see cref="TTL.TTL1h"/> for stable prompts)</item>
    /// <item>The last content block before the current turn (use <see cref="TTL.TTL5m"/>)</item>
    /// <item>Large tool results that won't change</item>
    /// </list>
    /// </para>
    /// </remarks>
    /// <example>
    /// <code>
    /// var systemContent = new TextContent(systemPrompt).WithCacheControl(new CacheControlEphemeral { TTL = TTL.TTL1h });
    /// chatMessages.Add(new ChatMessage(ChatRole.System, [systemContent]));
    /// </code>
    /// </example>
    public static T WithCacheControl<T>(this T content, CacheControlEphemeral? cacheControl)
        where T : AIContent
    {
        if (cacheControl is not null)
        {
            // Lazily create the property bag only when something is being stored.
            var properties = content.AdditionalProperties;
            if (properties is null)
            {
                properties = [];
                content.AdditionalProperties = properties;
            }

            properties[CacheControlKey] = cacheControl;
        }
        else
        {
            // Null means "clear": remove the entry if a bag exists; otherwise nothing to do.
            content.AdditionalProperties?.Remove(CacheControlKey);
        }

        return content;
    }

    /// <summary>
    /// Configures Anthropic prompt caching on this content block with the specified TTL.
    /// </summary>
    /// <typeparam name="T">The type of <see cref="AIContent"/>.</typeparam>
    /// <param name="content">The content to configure caching for.</param>
    /// <param name="ttl">
    /// The time-to-live for the cache. Use <see cref="TTL.TTL5m"/> (5 minutes) for dynamic content
    /// or <see cref="TTL.TTL1h"/> (1 hour) for stable content like system prompts.
    /// Pass <see langword="null"/> for the default TTL (5 minutes).
    /// </param>
    /// <returns>The same <paramref name="content"/> instance for method chaining.</returns>
    /// <example>
    /// <code>
    /// // Cache system prompt for 1 hour
    /// var systemContent = new TextContent(systemPrompt).WithCacheControl(TTL.TTL1h);
    ///
    /// // Cache conversation context for 5 minutes (default)
    /// var lastMessage = messages[^1].Contents.Last();
    /// lastMessage.WithCacheControl(TTL.TTL5m);
    /// </code>
    /// </example>
    public static T WithCacheControl<T>(this T content, TTL? ttl)
        where T : AIContent
    {
        // A null TTL is forwarded inside a CacheControlEphemeral, yielding the
        // service-side default (5 minutes) rather than removing cache control.
        return content.WithCacheControl(new CacheControlEphemeral { TTL = ttl });
    }

    /// <summary>
    /// Gets the cache control configuration for this content block, if any.
    /// </summary>
    /// <param name="content">The content to check.</param>
    /// <returns>
    /// The <see cref="CacheControlEphemeral"/> if configured, or <see langword="null"/> if no cache control is set.
    /// </returns>
    internal static CacheControlEphemeral? GetCacheControl(this AIContent content)
    {
        if (content.AdditionalProperties is { } properties
            && properties.TryGetValue(CacheControlKey, out var stored))
        {
            // A value of an unexpected type is treated the same as "not set".
            return stored as CacheControlEphemeral;
        }

        return null;
    }
}

0 commit comments

Comments
 (0)