Skip to content

Commit 80b1ad2

Browse files
Copilot, stephentoub, ericstj
authored
Use ReadOnlyMemory<byte> for binary data to eliminate UTF-16 transcoding (#1070)
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> Co-authored-by: ericstj <8918108+ericstj@users.noreply.github.com> Co-authored-by: Eric StJohn <ericstj@microsoft.com>
1 parent a3ce9e9 commit 80b1ad2

32 files changed

+400
-103
lines changed

samples/EverythingServer/Resources/SimpleResourceType.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using ModelContextProtocol.Protocol;
22
using ModelContextProtocol.Server;
33
using System.ComponentModel;
4+
using System.Text;
45

56
namespace EverythingServer.Resources;
67

@@ -31,7 +32,7 @@ public static ResourceContents TemplateResource(RequestContext<ReadResourceReque
3132
} :
3233
new BlobResourceContents
3334
{
34-
Blob = resource.Description!,
35+
Blob = Encoding.UTF8.GetBytes(resource.Description!),
3536
MimeType = resource.MimeType,
3637
Uri = resource.Uri,
3738
};

samples/EverythingServer/Tools/AnnotatedMessageTool.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using ModelContextProtocol.Protocol;
22
using ModelContextProtocol.Server;
33
using System.ComponentModel;
4+
using System.Text;
45

56
namespace EverythingServer.Tools;
67

@@ -41,7 +42,7 @@ public static IEnumerable<ContentBlock> AnnotatedMessage(MessageType messageType
4142
{
4243
contents.Add(new ImageContentBlock
4344
{
44-
Data = TinyImageTool.MCP_TINY_IMAGE.Split(",").Last(),
45+
Data = Encoding.UTF8.GetBytes(TinyImageTool.MCP_TINY_IMAGE.Split(",").Last()),
4546
MimeType = "image/png",
4647
Annotations = new() { Audience = [Role.User], Priority = 0.5f }
4748
});

src/Common/EncodingUtilities.cs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
using System.Buffers;
2+
using System.Buffers.Text;
3+
using System.Diagnostics;
4+
using System.Text;
5+
6+
namespace ModelContextProtocol;
7+
8+
/// <summary>Provides helper methods for encoding operations.</summary>
internal static class EncodingUtilities
{
    /// <summary>
    /// Converts UTF-16 characters to UTF-8 bytes without intermediate string allocations.
    /// </summary>
    /// <param name="utf16">The UTF-16 character span to convert.</param>
    /// <returns>A newly allocated byte array containing the UTF-8 encoded bytes.</returns>
    public static byte[] GetUtf8Bytes(ReadOnlySpan<char> utf16)
    {
        // Size the array exactly so the caller never sees trailing unused bytes.
        byte[] bytes = new byte[Encoding.UTF8.GetByteCount(utf16)];
        Encoding.UTF8.GetBytes(utf16, bytes);
        return bytes;
    }

    /// <summary>
    /// Encodes binary data to base64-encoded UTF-8 bytes.
    /// </summary>
    /// <param name="data">The binary data to encode.</param>
    /// <returns>A ReadOnlyMemory containing the base64-encoded UTF-8 bytes.</returns>
    public static ReadOnlyMemory<byte> EncodeToBase64Utf8(ReadOnlyMemory<byte> data)
    {
        int maxLength = Base64.GetMaxEncodedToUtf8Length(data.Length);
        byte[] buffer = new byte[maxLength];
        OperationStatus status = Base64.EncodeToUtf8(data.Span, buffer, out _, out int bytesWritten);
        Debug.Assert(status == OperationStatus.Done, "Base64 encoding should succeed for valid input data");
        Debug.Assert(bytesWritten == buffer.Length, "Base64 encoding should always produce the same length as the max length");
        return buffer.AsMemory(0, bytesWritten);
    }

    /// <summary>
    /// Decodes base64-encoded UTF-8 bytes to binary data.
    /// </summary>
    /// <remarks>
    /// ASCII whitespace (space, tab, carriage return, line feed) in the input is tolerated.
    /// If the first decoding attempt fails, the input is decoded again with that whitespace removed.
    /// </remarks>
    /// <param name="base64Data">The base64-encoded UTF-8 bytes to decode.</param>
    /// <returns>A ReadOnlyMemory containing the decoded binary data.</returns>
    /// <exception cref="FormatException">The input is not valid base64 data.</exception>
    public static ReadOnlyMemory<byte> DecodeFromBase64Utf8(ReadOnlyMemory<byte> base64Data)
    {
        ReadOnlySpan<byte> source = base64Data.Span;
        if (TryDecode(source, out ReadOnlyMemory<byte> decoded))
        {
            return decoded;
        }

        // Base64.DecodeFromUtf8 only skips whitespace on newer runtimes; on older targets it
        // reports such input as invalid. Retry once with whitespace stripped so the result does
        // not depend on the target framework. This path only runs after a failed decode.
        byte[] compacted = new byte[source.Length];
        int count = 0;
        foreach (byte b in source)
        {
            if (!IsBase64Whitespace(b))
            {
                compacted[count++] = b;
            }
        }

        // If nothing was stripped the input is genuinely malformed; do not decode it twice.
        if (count != source.Length && TryDecode(compacted.AsSpan(0, count), out decoded))
        {
            return decoded;
        }

        throw new FormatException("Invalid base64 data");
    }

    /// <summary>Attempts a single-pass base64 decode of <paramref name="source"/> into a new buffer.</summary>
    private static bool TryDecode(ReadOnlySpan<byte> source, out ReadOnlyMemory<byte> decoded)
    {
        byte[] buffer = new byte[Base64.GetMaxDecodedFromUtf8Length(source.Length)];
        if (Base64.DecodeFromUtf8(source, buffer, out _, out int bytesWritten) == OperationStatus.Done)
        {
            // Base64 decoding may produce fewer bytes than the max length, due to whitespace or padding.
            Debug.Assert(bytesWritten <= buffer.Length, "Base64 decoding should never produce more bytes than the max length");
            decoded = buffer.AsMemory(0, bytesWritten);
            return true;
        }

        decoded = default;
        return false;
    }

    /// <summary>Returns whether <paramref name="value"/> is a space, tab, carriage return, or line feed.</summary>
    private static bool IsBase64Whitespace(byte value) =>
        value == (byte)' ' || value == (byte)'\t' || value == (byte)'\r' || value == (byte)'\n';
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#if !NET
5+
6+
namespace System.Text;
7+
8+
/// <summary>
/// Span-based overloads of <see cref="Encoding"/> members for targets whose base class library
/// lacks them. They forward to the pointer-based overloads.
/// </summary>
internal static class EncodingExtensions
{
    /// <summary>
    /// Gets the number of bytes required to encode the specified characters.
    /// </summary>
    /// <param name="encoding">The encoding used to measure the characters.</param>
    /// <param name="chars">The characters to measure.</param>
    /// <returns>The number of bytes produced by encoding <paramref name="chars"/>; 0 when it is empty.</returns>
    public static int GetByteCount(this Encoding encoding, ReadOnlySpan<char> chars)
    {
        // An empty span pins to a null pointer, which the pointer overload rejects.
        if (chars.IsEmpty)
        {
            return 0;
        }

        unsafe
        {
            fixed (char* charsPtr = chars)
            {
                return encoding.GetByteCount(charsPtr, chars.Length);
            }
        }
    }

    /// <summary>
    /// Encodes the specified characters into the specified byte span.
    /// </summary>
    /// <param name="encoding">The encoding used to convert the characters.</param>
    /// <param name="chars">The characters to encode.</param>
    /// <param name="bytes">The destination span that receives the encoded bytes.</param>
    /// <returns>The number of bytes written to <paramref name="bytes"/>; 0 when <paramref name="chars"/> is empty.</returns>
    public static int GetBytes(this Encoding encoding, ReadOnlySpan<char> chars, Span<byte> bytes)
    {
        // An empty span pins to a null pointer, which the pointer overload rejects.
        if (chars.IsEmpty)
        {
            return 0;
        }

        unsafe
        {
            fixed (char* charsPtr = chars)
            {
                if (bytes.IsEmpty)
                {
                    // Pinning an empty destination would hand the encoder a null pointer and surface
                    // as a null-argument error. Pass a valid address with length 0 instead so the
                    // encoder itself decides how to handle a zero-length destination.
                    byte placeholder = 0;
                    return encoding.GetBytes(charsPtr, chars.Length, &placeholder, 0);
                }

                fixed (byte* bytesPtr = bytes)
                {
                    return encoding.GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
                }
            }
        }
    }
}
49+
50+
#endif

src/Common/ServerSentEvents/SseEventWriterHelpers.cs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,12 @@ public static void WriteUtf8String(this IBufferWriter<byte> writer, ReadOnlySpan
4747
return;
4848
}
4949

50-
#if NET
5150
int maxByteCount = Encoding.UTF8.GetMaxByteCount(value.Length);
5251
Span<byte> buffer = writer.GetSpan(maxByteCount);
5352
Debug.Assert(buffer.Length >= maxByteCount);
5453

5554
int bytesWritten = Encoding.UTF8.GetBytes(value, buffer);
5655
writer.Advance(bytesWritten);
57-
#else
58-
// netstandard2.0 doesn't have the Span overload of GetBytes
59-
byte[] bytes = Encoding.UTF8.GetBytes(value.ToString());
60-
Span<byte> buffer = writer.GetSpan(bytes.Length);
61-
bytes.AsSpan().CopyTo(buffer);
62-
writer.Advance(bytes.Length);
63-
#endif
6456
}
6557

6658
public static bool ContainsLineBreaks(this ReadOnlySpan<char> text) =>

src/ModelContextProtocol.Core/AIContentExtensions.cs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
using Microsoft.Extensions.AI;
22
using ModelContextProtocol.Client;
33
using ModelContextProtocol.Protocol;
4-
#if !NET
5-
using System.Runtime.InteropServices;
6-
#endif
4+
using System.Text;
75
using System.Text.Json;
86
using System.Text.Json.Nodes;
97

@@ -281,9 +279,9 @@ public static IList<PromptMessage> ToPromptMessages(this ChatMessage chatMessage
281279
{
282280
TextContentBlock textContent => new TextContent(textContent.Text),
283281

284-
ImageContentBlock imageContent => new DataContent(Convert.FromBase64String(imageContent.Data), imageContent.MimeType),
282+
ImageContentBlock imageContent => new DataContent(imageContent.DecodedData, imageContent.MimeType),
285283

286-
AudioContentBlock audioContent => new DataContent(Convert.FromBase64String(audioContent.Data), audioContent.MimeType),
284+
AudioContentBlock audioContent => new DataContent(audioContent.DecodedData, audioContent.MimeType),
287285

288286
EmbeddedResourceBlock resourceContent => resourceContent.Resource.ToAIContent(),
289287

@@ -324,7 +322,7 @@ public static AIContent ToAIContent(this ResourceContents content)
324322

325323
AIContent ac = content switch
326324
{
327-
BlobResourceContents blobResource => new DataContent(Convert.FromBase64String(blobResource.Blob), blobResource.MimeType ?? "application/octet-stream"),
325+
BlobResourceContents blobResource => new DataContent(blobResource.DecodedData, blobResource.MimeType ?? "application/octet-stream"),
328326
TextResourceContents textResource => new TextContent(textResource.Text),
329327
_ => throw new NotSupportedException($"Resource type '{content.GetType().Name}' is not supported.")
330328
};
@@ -401,21 +399,21 @@ public static ContentBlock ToContentBlock(this AIContent content, JsonSerializer
401399

402400
DataContent dataContent when dataContent.HasTopLevelMediaType("image") => new ImageContentBlock
403401
{
404-
Data = dataContent.Base64Data.ToString(),
402+
Data = EncodingUtilities.GetUtf8Bytes(dataContent.Base64Data.Span),
405403
MimeType = dataContent.MediaType,
406404
},
407405

408406
DataContent dataContent when dataContent.HasTopLevelMediaType("audio") => new AudioContentBlock
409407
{
410-
Data = dataContent.Base64Data.ToString(),
408+
Data = EncodingUtilities.GetUtf8Bytes(dataContent.Base64Data.Span),
411409
MimeType = dataContent.MediaType,
412410
},
413411

414412
DataContent dataContent => new EmbeddedResourceBlock
415413
{
416414
Resource = new BlobResourceContents
417415
{
418-
Blob = dataContent.Base64Data.ToString(),
416+
Blob = EncodingUtilities.GetUtf8Bytes(dataContent.Base64Data.Span),
419417
MimeType = dataContent.MediaType,
420418
Uri = string.Empty,
421419
}

src/ModelContextProtocol.Core/ModelContextProtocol.Core.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
<Compile Include="..\Common\Throw.cs" Link="Throw.cs" />
2626
<Compile Include="..\Common\Obsoletions.cs" Link="Obsoletions.cs" />
2727
<Compile Include="..\Common\Experimentals.cs" Link="Experimentals.cs" />
28+
<Compile Include="..\Common\EncodingUtilities.cs" Link="EncodingUtilities.cs" />
2829
<Compile Include="..\Common\HttpResponseMessageExtensions.cs" Link="HttpResponseMessageExtensions.cs" />
2930
<Compile Include="..\Common\ServerSentEvents\**\*.cs" Link="ServerSentEvents\%(RecursiveDir)%(FileName)%(Extension)" />
3031
</ItemGroup>

src/ModelContextProtocol.Core/Protocol/BlobResourceContents.cs

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
using System.Buffers;
2+
using System.Buffers.Text;
13
using System.Diagnostics;
4+
using System.Runtime.InteropServices;
25
using System.Text.Json.Serialization;
36

47
namespace ModelContextProtocol.Protocol;
@@ -9,7 +12,7 @@ namespace ModelContextProtocol.Protocol;
912
/// <remarks>
1013
/// <para>
1114
/// <see cref="BlobResourceContents"/> is used when binary data needs to be exchanged through
12-
/// the Model Context Protocol. The binary data is represented as a base64-encoded string
15+
/// the Model Context Protocol. The binary data is represented as base64-encoded UTF-8 bytes
1316
/// in the <see cref="Blob"/> property.
1417
/// </para>
1518
/// <para>
@@ -24,18 +27,76 @@ namespace ModelContextProtocol.Protocol;
2427
[DebuggerDisplay("{DebuggerDisplay,nq}")]
2528
public sealed class BlobResourceContents : ResourceContents
2629
{
30+
private ReadOnlyMemory<byte>? _decodedData;
31+
private ReadOnlyMemory<byte> _blob;
32+
33+
/// <summary>
/// Creates a <see cref="BlobResourceContents"/> from raw data.
/// </summary>
/// <param name="bytes">The raw unencoded data.</param>
/// <param name="uri">The URI of the blob resource.</param>
/// <param name="mimeType">The optional MIME type of the data.</param>
/// <returns>
/// A new <see cref="BlobResourceContents"/> instance whose <see cref="Blob"/> holds the base64
/// encoding of <paramref name="bytes"/> and whose <see cref="DecodedData"/> is pre-populated
/// with <paramref name="bytes"/>.
/// </returns>
public static BlobResourceContents FromBytes(ReadOnlyMemory<byte> bytes, string uri, string? mimeType = null)
{
    ReadOnlyMemory<byte> blob = EncodingUtilities.EncodeToBase64Utf8(bytes);

    return new()
    {
        // Initializer members are assigned in the order written, and the Blob setter clears
        // _decodedData. Blob must therefore be assigned first, otherwise the decoded cache
        // seeded here is discarded and the first DecodedData read decodes the blob again.
        Blob = blob,
        _decodedData = bytes,
        MimeType = mimeType,
        Uri = uri
    };
}
53+
2754
/// <summary>
28-
/// Gets or sets the base64-encoded string representing the binary data of the item.
55+
/// Gets or sets the base64-encoded UTF-8 bytes representing the binary data of the item.
2956
/// </summary>
57+
/// <remarks>
58+
/// Setting this value will invalidate any cached value of <see cref="DecodedData"/>.
59+
/// </remarks>
3060
[JsonPropertyName("blob")]
31-
public required string Blob { get; set; }
61+
public required ReadOnlyMemory<byte> Blob
{
    get
    {
        return _blob;
    }

    set
    {
        // A new payload makes any previously decoded bytes stale, so the cache is dropped
        // and the next DecodedData read decodes the new value.
        _decodedData = null;
        _blob = value;
    }
}
70+
71+
/// <summary>
72+
/// Gets the decoded data represented by <see cref="Blob"/>.
73+
/// </summary>
74+
/// <remarks>
75+
/// <para>
76+
/// When getting, this member will decode the value in <see cref="Blob"/> and cache the result.
77+
/// Subsequent accesses return the cached value unless <see cref="Blob"/> is modified.
78+
/// </para>
79+
/// </remarks>
80+
[JsonIgnore]
81+
public ReadOnlyMemory<byte> DecodedData
{
    get
    {
        // Fast path: a previous read (or FromBytes) already supplied the decoded bytes.
        if (_decodedData is { } cached)
        {
            return cached;
        }

        // Decode the current base64 payload once and remember it until Blob is reassigned.
        ReadOnlyMemory<byte> decoded = EncodingUtilities.DecodeFromBase64Utf8(Blob);
        _decodedData = decoded;
        return decoded;
    }
}
3293

3394
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
3495
private string DebuggerDisplay
3596
{
3697
get
3798
{
38-
string lengthDisplay = DebuggerDisplayHelper.GetBase64LengthDisplay(Blob);
99+
string lengthDisplay = _decodedData is null ? DebuggerDisplayHelper.GetBase64LengthDisplay(Blob) : $"{DecodedData.Length} bytes";
39100
string mimeInfo = MimeType is not null ? $", MimeType = {MimeType}" : "";
40101
return $"Uri = \"{Uri}\"{mimeInfo}, Length = {lengthDisplay}";
41102
}

0 commit comments

Comments
 (0)