From b7c8c73f1b662c4e29d4b930de8067fcb81b72be Mon Sep 17 00:00:00 2001 From: EliotJones Date: Tue, 8 Jul 2025 18:14:29 -0500 Subject: [PATCH] skip single letter final blocks Align with the behavior of the PDFBox and C implementations, where single-character final blocks are ignored rather than being written. Also makes the error more informative in case it is ever encountered again. Add more test cases. It is possible this is hiding the problem and will move the error elsewhere, but this matches the behavior of the two reference implementations. One other potential source for the error is if PDF supports '<~' as a start-of-data marker, which I can't find in the spec but Wikipedia says might be possible. Without documents to trigger the error I think this is the best fix for now. --- .../Filters/Ascii85FilterTests.cs | 50 +++++++++++++++---- src/UglyToad.PdfPig/Filters/Ascii85Filter.cs | 25 +++++++--- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Filters/Ascii85FilterTests.cs b/src/UglyToad.PdfPig.Tests/Filters/Ascii85FilterTests.cs index 73312b549..ca29ebde5 100644 --- a/src/UglyToad.PdfPig.Tests/Filters/Ascii85FilterTests.cs +++ b/src/UglyToad.PdfPig.Tests/Filters/Ascii85FilterTests.cs @@ -34,10 +34,39 @@ public void DecodesWikipediaExample() text); } - [Fact] - public void ReplacesZWithEmptyBytes() + [Theory] + [InlineData("BE", "h")] + [InlineData("BOq", "he")] + [InlineData("BOtu", "hel")] + [InlineData("BOu!r", "hell")] + [InlineData("BOu!rDZ", "hello")] + [InlineData("BOu!rD]f", "hello ")] + [InlineData("BOu!rD]j6", "hello w")] + [InlineData("BOu!rD]j7B", "hello wo")] + [InlineData("BOu!rD]j7BEW", "hello wor")] + [InlineData("BOu!rD]j7BEbk", "hello worl")] + [InlineData("BOu!rD]j7BEbo7", "hello world")] + [InlineData("BOu!rD]j7BEbo80", "hello world!")] + public void DecodesHelloWorld(string encoded, string decoded) { - var bytes = Encoding.ASCII.GetBytes("9jqo^zBlbD-"); + var result = filter.Decode( 
Encoding.ASCII.GetBytes(encoded), + dictionary, + TestFilterProvider.Instance, + 0); + + Assert.Equal(decoded, Encoding.ASCII.GetString(result.ToArray())); + } + + [Theory] + [InlineData("9jqo^zBlbD-", "Man \0\0\0\0is d")] + [InlineData("", "")] + [InlineData("z", "\0\0\0\0")] + [InlineData("zz", "\0\0\0\0\0\0\0\0")] + [InlineData("zzz", "\0\0\0\0\0\0\0\0\0\0\0\0")] + public void ReplacesZWithEmptyBytes(string encoded, string decoded) + { + var bytes = Encoding.ASCII.GetBytes(encoded); var result = filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1); @@ -47,7 +76,7 @@ public void ReplacesZWithEmptyBytes() string text = Encoding.ASCII.GetString(result.Span); #endif - Assert.Equal("Man \0\0\0\0is d", text); + Assert.Equal(decoded, text); } [Fact] @@ -60,14 +89,17 @@ public void ZInMiddleOf5CharacterSequenceThrows() Assert.Throws(action); } - [Fact] - public void SingleCharacterLastThrows() + [Theory] + [InlineData("@rH:%B", "cool")] + [InlineData("A~>", "")] + [InlineData("@rH:%A~>", "cool")] + public void SingleCharacterLastIgnores(string encoded, string decoded) { - var bytes = Encoding.ASCII.GetBytes("9jqo^B"); + var bytes = Encoding.ASCII.GetBytes(encoded); - Action action = () => filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1); + var result = filter.Decode(bytes, dictionary, TestFilterProvider.Instance, 1); - Assert.Throws(action); + Assert.Equal(decoded, Encoding.ASCII.GetString(result.ToArray())); } private const string PdfContent = @"1 0 obj diff --git a/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs b/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs index cd3988c66..4bb8a4562 100644 --- a/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs +++ b/src/UglyToad.PdfPig/Filters/Ascii85Filter.cs @@ -2,6 +2,7 @@ { using System; using Core; + using System.Text; using Tokens; /// @@ -13,7 +14,7 @@ public sealed class Ascii85Filter : IFilter private const byte Offset = (byte)'!'; private const byte EmptyCharacterPadding = (byte)'u'; - private static 
ReadOnlySpan EndOfDataBytes => [(byte)'~', (byte)'>']; + private static ReadOnlySpan EndOfDataBytes => "~>"u8; private static readonly int[] PowerByIndex = [ @@ -52,7 +53,7 @@ public Memory Decode(Memory input, DictionaryToken streamDictionary, { if (index > 0) { - WriteData(asciiBuffer, index, writer); + WriteData(asciiBuffer, index, writer, true); } index = 0; @@ -88,24 +89,36 @@ public Memory Decode(Memory input, DictionaryToken streamDictionary, if (index == 5) { - WriteData(asciiBuffer, index, writer); + WriteData(asciiBuffer, index, writer, false); index = 0; } } if (index > 0) { - WriteData(asciiBuffer, index, writer); + WriteData(asciiBuffer, index, writer, true); } return writer.WrittenMemory.ToArray(); } - private static void WriteData(Span ascii, int index, ArrayPoolBufferWriter writer) + private static void WriteData( + Span ascii, + int index, + ArrayPoolBufferWriter writer, + bool isAtEnd) { if (index < 2) { - throw new ArgumentOutOfRangeException(nameof(index), "Cannot convert a block padded by 4 'u' characters."); + if (isAtEnd) + { + return; + } + + var bufferTxt = Encoding.ASCII.GetString(ascii); + var soFar = Encoding.ASCII.GetString(writer.GetSpan()); + throw new ArgumentOutOfRangeException(nameof(index), + $"Cannot convert a this block because we're not at the end of the stream. Chunk: '{bufferTxt}'. Content: '{soFar}'"); } // Write any empty padding if the block ended early.