UglyToad · BobLd · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/src/UglyToad.PdfPig.Core/ReadHelper.cs b/src/UglyToad.PdfPig.Core/ReadHelper.cs
@@ -24,17 +24,12 @@ public static class ReadHelper
         /// </summary>
         public const byte AsciiCarriageReturn = 13;
 
-        /// <summary>
-        /// The tab '\t' character.
-        /// </summary>
-        public const byte AsciiTab = 9;
-
         private static readonly HashSet<int> EndOfNameCharacters =
         [
             ' ',
             AsciiCarriageReturn,
             AsciiLineFeed,
-            AsciiTab,
+            9, // ASCII horizontal tab '\t'; written as a literal because this patch removes the AsciiTab constant.
             '>',
             '<',
             '[',

diff --git a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
@@ -96,17 +96,6 @@ public bool IsAtEnd()
         /// <inheritdoc />
         public void Seek(long position)
         {
-            var current = CurrentOffset;
-            if (position == current)
-            {
-                return;
-            }
-            else if (peekByte.HasValue && position == current + 1)
-            {
-                MoveNext();
-                return;
-            }
-
             isAtEnd = false;
             peekByte = null;
 

diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
@@ -14,7 +14,7 @@ public sealed class Type1ArrayTokenizer : ITokenizer
         /// <inheritdoc />
         public bool ReadsNextByte { get; } = false;
 
-        private static readonly char[] Space = [' '];
+        private static readonly string[] Space = [" "];
 
         /// <inheritdoc />
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)

diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
@@ -88,11 +88,6 @@ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
                         {
                             int offset = 0;
 
-                            while (inputBytes.Peek() is { } b && ReadHelper.IsWhitespace(b))
-                            {
-                                inputBytes.MoveNext();
-                            }
-
                             while (inputBytes.MoveNext())
                             {
                                 if (inputBytes.CurrentByte == (byte)ClearToMark[offset])

diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
@@ -2,7 +2,6 @@
 {
     using System;
     using System.Collections.Generic;
-    using System.Diagnostics;
     using System.Globalization;
     using System.Text;
     using Core;
@@ -42,43 +41,35 @@ private Type1Token ReadNextToken()
             do
             {
                 skip = false;
-                while (bytes.Peek() is { } b)
+                while (bytes.MoveNext())
                 {
+                    var b = bytes.CurrentByte;
                     var c = (char)b;
 
                     switch (c)
                     {
                         case '%':
-                            bytes.MoveNext();
                             comments.Add(ReadComment());
                             break;
                         case '(':
-                            bytes.MoveNext();
                             return ReadString();
                         case ')':
                             throw new InvalidOperationException("Encountered an end of string ')' outside of string.");
                         case '[':
-                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.StartArray);
                         case ']':
-                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.EndArray);
                         case '{':
-                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.StartProc);
                         case '}':
-                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.EndProc);
                         case '/':
                             {
-                                bytes.MoveNext();
-                                TryReadLiteral(out var name);
-                                Debug.Assert(name != null);
+                                var name = ReadLiteral();
                                 return new Type1Token(name, Type1Token.TokenType.Literal);
                             }
                         case '<':
                             {
-                                bytes.MoveNext();
                                 var following = bytes.Peek();
                                 if (following == '<')
                                 {
@@ -90,7 +81,6 @@ private Type1Token ReadNextToken()
                             }
                         case '>':
                             {
-                                bytes.MoveNext();
                                 var following = bytes.Peek();
                                 if (following == '>')
                                 {
@@ -104,24 +94,23 @@ private Type1Token ReadNextToken()
                             {
                                 if (ReadHelper.IsWhitespace(b))
                                 {
-                                    bytes.MoveNext();
                                     skip = true;
                                     break;
                                 }
 
                                 if (b == 0)
                                 {
-                                    bytes.MoveNext();
                                     skip = true;
                                     break;
                                 }
 
-                                if (TryReadNumber(out var number))
+                                if (TryReadNumber(c, out var number))
                                 {
                                     return number;
                                 }
 
-                                if (!TryReadLiteral(out var name))
+                                var name = ReadLiteral(c);
+                                if (name == null)
                                 {
                                     throw new InvalidOperationException($"The binary portion of the type 1 font was invalid at position {bytes.CurrentOffset}.");
                                 }
@@ -208,21 +197,12 @@ char GetNext()
             return null;
         }
 
-        private bool TryReadNumber(out Type1Token numberToken)
+        private bool TryReadNumber(char c, out Type1Token numberToken)
         {
             char GetNext()
             {
                 bytes.MoveNext();
-                return (char)(bytes.Peek() ?? 0);
-            }
-
-            char c = (char)(bytes.Peek() ?? 0);
-
-            if (!((c >= '0' && c <= '9') || c is '+' or '-'))
-            {
-                // Easy out. Not a valid number
-                numberToken = null;
-                return false;
+                return (char)bytes.CurrentByte;
             }
 
             numberToken = null;
@@ -271,6 +251,8 @@ char GetNext()
             else
             {
                 // integer
+                bytes.Seek(bytes.CurrentOffset - 1);
+
                 numberToken = new Type1Token(sb.ToString(), Type1Token.TokenType.Integer);
                 return true;
             }
@@ -327,6 +309,7 @@ char GetNext()
                 }
             }
 
+            bytes.Seek(bytes.CurrentOffset - 1);
             if (radix != null)
             {
                 var number = Convert.ToInt32(sb.ToString(), int.Parse(radix.ToString(), CultureInfo.InvariantCulture));
@@ -340,9 +323,14 @@ char GetNext()
             return true;
         }
 
-        private bool TryReadLiteral(out string? value)
+        private string ReadLiteral(char? previousCharacter = null)
         {
             literalBuffer.Clear();
+            if (previousCharacter.HasValue)
+            {
+                literalBuffer.Append(previousCharacter);
+            }
+
             do
             {
                 var b = bytes.Peek();
@@ -362,16 +350,8 @@ private bool TryReadLiteral(out string? value)
                 literalBuffer.Append(c);
             } while (bytes.MoveNext());
 
-            if (literalBuffer.Length > 0)
-            {
-                value = literalBuffer.ToString();
-                return true;
-            }
-            else
-            {
-                value = null;
-                return false;
-            }
+            var literal = literalBuffer.ToString();
+            return literal.Length == 0 ? null : literal;
         }
 
         private string ReadComment()
@@ -395,10 +375,9 @@ private string ReadComment()
         private Type1DataToken ReadCharString(int length)
         {
             // Skip preceding space.
-            if (bytes.Peek() is { } ws && ReadHelper.IsWhitespace(ws))
-            {
-                bytes.MoveNext();
-            }
+            bytes.MoveNext();
+            // TODO: may be wrong - the skip above is unconditional (no whitespace check) and a second skip is kept disabled:
+            // bytes.MoveNext();
 
             byte[] data = new byte[length];
             for (int i = 0; i < length; i++)

diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
@@ -4,10 +4,43 @@
     using DocumentLayoutAnalysis.PageSegmenter;
     using DocumentLayoutAnalysis.WordExtractor;
     using PdfPig.Core;
+    using PdfPig.Tokens;
     using SkiaSharp;
 
     public class GithubIssuesTests
     {
+        [Fact]
+        public void Revert_e11dc6b()
+        {
+            var pdfPath = IntegrationHelpers.GetDocumentPath("GHOSTSCRIPT-699488-0.pdf");
+            var options = new ParsingOptions() { UseLenientParsing = true };
+
+            using (var pdf = PdfDocument.Open(pdfPath, options))
+            {
+                var firstPage = pdf.GetPage(1);
+                var pageImages = firstPage.GetImages().ToArray();
+
+                Assert.Equal(9, pageImages.Length);
+
+                foreach (var pageImage in pageImages)
+                {
+                    // Images whose filter name contains "DCT" are exempt from the PNG conversion check.
+                    var isDct = pageImage.ImageDictionary.TryGet(NameToken.Filter, out var filter)
+                                && filter is NameToken filterName
+                                && filterName.Data.Contains("DCT");
+                    if (isDct)
+                    {
+                        continue;
+                    }
+
+                    Assert.True(pageImage.TryGetPng(out _));
+                }
+
+                Assert.Equal(66, firstPage.Paths.Count);
+                Assert.Equal(2685, firstPage.Letters.Count);
+            }
+        }
+
         [Fact]
         public void Issue1199()
         {

diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
@@ -91,10 +91,7 @@ public void OnlyParsesNumberPart()
             Assert.True(result);
             Assert.Equal(135.6654, AssertNumericToken(token).Data);
 
-            if (tokenizer.ReadsNextByte)
-                Assert.Equal('/', (char)input.Bytes.CurrentByte);
-            else
-                Assert.Equal('4', (char)input.Bytes.CurrentByte);
+            Assert.Equal('/', (char)input.Bytes.CurrentByte);
         }
 
         [Fact]

diff --git a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
@@ -9,7 +9,7 @@ internal sealed class ArrayTokenizer : ITokenizer
     {
         private readonly bool usePdfDocEncoding;
 
-        public bool ReadsNextByte => false;
+        public bool ReadsNextByte { get; } = false;
 
         public ArrayTokenizer(bool usePdfDocEncoding)
         {

diff --git a/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
@@ -6,7 +6,7 @@
 
     internal sealed class CommentTokenizer : ITokenizer
     {
-        public bool ReadsNextByte => false;
+        public bool ReadsNextByte { get; } = true; // NOTE(review): presumably true because the loop in TryTokenize calls MoveNext() before the end-of-line test, so the terminating byte is already consumed - confirm.
 
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
         {
@@ -17,11 +17,10 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
                 return false;
             }
 
-            using var builder = new ValueStringBuilder(stackalloc char[32]);
+            using var builder = new ValueStringBuilder();
 
-            while (inputBytes.Peek() is { } c && !ReadHelper.IsEndOfLine(c))
+            while (inputBytes.MoveNext() && !ReadHelper.IsEndOfLine(inputBytes.CurrentByte))
             {
-                inputBytes.MoveNext();
                 builder.Append((char) inputBytes.CurrentByte);
             }
 

diff --git a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
@@ -11,7 +11,7 @@ internal class DictionaryTokenizer : ITokenizer
         private readonly IReadOnlyList<NameToken> requiredKeys;
         private readonly bool useLenientParsing;
 
-        public bool ReadsNextByte => false;
+        public bool ReadsNextByte { get; } = false;
 
         /// <summary>
         /// Create a new <see cref="DictionaryTokenizer"/>.

diff --git a/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
@@ -9,7 +9,7 @@
     public sealed class EndOfLineTokenizer : ITokenizer
     {
         /// <inheritdoc />
-        public bool ReadsNextByte => false;
+        public bool ReadsNextByte { get; } = false;
 
         /// <inheritdoc />
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)

diff --git a/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
@@ -5,7 +5,7 @@
 
     internal sealed class HexTokenizer : ITokenizer
     {
-        public bool ReadsNextByte => false;
+        public bool ReadsNextByte { get; } = false;
 
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
         {