Skip to content

Commit 4443672

Browse files
committed
Fix line break algorithm
Now that the test actually performs assertions, numerous bugs were found in the line break algorithm implementation. Fix the algorithm so that all tests pass.
1 parent 62e36ee commit 4443672

File tree

11 files changed

+1547
-148
lines changed

11 files changed

+1547
-148
lines changed

Netwolf.Generator.Test/Netwolf.Generator.Test.csproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646
<LogicalName>Netwolf.Generator.Test.EastAsianWidth.txt</LogicalName>
4747
<Visible>false</Visible>
4848
</EmbeddedResource>
49+
<EmbeddedResource Include="..\Netwolf.Unicode\Data\emoji-data.txt">
50+
<LogicalName>Netwolf.Generator.Test.emoji-data.txt</LogicalName>
51+
<Visible>false</Visible>
52+
</EmbeddedResource>
4953
<EmbeddedResource Include="..\Netwolf.Unicode\Data\HangulSyllableType.txt">
5054
<LogicalName>Netwolf.Generator.Test.HangulSyllableType.txt</LogicalName>
5155
<Visible>false</Visible>

Netwolf.Generator.Test/UnicodePropertyTests.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ internal static partial class UnicodeProperty
4343
internal static partial bool IsJoinControl(Rune rune);
4444
internal static partial bool IsNoncharacterCodePoint(Rune rune);
4545
internal static partial bool IsDefaultIgnorableCodePoint(Rune rune);
46+
internal static partial bool IsExtendedPictographic(Rune rune);
4647
internal static partial BidiClass GetBidiClass(Rune rune);
4748
internal static partial HangulSyllableType GetHangulSyllableType(Rune rune);
4849
internal static partial CombiningClass GetCombiningClass(Rune rune);
@@ -72,6 +73,7 @@ internal static partial class UnicodeProperty
7273
new EmbeddedAdditionalText("DerivedCoreProperties.txt"),
7374
new EmbeddedAdditionalText("DerivedJoiningType.txt"),
7475
new EmbeddedAdditionalText("EastAsianWidth.txt"),
76+
new EmbeddedAdditionalText("emoji-data.txt"),
7577
new EmbeddedAdditionalText("HangulSyllableType.txt"),
7678
new EmbeddedAdditionalText("LineBreak.txt"),
7779
new EmbeddedAdditionalText("PropList.txt"),
@@ -82,7 +84,7 @@ internal static partial class UnicodeProperty
8284

8385
var outputDiagnostics = output.GetDiagnostics();
8486
Assert.IsEmpty(outputDiagnostics, "Diagnostics were created while compiling generated source.");
85-
// 9 source files specified above, plus 8 generated files (PropList and DerivedCoreProperties both go to the same file)
87+
// 9 source files specified above, plus 8 generated files (PropList/DerivedCoreProperties/emoji-data all go to the same file)
8688
Assert.AreEqual(17, output.SyntaxTrees.Count());
8789

8890
using MemoryStream assemblyStream = new();

Netwolf.Generator/Unicode/UnicodeDataGenerator.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ public class UnicodeDataGenerator : IIncrementalGenerator
2323
"Join_Control",
2424
"Noncharacter_Code_Point",
2525
"Default_Ignorable_Code_Point",
26+
"Extended_Pictographic",
2627
];
2728

2829
public static StringBuilder GetDecompositionSourceText(ImmutableArray<DecompositionMapping?> mappings)
@@ -269,7 +270,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
269270
.Collect();
270271

271272
var props = context.AdditionalTextsProvider
272-
.Where(static text => text.Path.EndsWith("PropList.txt") || text.Path.EndsWith("DerivedCoreProperties.txt"))
273+
.Where(static text => text.Path.EndsWith("PropList.txt") || text.Path.EndsWith("DerivedCoreProperties.txt") || text.Path.EndsWith("emoji-data.txt"))
273274
.Combine(assembly)
274275
.Where(static model => model.Right == "Netwolf.Unicode")
275276
.SelectMany(static (model, token) => model.Left.GetText(token)?.ToString().Split(['\r', '\n'], StringSplitOptions.RemoveEmptyEntries) ?? [])

Netwolf.Test/Unicode/LineBreakTests.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,22 @@ public void Test_split_algorithm(string test, string _)
3434
}
3535

3636
// this should split on every break opportunity (both optional and mandatory)
37-
var lines = LineBreakHelper.SplitText(sb.ToString(), 1, true);
37+
var lines = LineBreakHelper.SplitText(sb.ToString(), 1, SplitTextOptions.AllowOverflow | SplitTextOptions.IncludeBreakCharacters);
3838

3939
foreach (var (line, _) in lines)
4040
{
41-
for (var i = 0; i < line.Length - 1; ++i)
41+
var enumerator = line.EnumerateRunes();
42+
bool first = true;
43+
44+
while (enumerator.MoveNext())
4245
{
46+
if (first)
47+
{
48+
// skip the first rune (the number of non-break positions within a line is one fewer than the number of runes, i.e. code points, in that line)
49+
first = false;
50+
continue;
51+
}
52+
4353
actual.Add(false);
4454
}
4555

Netwolf.Transport/Commands/CommandFactory.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ public ICommand[] PrepareClientMessage(
292292
multilineEnabled = false;
293293
}
294294

295-
var lines = LineBreakHelper.SplitText(text, maxlen, false);
295+
var lines = LineBreakHelper.SplitText(text, maxlen);
296296

297297
string batchId = Guid.NewGuid().ToString();
298298
int batchLines = 0;

0 commit comments

Comments
 (0)