diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs
deleted file mode 100644
index 86d39a4283..0000000000
--- a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-namespace Avalonia.Media.TextFormatting.Unicode
-{
- internal static class BreakPairTable
- {
- private static readonly byte[][] s_breakPairTable =
- {
- new byte[] {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4},
- new byte[] {0,4,4,1,1,4,4,4,4,1,1,0,0,0,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,4,4,4,4,1,1,1,1,1,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {4,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,0,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,1,0,1,1,0,0,4,2,4,1,1,1,1,1,0,1,1,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,0,1,4,4,4,0,0,1,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,0,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,4,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,1,1,1,1,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,1,0,0,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,1,1,0},
- new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- new byte[] {0,4,4,1,1,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
- };
-
- public static PairBreakType Map(LineBreakClass first, LineBreakClass second)
- {
- return (PairBreakType)s_breakPairTable[(int)first][(int)second];
- }
- }
-
- internal enum PairBreakType : byte
- {
- DI = 0, // Direct break opportunity
- IN = 1, // Indirect break opportunity
- CI = 2, // Indirect break opportunity for combining marks
- CP = 3, // Prohibited break for combining marks
- PR = 4 // Prohibited break
- }
-}
diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs
index 2f46fdd9d0..43a95310c6 100644
--- a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs
+++ b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs
@@ -9,37 +9,40 @@ namespace Avalonia.Media.TextFormatting.Unicode
///
public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD');
- private readonly int _value;
-
public Codepoint(int value)
{
- _value = value;
+ Value = value;
}
+ /// <summary>
+ /// Gets the codepoint's value.
+ /// </summary>
+ public int Value { get; }
+
///
/// Gets the .
///
- public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(_value);
+ public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(Value);
///
/// Gets the .
///
- public Script Script => UnicodeData.GetScript(_value);
+ public Script Script => UnicodeData.GetScript(Value);
///
/// Gets the .
///
- public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(_value);
+ public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(Value);
///
/// Gets the .
///
- public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(_value);
+ public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(Value);
///
/// Gets the .
///
- public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(_value);
+ public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(Value);
///
/// Determines whether this is a break char.
@@ -51,7 +54,7 @@ namespace Avalonia.Media.TextFormatting.Unicode
{
get
{
- switch (_value)
+ switch (Value)
{
case '\u000A':
case '\u000B':
@@ -93,12 +96,12 @@ namespace Avalonia.Media.TextFormatting.Unicode
public static implicit operator int(Codepoint codepoint)
{
- return codepoint._value;
+ return codepoint.Value;
}
public static implicit operator uint(Codepoint codepoint)
{
- return (uint)codepoint._value;
+ return (uint)codepoint.Value;
}
///
@@ -112,7 +115,7 @@ namespace Avalonia.Media.TextFormatting.Unicode
{
count = 1;
- if (index > text.Length)
+ if (index >= text.Length)
{
return ReplacementCodepoint;
}
diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs
index 76bb9ac44f..4d02f94cad 100644
--- a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs
+++ b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs
@@ -1,160 +1,460 @@
-// RichTextKit
-// Copyright © 2019 Topten Software. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License"); you may
-// not use this product except in compliance with the License. You may obtain
-// a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-// License for the specific language governing permissions and limitations
-// under the License.
-//
-// Ported from: https://github.com/foliojs/linebreak
-// Copied from: https://github.com/toptensoftware/RichTextKit
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+// Ported from: https://github.com/SixLabors/Fonts/
using Avalonia.Utilities;
namespace Avalonia.Media.TextFormatting.Unicode
{
///
- /// Implementation of the Unicode Line Break Algorithm
+ /// Implementation of the Unicode Line Break Algorithm. UAX:14
+ ///
///
public ref struct LineBreakEnumerator
{
- // State
private readonly ReadOnlySlice _text;
- private int _pos;
- private int _lastPos;
- private LineBreakClass? _curClass;
- private LineBreakClass? _nextClass;
+ private int _position;
+ private int _lastPosition;
+ private LineBreakClass _currentClass;
+ private LineBreakClass _nextClass;
+ private bool _first;
+ private int _alphaNumericCount;
+ private bool _lb8a;
+ private bool _lb21a;
+ private bool _lb22ex;
+ private bool _lb24ex;
+ private bool _lb25ex;
+ private bool _lb30;
+ private int _lb30a;
+ private bool _lb31;
public LineBreakEnumerator(ReadOnlySlice text)
+ : this()
{
_text = text;
- _pos = 0;
- _lastPos = 0;
- _curClass = null;
- _nextClass = null;
- Current = default;
+ _position = 0;
+ _currentClass = LineBreakClass.Unknown;
+ _nextClass = LineBreakClass.Unknown;
+ _first = true;
+ _lb8a = false;
+ _lb21a = false;
+ _lb22ex = false;
+ _lb24ex = false;
+ _lb25ex = false;
+ _alphaNumericCount = 0;
+ _lb31 = false;
+ _lb30 = false;
+ _lb30a = 0;
}
-
+
public LineBreak Current { get; private set; }
-
+
public bool MoveNext()
{
- // get the first char if we're at the beginning of the string
- if (!_curClass.HasValue)
+ // Get the first char if we're at the beginning of the string.
+ if (_first)
{
- _curClass = PeekCharClass() == LineBreakClass.Space ? LineBreakClass.WordJoiner : MapFirst(ReadCharClass());
+ var firstClass = NextCharClass();
+ _first = false;
+ _currentClass = MapFirst(firstClass);
+ _nextClass = firstClass;
+ _lb8a = firstClass == LineBreakClass.ZWJ;
+ _lb30a = 0;
}
- while (_pos < _text.Length)
+ while (_position < _text.Length)
{
- _lastPos = _pos;
+ _lastPosition = _position;
var lastClass = _nextClass;
- _nextClass = ReadCharClass();
+ _nextClass = NextCharClass();
- // explicit newline
- if (_curClass.HasValue && (_curClass == LineBreakClass.MandatoryBreak || _curClass == LineBreakClass.CarriageReturn && _nextClass != LineBreakClass.LineFeed))
+ // Explicit newline
+ switch (_currentClass)
{
- _curClass = MapFirst(MapClass(_nextClass.Value));
- Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true);
+ case LineBreakClass.MandatoryBreak:
+ case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed:
+ {
+ _currentClass = MapFirst(_nextClass);
+ Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, true);
+ return true;
+ }
+ }
+
+ var shouldBreak = GetSimpleBreak() ?? (bool?)GetPairTableBreak(lastClass);
+
+ // Rule LB8a
+ _lb8a = _nextClass == LineBreakClass.ZWJ;
+
+ if (shouldBreak.Value)
+ {
+ Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition);
return true;
}
+ }
- // handle classes not handled by the pair table
- LineBreakClass? cur = null;
- switch (_nextClass.Value)
+ if (_position >= _text.Length)
+ {
+ if (_lastPosition < _text.Length)
{
- case LineBreakClass.Space:
- cur = _curClass;
- break;
+ _lastPosition = _text.Length;
+
+ var required = false;
+
+ switch (_currentClass)
+ {
+ case LineBreakClass.MandatoryBreak:
+ case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed:
+ required = true;
+ break;
+ }
+
+ Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, required);
+ return true;
+ }
+ }
+
+ Current = default;
+
+ return false;
+ }
+
+        /// <summary>
+        /// Resolves the raw line break class of a codepoint to the class the
+        /// breaking rules operate on (rule LB1 of UAX #14).
+        /// </summary>
+        /// <param name="cp">The codepoint to classify.</param>
+        /// <returns>The resolved line break class.</returns>
+        private static LineBreakClass MapClass(Codepoint cp)
+        {
+            // NOTE(review): 327685 is U+50005. The reason for special-casing this
+            // single value is not visible here - presumably inherited from the
+            // ported implementation; confirm before changing.
+            if (cp.Value == 327685)
+            {
+                return LineBreakClass.Alphabetic;
+            }
+
+            var rawClass = cp.LineBreakClass;
+
+            // LB1: AI, SG and XX resolve to AL regardless of general category.
+            if (rawClass == LineBreakClass.Ambiguous
+                || rawClass == LineBreakClass.Surrogate
+                || rawClass == LineBreakClass.Unknown)
+            {
+                return LineBreakClass.Alphabetic;
+            }
+
+            // LB1: SA resolves to CM for Mn/Mc and to AL for everything else.
+            if (rawClass == LineBreakClass.ComplexContext)
+            {
+                var category = cp.GeneralCategory;
+                var isMark = category == GeneralCategory.NonspacingMark
+                             || category == GeneralCategory.SpacingMark;
+
+                return isMark ? LineBreakClass.CombiningMark : LineBreakClass.Alphabetic;
+            }
+
+            // LB1: CJ resolves to NS; every other class is used unchanged.
+            return rawClass == LineBreakClass.ConditionalJapaneseStarter
+                ? LineBreakClass.Nonstarter
+                : rawClass;
+        }
+
+        /// <summary>
+        /// Maps the class of the codepoint that starts a run (the first codepoint of
+        /// the text, or the one following an explicit newline) to the class the
+        /// enumerator continues from.
+        /// </summary>
+        /// <param name="c">The resolved class of the starting codepoint.</param>
+        /// <returns>
+        /// <see cref="LineBreakClass.MandatoryBreak"/> for LF and NL,
+        /// <see cref="LineBreakClass.WordJoiner"/> for a space, otherwise <paramref name="c"/>.
+        /// </returns>
+        private static LineBreakClass MapFirst(LineBreakClass c)
+        {
+            if (c == LineBreakClass.LineFeed || c == LineBreakClass.NextLine)
+            {
+                return LineBreakClass.MandatoryBreak;
+            }
+
+            if (c == LineBreakClass.Space)
+            {
+                return LineBreakClass.WordJoiner;
+            }
+
+            return c;
+        }
+
+        /// <summary>
+        /// Determines whether a class is one of AL, HL or NU.
+        /// </summary>
+        /// <param name="cls">The class to test.</param>
+        /// <returns><c>true</c> for Alphabetic, HebrewLetter or Numeric; otherwise <c>false</c>.</returns>
+        private static bool IsAlphaNumeric(LineBreakClass cls)
+        {
+            switch (cls)
+            {
+                case LineBreakClass.Alphabetic:
+                case LineBreakClass.HebrewLetter:
+                case LineBreakClass.Numeric:
+                    return true;
+
+                default:
+                    return false;
+            }
+        }
+
+        /// <summary>
+        /// Returns the resolved class of the codepoint at the current position
+        /// without advancing the position.
+        /// </summary>
+        private LineBreakClass PeekNextCharClass()
+            => MapClass(Codepoint.ReadAt(_text, _position, out _));
+
+ // Get the next character class
+ private LineBreakClass NextCharClass()
+ {
+ var cp = Codepoint.ReadAt(_text, _position, out var count);
+ var cls = MapClass(cp);
+ _position += count;
+
+ // Keep track of alphanumeric + any combining marks.
+ // This is used for LB22 and LB30.
+ if (IsAlphaNumeric(_currentClass) || _alphaNumericCount > 0 && cls == LineBreakClass.CombiningMark)
+ {
+ _alphaNumericCount++;
+ }
+
+ // Track combining mark exceptions. LB22
+ if (cls == LineBreakClass.CombiningMark)
+ {
+ switch (_currentClass)
+ {
case LineBreakClass.MandatoryBreak:
+ case LineBreakClass.ContingentBreak:
+ case LineBreakClass.Exclamation:
case LineBreakClass.LineFeed:
case LineBreakClass.NextLine:
- cur = LineBreakClass.MandatoryBreak;
- break;
-
+ case LineBreakClass.Space:
+ case LineBreakClass.ZWSpace:
case LineBreakClass.CarriageReturn:
- cur = LineBreakClass.CarriageReturn;
+ _lb22ex = true;
break;
+ }
+ }
+ // Track combining mark exceptions. LB31
+ if (_first && cls == LineBreakClass.CombiningMark)
+ {
+ _lb31 = true;
+ }
+
+ if (cls == LineBreakClass.CombiningMark)
+ {
+ switch (_currentClass)
+ {
+ case LineBreakClass.MandatoryBreak:
case LineBreakClass.ContingentBreak:
- cur = LineBreakClass.BreakAfter;
+ case LineBreakClass.Exclamation:
+ case LineBreakClass.LineFeed:
+ case LineBreakClass.NextLine:
+ case LineBreakClass.Space:
+ case LineBreakClass.ZWSpace:
+ case LineBreakClass.CarriageReturn:
+ case LineBreakClass.ZWJ:
+ _lb31 = true;
break;
}
+ }
+
+ if (_first
+ && (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space))
+ {
+ _lb31 = true;
+ }
+
+ if (_currentClass == LineBreakClass.Alphabetic &&
+ (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space))
+ {
+ _lb31 = true;
+ }
+
+ // Reset LB31 if next is U+0028 (LEFT PARENTHESIS)
+ if (_lb31
+ && _currentClass != LineBreakClass.PostfixNumeric
+ && _currentClass != LineBreakClass.PrefixNumeric
+ && cls == LineBreakClass.OpenPunctuation && cp.Value == 0x0028)
+ {
+ _lb31 = false;
+ }
+
+ // Rule LB24
+ if (_first && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.CloseParenthesis))
+ {
+ _lb24ex = true;
+ }
- if (cur != null)
+ // Rule LB25
+ if (_first
+ && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.InfixNumeric || cls == LineBreakClass.BreakSymbols))
+ {
+ _lb25ex = true;
+ }
+
+ if (cls == LineBreakClass.Space || cls == LineBreakClass.WordJoiner || cls == LineBreakClass.Alphabetic)
+ {
+ var next = PeekNextCharClass();
+ if (next == LineBreakClass.ClosePunctuation || next == LineBreakClass.InfixNumeric || next == LineBreakClass.BreakSymbols)
{
- _curClass = cur;
+ _lb25ex = true;
+ }
+ }
+
+ // Alphanumerics (plus any trailing combining marks) may break before OP, except for:
+ // - U+0028 (LEFT PARENTHESIS)
+ // - U+005B (LEFT SQUARE BRACKET)
+ // - U+007B (LEFT CURLY BRACKET)
+ // See the custom columns/rules in the pair table of the line break test data.
+ // https://www.unicode.org/Public/13.0.0/ucd/auxiliary/LineBreakTest.html
+ _lb30 = _alphaNumericCount > 0
+ && cls == LineBreakClass.OpenPunctuation
+ && cp.Value != 0x0028
+ && cp.Value != 0x005B
+ && cp.Value != 0x007B;
+
+ return cls;
+ }
+
+ private bool? GetSimpleBreak()
+ {
+ // handle classes not handled by the pair table
+ switch (_nextClass)
+ {
+ case LineBreakClass.Space:
+ return false;
- if (_nextClass.Value == LineBreakClass.MandatoryBreak)
+ case LineBreakClass.MandatoryBreak:
+ case LineBreakClass.LineFeed:
+ case LineBreakClass.NextLine:
+ _currentClass = LineBreakClass.MandatoryBreak;
+ return false;
+
+ case LineBreakClass.CarriageReturn:
+ _currentClass = LineBreakClass.CarriageReturn;
+ return false;
+ }
+
+ return null;
+ }
+
+ private bool GetPairTableBreak(LineBreakClass lastClass)
+ {
+ // If not handled already, use the pair table
+ bool shouldBreak = false;
+ switch (LineBreakPairTable.Table[(int)_currentClass][(int)_nextClass])
+ {
+ case LineBreakPairTable.DIBRK: // Direct break
+ shouldBreak = true;
+ break;
+
+ // TODO: Rewrite this so that it defaults to true and rules are set as exceptions.
+ case LineBreakPairTable.INBRK: // Possible indirect break
+
+ // LB31
+ if (_lb31 && _nextClass == LineBreakClass.OpenPunctuation)
{
- _lastPos = _pos;
- Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true);
- return true;
+ shouldBreak = true;
+ _lb31 = false;
+ break;
}
- continue;
- }
-
- // if not handled already, use the pair table
- var shouldBreak = false;
- switch (BreakPairTable.Map(_curClass.Value,_nextClass.Value))
- {
- case PairBreakType.DI: // Direct break
+ // LB30
+ if (_lb30)
+ {
shouldBreak = true;
+ _lb30 = false;
+ _alphaNumericCount = 0;
break;
+ }
- case PairBreakType.IN: // possible indirect break
- shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space;
+ // LB25
+ if (_lb25ex && (_nextClass == LineBreakClass.PrefixNumeric || _nextClass == LineBreakClass.Numeric))
+ {
+ shouldBreak = true;
+ _lb25ex = false;
break;
+ }
- case PairBreakType.CI:
- shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space;
- if (!shouldBreak)
- {
- continue;
- }
+ // LB24
+ if (_lb24ex && (_nextClass == LineBreakClass.PostfixNumeric || _nextClass == LineBreakClass.PrefixNumeric))
+ {
+ shouldBreak = true;
+ _lb24ex = false;
break;
+ }
+
+ // LB18
+ shouldBreak = lastClass == LineBreakClass.Space;
+ break;
+
+ case LineBreakPairTable.CIBRK:
+ shouldBreak = lastClass == LineBreakClass.Space;
+ if (!shouldBreak)
+ {
+ return false;
+ }
- case PairBreakType.CP: // prohibited for combining marks
- if (!lastClass.HasValue || lastClass.Value != LineBreakClass.Space)
+ break;
+
+ case LineBreakPairTable.CPBRK: // prohibited for combining marks
+ if (lastClass != LineBreakClass.Space)
+ {
+ return false;
+ }
+
+ break;
+
+ case LineBreakPairTable.PRBRK:
+ break;
+ }
+
+ // Rule LB22
+ if (_nextClass == LineBreakClass.Inseparable)
+ {
+ switch (lastClass)
+ {
+ case LineBreakClass.MandatoryBreak:
+ case LineBreakClass.ContingentBreak:
+ case LineBreakClass.Exclamation:
+ case LineBreakClass.LineFeed:
+ case LineBreakClass.NextLine:
+ case LineBreakClass.Space:
+ case LineBreakClass.ZWSpace:
+
+ // Allow break
+ break;
+ case LineBreakClass.CombiningMark:
+ if (_lb22ex)
{
- continue;
+ // Allow break
+ _lb22ex = false;
+ break;
}
+
+ shouldBreak = false;
+ break;
+ default:
+ shouldBreak = false;
break;
}
+ }
- _curClass = _nextClass;
+ if (_lb8a)
+ {
+ shouldBreak = false;
+ }
- if (shouldBreak)
- {
- Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos);
- return true;
- }
+ // Rule LB21a
+ if (_lb21a && (_currentClass == LineBreakClass.Hyphen || _currentClass == LineBreakClass.BreakAfter))
+ {
+ shouldBreak = false;
+ _lb21a = false;
+ }
+ else
+ {
+ _lb21a = _currentClass == LineBreakClass.HebrewLetter;
}
- if (_pos >= _text.Length)
+ // Rule LB30a
+ if (_currentClass == LineBreakClass.RegionalIndicator)
{
- if (_lastPos < _text.Length)
+ _lb30a++;
+ if (_lb30a == 2 && _nextClass == LineBreakClass.RegionalIndicator)
{
- _lastPos = _text.Length;
- var cls = Codepoint.ReadAt(_text, _text.Length - 1, out _).LineBreakClass;
- bool required = cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn;
- Current = new LineBreak(FindPriorNonWhitespace(_text.Length), _text.Length, required);
- return true;
+ shouldBreak = true;
+ _lb30a = 0;
}
}
+ else
+ {
+ _lb30a = 0;
+ }
- return false;
- }
+ _currentClass = _nextClass;
+ return shouldBreak;
+ }
+
private int FindPriorNonWhitespace(int from)
{
if (from > 0)
@@ -163,7 +463,8 @@ namespace Avalonia.Media.TextFormatting.Unicode
var cls = cp.LineBreakClass;
- if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn)
+ if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed ||
+ cls == LineBreakClass.CarriageReturn)
{
from -= count;
}
@@ -184,61 +485,8 @@ namespace Avalonia.Media.TextFormatting.Unicode
break;
}
}
- return from;
- }
- // Get the next character class
- private LineBreakClass ReadCharClass()
- {
- var cp = Codepoint.ReadAt(_text, _pos, out var count);
-
- _pos += count;
-
- return MapClass(cp.LineBreakClass);
- }
-
- private LineBreakClass PeekCharClass()
- {
- return MapClass(Codepoint.ReadAt(_text, _pos, out _).LineBreakClass);
- }
-
- private static LineBreakClass MapClass(LineBreakClass c)
- {
- switch (c)
- {
- case LineBreakClass.Ambiguous:
- return LineBreakClass.Alphabetic;
-
- case LineBreakClass.ComplexContext:
- case LineBreakClass.Surrogate:
- case LineBreakClass.Unknown:
- return LineBreakClass.Alphabetic;
-
- case LineBreakClass.ConditionalJapaneseStarter:
- return LineBreakClass.Nonstarter;
-
- default:
- return c;
- }
- }
-
- private static LineBreakClass MapFirst(LineBreakClass c)
- {
- switch (c)
- {
- case LineBreakClass.LineFeed:
- case LineBreakClass.NextLine:
- return LineBreakClass.MandatoryBreak;
-
- case LineBreakClass.ContingentBreak:
- return LineBreakClass.BreakAfter;
-
- case LineBreakClass.Space:
- return LineBreakClass.WordJoiner;
-
- default:
- return c;
- }
+ return from;
}
}
}
diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs
new file mode 100644
index 0000000000..fd37eed68d
--- /dev/null
+++ b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs
@@ -0,0 +1,74 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+// Ported from: https://github.com/SixLabors/Fonts/
+
+namespace Avalonia.Media.TextFormatting.Unicode
+{
+ /// <summary>
+ /// Pair table used by the line break enumerator: for a pair of resolved line break
+ /// classes it yields one of the break-action constants declared below.
+ /// </summary>
+ internal static class LineBreakPairTable
+ {
+ /// <summary>
+ /// Direct break opportunity
+ /// </summary>
+ public const byte DIBRK = 0;
+
+ /// <summary>
+ /// Indirect break opportunity
+ /// </summary>
+ public const byte INBRK = 1;
+
+ /// <summary>
+ /// Indirect break opportunity for combining marks
+ /// </summary>
+ public const byte CIBRK = 2;
+
+ /// <summary>
+ /// Prohibited break for combining marks
+ /// </summary>
+ public const byte CPBRK = 3;
+
+ /// <summary>
+ /// Prohibited break
+ /// </summary>
+ public const byte PRBRK = 4;
+
+ // Based on example pair table from https://www.unicode.org/reports/tr14/tr14-37.html#Table2
+ // - ZWJ special processing for LB8a
+ // - CB manually added as per Rule LB20
+ //
+ // Indexing: the first index (row) is the class before the candidate break and the
+ // second index (column) is the class after it; the enumerator reads it as
+ // Table[(int)currentClass][(int)nextClass].
+ // NOTE(review): this assumes the numeric values of LineBreakClass follow the
+ // row/column order shown in the header comment below (OP = 0 ... CB = 32) - confirm
+ // against the enum declaration.
+ public static byte[][] Table { get; } = {
+ // . OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI EB EM ZWJ CB
+ new[] { PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, CPBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK }, // OP
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CL
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CP
+ new[] { PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // QU
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // GL
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NS
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EX
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // SY
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IS
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK }, // PR
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // PO
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NU
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // AL
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HL
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ID
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IN
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HY
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // BA
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK }, // BB
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // B2
+ new[] { DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK }, // ZW
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CM
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // WJ
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H2
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H3
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JL
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JV
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JT
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, DIBRK }, // RI
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK }, // EB
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EM
+ new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ZWJ
+ new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK } // CB
+ };
+ }
+}
diff --git a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt
index 814ce15d0a..93d531c700 100644
--- a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt
+++ b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt
@@ -1,7 +1,7 @@
OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI EB EM ZWJ CB
OP ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ @ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
-CL _ ^ ^ % % ^ ^ ^ ^ % % _ _ _ _ ^ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
-CP _ ^ ^ % % ^ ^ ^ ^ % % % % % _ ^ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
+CL _ ^ ^ % % ^ ^ ^ ^ % % _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
+CP _ ^ ^ % % ^ ^ ^ ^ % % % % % _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
QU ^ ^ ^ % % % ^ ^ ^ % % % % % % % % % % % ^ # ^ % % % % % % % % % %
GL % ^ ^ % % % ^ ^ ^ % % % % % % % % % % % ^ # ^ % % % % % % % % % %
NS _ ^ ^ % % % ^ ^ ^ _ _ _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
diff --git a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs
new file mode 100644
index 0000000000..a90be6d519
--- /dev/null
+++ b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs
@@ -0,0 +1,259 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Net.Http;
+using Avalonia.Media.TextFormatting.Unicode;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
+{
+ public class LineBreakEnumeratorTests
+ {
+ private readonly ITestOutputHelper _outputHelper;
+
+ /// <summary>
+ /// Creates the test class and keeps the output helper for diagnostic messages.
+ /// </summary>
+ /// <param name="outputHelper">Helper used by the tests to write diagnostic output.</param>
+ public LineBreakEnumeratorTests(ITestOutputHelper outputHelper) => _outputHelper = outputHelper;
+
+ /// <summary>
+ /// Enumerates the breaks of a two-line Latin string and checks, for each break in
+ /// order, its wrap position and whether it is required; finally checks that the
+ /// enumerator reports no further breaks.
+ /// </summary>
+ [Fact]
+ public void BasicLatinTest()
+ {
+     var lineBreaker = new LineBreakEnumerator("Hello World\r\nThis is a test.".AsMemory());
+
+     // Expected breaks, in enumeration order. The two arrays are parallel:
+     // expectedRequired[i] belongs to expectedWraps[i].
+     int[] expectedWraps = { 6, 13, 18, 21, 23, 28 };
+     bool[] expectedRequired = { false, true, false, false, false, false };
+
+     for (var index = 0; index < expectedWraps.Length; index++)
+     {
+         Assert.True(lineBreaker.MoveNext());
+         Assert.Equal(expectedWraps[index], lineBreaker.Current.PositionWrap);
+         Assert.Equal(expectedRequired[index], lineBreaker.Current.Required);
+     }
+
+     // Nothing may follow the last expected break.
+     Assert.False(lineBreaker.MoveNext());
+ }
+
+
+ /// <summary>
+ /// Checks the wrap and measure positions reported for text that is surrounded by
+ /// whitespace: one leading space and three trailing spaces.
+ /// </summary>
+ [Fact]
+ public void ForwardTextWithOuterWhitespace()
+ {
+     // The final assertions expect a wrap position of 24 and a measure position of 21.
+     // " Apples Pears Bananas" is 21 characters long, so the input must end with exactly
+     // three spaces. The tail is built explicitly so the count cannot be lost to
+     // whitespace normalization of the literal.
+     var text = " Apples Pears Bananas" + new string(' ', 3);
+
+     var lineBreaker = new LineBreakEnumerator(text.AsMemory());
+     var positionsF = GetBreaks(lineBreaker);
+
+     // Break after the leading space.
+     Assert.Equal(1, positionsF[0].PositionWrap);
+     Assert.Equal(0, positionsF[0].PositionMeasure);
+
+     // Break after "Apples ".
+     Assert.Equal(8, positionsF[1].PositionWrap);
+     Assert.Equal(7, positionsF[1].PositionMeasure);
+
+     // Break after "Pears ".
+     Assert.Equal(14, positionsF[2].PositionWrap);
+     Assert.Equal(13, positionsF[2].PositionMeasure);
+
+     // Break at the end of the text, after the three trailing spaces.
+     Assert.Equal(24, positionsF[3].PositionWrap);
+     Assert.Equal(21, positionsF[3].PositionMeasure);
+ }
+
+ /// <summary>
+ /// Advances <paramref name="lineBreaker"/> until it is exhausted and collects every
+ /// break it yields.
+ /// </summary>
+ /// <param name="lineBreaker">The enumerator to drain.</param>
+ /// <returns>The yielded breaks, in enumeration order.</returns>
+ private static List<LineBreak> GetBreaks(LineBreakEnumerator lineBreaker)
+ {
+     // NOTE(review): the element type argument was missing; it is restored as LineBreak,
+     // assumed to be the type of LineBreakEnumerator.Current (callers read
+     // PositionWrap and PositionMeasure from the elements) - confirm.
+     var breaks = new List<LineBreak>();
+
+     while (lineBreaker.MoveNext())
+     {
+         breaks.Add(lineBreaker.Current);
+     }
+
+     return breaks;
+ }
+
+ /// <summary>
+ /// Checks the wrap and measure positions of each break in a string of three words
+ /// separated by single spaces, with no leading or trailing whitespace.
+ /// </summary>
+ [Fact]
+ public void ForwardTest()
+ {
+     var breaks = GetBreaks(new LineBreakEnumerator("Apples Pears Bananas".AsMemory()));
+
+     // Parallel arrays: expectedMeasures[i] belongs to expectedWraps[i].
+     int[] expectedWraps = { 7, 13, 20 };
+     int[] expectedMeasures = { 6, 12, 20 };
+
+     for (var index = 0; index < expectedWraps.Length; index++)
+     {
+         Assert.Equal(expectedWraps[index], breaks[index].PositionWrap);
+         Assert.Equal(expectedMeasures[index], breaks[index].PositionMeasure);
+     }
+ }
+
+ /// <summary>
+ /// Builds a string from <paramref name="codePoints"/>, enumerates its line breaks and
+ /// checks that the wrap positions match <paramref name="breakPoints"/> exactly.
+ /// On a mismatch, diagnostic details are written to the test output before the
+ /// assertion fails.
+ /// </summary>
+ /// <param name="lineNumber">Line number reported in the failure message.</param>
+ /// <param name="codePoints">UTF-32 code points that make up the input text.</param>
+ /// <param name="breakPoints">Expected wrap positions, in order.</param>
+ [Theory(Skip = "Only run when the Unicode spec changes.")]
+ [ClassData(typeof(LineBreakTestDataGenerator))]
+ public void ShouldFindBreaks(int lineNumber, int[] codePoints, int[] breakPoints)
+ {
+     var text = string.Join(null, codePoints.Select(char.ConvertFromUtf32));
+
+     var lineBreaker = new LineBreakEnumerator(text.AsMemory());
+
+     // Wrap positions are compared against int[] breakPoints, so the list holds ints.
+     var foundBreaks = new List<int>();
+
+     while (lineBreaker.MoveNext())
+     {
+         foundBreaks.Add(lineBreaker.Current.PositionWrap);
+     }
+
+     // Same length and same elements in the same order.
+     var pass = foundBreaks.SequenceEqual(breakPoints);
+
+     if (!pass)
+     {
+         _outputHelper.WriteLine($"Failed test on line {lineNumber}");
+         _outputHelper.WriteLine("");
+         _outputHelper.WriteLine($" Code Points: {string.Join(" ", codePoints)}");
+         _outputHelper.WriteLine($"Expected Breaks: {string.Join(" ", breakPoints)}");
+         _outputHelper.WriteLine($" Actual Breaks: {string.Join(" ", foundBreaks)}");
+         _outputHelper.WriteLine($" Text: {text}");
+         _outputHelper.WriteLine($" Char Props: {string.Join(" ", codePoints.Select(x => new Codepoint(x).LineBreakClass))}");
+         _outputHelper.WriteLine("");
+     }
+
+     Assert.True(pass);
+ }
+
+ private class LineBreakTestDataGenerator : IEnumerable