diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs deleted file mode 100644 index 86d39a4283..0000000000 --- a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs +++ /dev/null @@ -1,56 +0,0 @@ -namespace Avalonia.Media.TextFormatting.Unicode -{ - internal static class BreakPairTable - { - private static readonly byte[][] s_breakPairTable = - { - new byte[] {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4}, - new byte[] {0,4,4,1,1,4,4,4,4,1,1,0,0,0,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,4,4,4,4,1,1,1,1,1,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {4,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,0,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,1,0,1,1,0,0,4,2,4,1,1,1,1,1,0,1,1,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,0,1,4,4,4,0,0,1,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,0,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] 
{1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,4,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,1,1,1,1,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,1,0,0,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,1,1,0}, - new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - new byte[] {0,4,4,1,1,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0}, - }; - - public static PairBreakType Map(LineBreakClass first, LineBreakClass second) - { - return (PairBreakType)s_breakPairTable[(int)first][(int)second]; - } - } - - internal enum PairBreakType : byte - { - DI = 0, // Direct break opportunity - IN = 1, // Indirect break opportunity - CI = 2, // Indirect break opportunity for combining marks - CP = 3, // Prohibited break for combining marks - PR = 4 // Prohibited break - } -} diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs index 2f46fdd9d0..43a95310c6 100644 --- a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs +++ b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs @@ -9,37 +9,40 @@ namespace 
Avalonia.Media.TextFormatting.Unicode /// public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD'); - private readonly int _value; - public Codepoint(int value) { - _value = value; + Value = value; } + /// + /// Get the codepoint's value. + /// + public int Value { get; } + /// /// Gets the . /// - public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(_value); + public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(Value); /// /// Gets the . /// - public Script Script => UnicodeData.GetScript(_value); + public Script Script => UnicodeData.GetScript(Value); /// /// Gets the . /// - public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(_value); + public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(Value); /// /// Gets the . /// - public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(_value); + public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(Value); /// /// Gets the . /// - public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(_value); + public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(Value); /// /// Determines whether this is a break char. 
@@ -51,7 +54,7 @@ namespace Avalonia.Media.TextFormatting.Unicode { get { - switch (_value) + switch (Value) { case '\u000A': case '\u000B': @@ -93,12 +96,12 @@ namespace Avalonia.Media.TextFormatting.Unicode public static implicit operator int(Codepoint codepoint) { - return codepoint._value; + return codepoint.Value; } public static implicit operator uint(Codepoint codepoint) { - return (uint)codepoint._value; + return (uint)codepoint.Value; } /// @@ -112,7 +115,7 @@ namespace Avalonia.Media.TextFormatting.Unicode { count = 1; - if (index > text.Length) + if (index >= text.Length) { return ReplacementCodepoint; } diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs index 76bb9ac44f..4d02f94cad 100644 --- a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs +++ b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs @@ -1,160 +1,460 @@ -// RichTextKit -// Copyright © 2019 Topten Software. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may -// not use this product except in compliance with the License. You may obtain -// a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations -// under the License. -// -// Ported from: https://github.com/foliojs/linebreak -// Copied from: https://github.com/toptensoftware/RichTextKit +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+// Ported from: https://github.com/SixLabors/Fonts/ using Avalonia.Utilities; namespace Avalonia.Media.TextFormatting.Unicode { /// - /// Implementation of the Unicode Line Break Algorithm + /// Implementation of the Unicode Line Break Algorithm. UAX:14 + /// /// public ref struct LineBreakEnumerator { - // State private readonly ReadOnlySlice _text; - private int _pos; - private int _lastPos; - private LineBreakClass? _curClass; - private LineBreakClass? _nextClass; + private int _position; + private int _lastPosition; + private LineBreakClass _currentClass; + private LineBreakClass _nextClass; + private bool _first; + private int _alphaNumericCount; + private bool _lb8a; + private bool _lb21a; + private bool _lb22ex; + private bool _lb24ex; + private bool _lb25ex; + private bool _lb30; + private int _lb30a; + private bool _lb31; public LineBreakEnumerator(ReadOnlySlice text) + : this() { _text = text; - _pos = 0; - _lastPos = 0; - _curClass = null; - _nextClass = null; - Current = default; + _position = 0; + _currentClass = LineBreakClass.Unknown; + _nextClass = LineBreakClass.Unknown; + _first = true; + _lb8a = false; + _lb21a = false; + _lb22ex = false; + _lb24ex = false; + _lb25ex = false; + _alphaNumericCount = 0; + _lb31 = false; + _lb30 = false; + _lb30a = 0; } - + public LineBreak Current { get; private set; } - + public bool MoveNext() { - // get the first char if we're at the beginning of the string - if (!_curClass.HasValue) + // Get the first char if we're at the beginning of the string. + if (_first) { - _curClass = PeekCharClass() == LineBreakClass.Space ? 
LineBreakClass.WordJoiner : MapFirst(ReadCharClass()); + var firstClass = NextCharClass(); + _first = false; + _currentClass = MapFirst(firstClass); + _nextClass = firstClass; + _lb8a = firstClass == LineBreakClass.ZWJ; + _lb30a = 0; } - while (_pos < _text.Length) + while (_position < _text.Length) { - _lastPos = _pos; + _lastPosition = _position; var lastClass = _nextClass; - _nextClass = ReadCharClass(); + _nextClass = NextCharClass(); - // explicit newline - if (_curClass.HasValue && (_curClass == LineBreakClass.MandatoryBreak || _curClass == LineBreakClass.CarriageReturn && _nextClass != LineBreakClass.LineFeed)) + // Explicit newline + switch (_currentClass) { - _curClass = MapFirst(MapClass(_nextClass.Value)); - Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true); + case LineBreakClass.MandatoryBreak: + case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed: + { + _currentClass = MapFirst(_nextClass); + Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, true); + return true; + } + } + + var shouldBreak = GetSimpleBreak() ?? (bool?)GetPairTableBreak(lastClass); + + // Rule LB8a + _lb8a = _nextClass == LineBreakClass.ZWJ; + + if (shouldBreak.Value) + { + Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition); return true; } + } - // handle classes not handled by the pair table - LineBreakClass? 
cur = null; - switch (_nextClass.Value) + if (_position >= _text.Length) + { + if (_lastPosition < _text.Length) { - case LineBreakClass.Space: - cur = _curClass; - break; + _lastPosition = _text.Length; + + var required = false; + + switch (_currentClass) + { + case LineBreakClass.MandatoryBreak: + case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed: + required = true; + break; + } + + Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, required); + return true; + } + } + + Current = default; + + return false; + } + + private static LineBreakClass MapClass(Codepoint cp) + { + if (cp.Value == 327685) + { + return LineBreakClass.Alphabetic; + } + + // LB 1 + // ========================================== + // Resolved Original General_Category + // ========================================== + // AL AI, SG, XX Any + // CM SA Only Mn or Mc + // AL SA Any except Mn and Mc + // NS CJ Any + switch (cp.LineBreakClass) + { + case LineBreakClass.Ambiguous: + case LineBreakClass.Surrogate: + case LineBreakClass.Unknown: + return LineBreakClass.Alphabetic; + case LineBreakClass.ComplexContext: + return cp.GeneralCategory == GeneralCategory.NonspacingMark || cp.GeneralCategory == GeneralCategory.SpacingMark + ? 
LineBreakClass.CombiningMark + : LineBreakClass.Alphabetic; + + case LineBreakClass.ConditionalJapaneseStarter: + return LineBreakClass.Nonstarter; + + default: + return cp.LineBreakClass; + } + } + + private static LineBreakClass MapFirst(LineBreakClass c) + { + switch (c) + { + case LineBreakClass.LineFeed: + case LineBreakClass.NextLine: + return LineBreakClass.MandatoryBreak; + + case LineBreakClass.Space: + return LineBreakClass.WordJoiner; + + default: + return c; + } + } + + private static bool IsAlphaNumeric(LineBreakClass cls) + => cls == LineBreakClass.Alphabetic + || cls == LineBreakClass.HebrewLetter + || cls == LineBreakClass.Numeric; + + private LineBreakClass PeekNextCharClass() + { + var cp = Codepoint.ReadAt(_text, _position, out _); + + return MapClass(cp); + } + + // Get the next character class + private LineBreakClass NextCharClass() + { + var cp = Codepoint.ReadAt(_text, _position, out var count); + var cls = MapClass(cp); + _position += count; + + // Keep track of alphanumeric + any combining marks. + // This is used for LB22 and LB30. + if (IsAlphaNumeric(_currentClass) || _alphaNumericCount > 0 && cls == LineBreakClass.CombiningMark) + { + _alphaNumericCount++; + } + + // Track combining mark exceptions. LB22 + if (cls == LineBreakClass.CombiningMark) + { + switch (_currentClass) + { case LineBreakClass.MandatoryBreak: + case LineBreakClass.ContingentBreak: + case LineBreakClass.Exclamation: case LineBreakClass.LineFeed: case LineBreakClass.NextLine: - cur = LineBreakClass.MandatoryBreak; - break; - + case LineBreakClass.Space: + case LineBreakClass.ZWSpace: case LineBreakClass.CarriageReturn: - cur = LineBreakClass.CarriageReturn; + _lb22ex = true; break; + } + } + // Track combining mark exceptions. 
LB31 + if (_first && cls == LineBreakClass.CombiningMark) + { + _lb31 = true; + } + + if (cls == LineBreakClass.CombiningMark) + { + switch (_currentClass) + { + case LineBreakClass.MandatoryBreak: case LineBreakClass.ContingentBreak: - cur = LineBreakClass.BreakAfter; + case LineBreakClass.Exclamation: + case LineBreakClass.LineFeed: + case LineBreakClass.NextLine: + case LineBreakClass.Space: + case LineBreakClass.ZWSpace: + case LineBreakClass.CarriageReturn: + case LineBreakClass.ZWJ: + _lb31 = true; break; } + } + + if (_first + && (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space)) + { + _lb31 = true; + } + + if (_currentClass == LineBreakClass.Alphabetic && + (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space)) + { + _lb31 = true; + } + + // Reset LB31 if next is U+0028 (Left Opening Parenthesis) + if (_lb31 + && _currentClass != LineBreakClass.PostfixNumeric + && _currentClass != LineBreakClass.PrefixNumeric + && cls == LineBreakClass.OpenPunctuation && cp.Value == 0x0028) + { + _lb31 = false; + } + + // Rule LB24 + if (_first && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.CloseParenthesis)) + { + _lb24ex = true; + } - if (cur != null) + // Rule LB25 + if (_first + && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.InfixNumeric || cls == LineBreakClass.BreakSymbols)) + { + _lb25ex = true; + } + + if (cls == LineBreakClass.Space || cls == LineBreakClass.WordJoiner || cls == LineBreakClass.Alphabetic) + { + var next = PeekNextCharClass(); + if (next == LineBreakClass.ClosePunctuation || next == LineBreakClass.InfixNumeric || next == LineBreakClass.BreakSymbols) { - _curClass = cur; + _lb25ex = true; + } + } + + // AlphaNumeric + and combining marks can break for OP except. 
+ // - U+0028 (Left Opening Parenthesis) + // - U+005B (Opening Square Bracket) + // - U+007B (Left Curly Bracket) + // See custom colums|rules in the text pair table. + // https://www.unicode.org/Public/13.0.0/ucd/auxiliary/LineBreakTest.html + _lb30 = _alphaNumericCount > 0 + && cls == LineBreakClass.OpenPunctuation + && cp.Value != 0x0028 + && cp.Value != 0x005B + && cp.Value != 0x007B; + + return cls; + } + + private bool? GetSimpleBreak() + { + // handle classes not handled by the pair table + switch (_nextClass) + { + case LineBreakClass.Space: + return false; - if (_nextClass.Value == LineBreakClass.MandatoryBreak) + case LineBreakClass.MandatoryBreak: + case LineBreakClass.LineFeed: + case LineBreakClass.NextLine: + _currentClass = LineBreakClass.MandatoryBreak; + return false; + + case LineBreakClass.CarriageReturn: + _currentClass = LineBreakClass.CarriageReturn; + return false; + } + + return null; + } + + private bool GetPairTableBreak(LineBreakClass lastClass) + { + // If not handled already, use the pair table + bool shouldBreak = false; + switch (LineBreakPairTable.Table[(int)_currentClass][(int)_nextClass]) + { + case LineBreakPairTable.DIBRK: // Direct break + shouldBreak = true; + break; + + // TODO: Rewrite this so that it defaults to true and rules are set as exceptions. 
+ case LineBreakPairTable.INBRK: // Possible indirect break + + // LB31 + if (_lb31 && _nextClass == LineBreakClass.OpenPunctuation) { - _lastPos = _pos; - Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true); - return true; + shouldBreak = true; + _lb31 = false; + break; } - continue; - } - - // if not handled already, use the pair table - var shouldBreak = false; - switch (BreakPairTable.Map(_curClass.Value,_nextClass.Value)) - { - case PairBreakType.DI: // Direct break + // LB30 + if (_lb30) + { shouldBreak = true; + _lb30 = false; + _alphaNumericCount = 0; break; + } - case PairBreakType.IN: // possible indirect break - shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space; + // LB25 + if (_lb25ex && (_nextClass == LineBreakClass.PrefixNumeric || _nextClass == LineBreakClass.Numeric)) + { + shouldBreak = true; + _lb25ex = false; break; + } - case PairBreakType.CI: - shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space; - if (!shouldBreak) - { - continue; - } + // LB24 + if (_lb24ex && (_nextClass == LineBreakClass.PostfixNumeric || _nextClass == LineBreakClass.PrefixNumeric)) + { + shouldBreak = true; + _lb24ex = false; break; + } + + // LB18 + shouldBreak = lastClass == LineBreakClass.Space; + break; + + case LineBreakPairTable.CIBRK: + shouldBreak = lastClass == LineBreakClass.Space; + if (!shouldBreak) + { + return false; + } - case PairBreakType.CP: // prohibited for combining marks - if (!lastClass.HasValue || lastClass.Value != LineBreakClass.Space) + break; + + case LineBreakPairTable.CPBRK: // prohibited for combining marks + if (lastClass != LineBreakClass.Space) + { + return false; + } + + break; + + case LineBreakPairTable.PRBRK: + break; + } + + // Rule LB22 + if (_nextClass == LineBreakClass.Inseparable) + { + switch (lastClass) + { + case LineBreakClass.MandatoryBreak: + case LineBreakClass.ContingentBreak: + case LineBreakClass.Exclamation: + case LineBreakClass.LineFeed: + case 
LineBreakClass.NextLine: + case LineBreakClass.Space: + case LineBreakClass.ZWSpace: + + // Allow break + break; + case LineBreakClass.CombiningMark: + if (_lb22ex) { - continue; + // Allow break + _lb22ex = false; + break; } + + shouldBreak = false; + break; + default: + shouldBreak = false; break; } + } - _curClass = _nextClass; + if (_lb8a) + { + shouldBreak = false; + } - if (shouldBreak) - { - Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos); - return true; - } + // Rule LB21a + if (_lb21a && (_currentClass == LineBreakClass.Hyphen || _currentClass == LineBreakClass.BreakAfter)) + { + shouldBreak = false; + _lb21a = false; + } + else + { + _lb21a = _currentClass == LineBreakClass.HebrewLetter; } - if (_pos >= _text.Length) + // Rule LB30a + if (_currentClass == LineBreakClass.RegionalIndicator) { - if (_lastPos < _text.Length) + _lb30a++; + if (_lb30a == 2 && _nextClass == LineBreakClass.RegionalIndicator) { - _lastPos = _text.Length; - var cls = Codepoint.ReadAt(_text, _text.Length - 1, out _).LineBreakClass; - bool required = cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn; - Current = new LineBreak(FindPriorNonWhitespace(_text.Length), _text.Length, required); - return true; + shouldBreak = true; + _lb30a = 0; } } + else + { + _lb30a = 0; + } - return false; - } + _currentClass = _nextClass; + return shouldBreak; + } + private int FindPriorNonWhitespace(int from) { if (from > 0) @@ -163,7 +463,8 @@ namespace Avalonia.Media.TextFormatting.Unicode var cls = cp.LineBreakClass; - if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn) + if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || + cls == LineBreakClass.CarriageReturn) { from -= count; } @@ -184,61 +485,8 @@ namespace Avalonia.Media.TextFormatting.Unicode break; } } - return from; - } - // Get the next character class - private 
LineBreakClass ReadCharClass() - { - var cp = Codepoint.ReadAt(_text, _pos, out var count); - - _pos += count; - - return MapClass(cp.LineBreakClass); - } - - private LineBreakClass PeekCharClass() - { - return MapClass(Codepoint.ReadAt(_text, _pos, out _).LineBreakClass); - } - - private static LineBreakClass MapClass(LineBreakClass c) - { - switch (c) - { - case LineBreakClass.Ambiguous: - return LineBreakClass.Alphabetic; - - case LineBreakClass.ComplexContext: - case LineBreakClass.Surrogate: - case LineBreakClass.Unknown: - return LineBreakClass.Alphabetic; - - case LineBreakClass.ConditionalJapaneseStarter: - return LineBreakClass.Nonstarter; - - default: - return c; - } - } - - private static LineBreakClass MapFirst(LineBreakClass c) - { - switch (c) - { - case LineBreakClass.LineFeed: - case LineBreakClass.NextLine: - return LineBreakClass.MandatoryBreak; - - case LineBreakClass.ContingentBreak: - return LineBreakClass.BreakAfter; - - case LineBreakClass.Space: - return LineBreakClass.WordJoiner; - - default: - return c; - } + return from; } } } diff --git a/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs new file mode 100644 index 0000000000..fd37eed68d --- /dev/null +++ b/src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs @@ -0,0 +1,74 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+// Ported from: https://github.com/SixLabors/Fonts/ + +namespace Avalonia.Media.TextFormatting.Unicode +{ + internal static class LineBreakPairTable + { + /// + /// Direct break opportunity + /// + public const byte DIBRK = 0; + + /// + /// Indirect break opportunity + /// + public const byte INBRK = 1; + + /// + /// Indirect break opportunity for combining marks + /// + public const byte CIBRK = 2; + + /// + /// Prohibited break for combining marks + /// + public const byte CPBRK = 3; + + /// + /// Prohibited break + /// + public const byte PRBRK = 4; + + // Based on example pair table from https://www.unicode.org/reports/tr14/tr14-37.html#Table2 + // - ZWJ special processing for LB8a + // - CB manually added as per Rule LB20 + public static byte[][] Table { get; } = { + // . OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI EB EM ZWJ CB + new[] { PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, CPBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK }, // OP + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CL + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CP + new[] { PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // QU + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, 
INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // GL + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NS + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EX + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // SY + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IS + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK }, // PR + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // PO + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NU + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, 
INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // AL + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HL + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ID + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IN + new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HY + new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // BA + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK }, // BB + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // B2 + new[] { DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, 
DIBRK, DIBRK, DIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK }, // ZW + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CM + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // WJ + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H2 + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H3 + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JL + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JV + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JT + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, 
DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, DIBRK }, // RI + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK }, // EB + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EM + new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ZWJ + new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK } // CB + }; + } +} diff --git a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt index 814ce15d0a..93d531c700 100644 --- a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt +++ b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt @@ -1,7 +1,7 @@ OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI EB EM ZWJ CB OP ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ @ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ -CL _ ^ ^ % % ^ ^ ^ ^ % % _ _ _ _ ^ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _ -CP _ ^ ^ % % ^ ^ ^ ^ % % % % % _ ^ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _ +CL _ ^ ^ % % ^ ^ ^ ^ % % _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _ +CP _ ^ ^ % % ^ ^ ^ ^ % % % % % _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _ QU ^ ^ ^ % % % ^ ^ ^ % % % % % % % % % % % ^ # ^ % % % % % 
% % % % %
 GL % ^ ^ % % % ^ ^ ^ % % % % % % % % % % % ^ # ^ % % % % % % % % % %
 NS _ ^ ^ % % % ^ ^ ^ _ _ _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
diff --git a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs
new file mode 100644
index 0000000000..a90be6d519
--- /dev/null
+++ b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs
@@ -0,0 +1,283 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Net.Http;
+using Avalonia.Media.TextFormatting.Unicode;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
+{
+    /// <summary>
+    /// Tests for <see cref="LineBreakEnumerator"/>: hand-written expectations for plain Latin text plus
+    /// an opt-in run over the cases listed in the Unicode <c>auxiliary/LineBreakTest.txt</c> file.
+    /// </summary>
+    public class LineBreakEnumeratorTests
+    {
+        private readonly ITestOutputHelper _outputHelper;
+
+        /// <param name="outputHelper">xUnit output sink used to describe failing cases.</param>
+        public LineBreakEnumeratorTests(ITestOutputHelper outputHelper)
+        {
+            _outputHelper = outputHelper;
+        }
+
+        /// <summary>
+        /// Expects a break after each space and at the end of the text, and a required break after CR LF.
+        /// </summary>
+        [Fact]
+        public void BasicLatinTest()
+        {
+            var lineBreaker = new LineBreakEnumerator("Hello World\r\nThis is a test.".AsMemory());
+
+            Assert.True(lineBreaker.MoveNext());
+            Assert.Equal(6, lineBreaker.Current.PositionWrap);
+            Assert.False(lineBreaker.Current.Required);
+
+            Assert.True(lineBreaker.MoveNext());
+            Assert.Equal(13, lineBreaker.Current.PositionWrap);
+            Assert.True(lineBreaker.Current.Required);
+
+            Assert.True(lineBreaker.MoveNext());
+            Assert.Equal(18, lineBreaker.Current.PositionWrap);
+            Assert.False(lineBreaker.Current.Required);
+
+            Assert.True(lineBreaker.MoveNext());
+            Assert.Equal(21, lineBreaker.Current.PositionWrap);
+            Assert.False(lineBreaker.Current.Required);
+
+            Assert.True(lineBreaker.MoveNext());
+            Assert.Equal(23, lineBreaker.Current.PositionWrap);
+            Assert.False(lineBreaker.Current.Required);
+
+            Assert.True(lineBreaker.MoveNext());
+            Assert.Equal(28, lineBreaker.Current.PositionWrap);
+            Assert.False(lineBreaker.Current.Required);
+
+            Assert.False(lineBreaker.MoveNext());
+        }
+
+        /// <summary>
+        /// Leading and trailing spaces: PositionMeasure is expected to stop before the spaces that
+        /// PositionWrap includes.
+        /// </summary>
+        [Fact]
+        public void ForwardTextWithOuterWhitespace()
+        {
+            // One leading and three trailing spaces (24 chars): the offsets asserted below only add up
+            // for this exact spacing.
+            var lineBreaker = new LineBreakEnumerator(" Apples Pears Bananas   ".AsMemory());
+            var positionsF = GetBreaks(lineBreaker);
+
+            Assert.Equal(1, positionsF[0].PositionWrap);
+            Assert.Equal(0, positionsF[0].PositionMeasure);
+            Assert.Equal(8, positionsF[1].PositionWrap);
+            Assert.Equal(7, positionsF[1].PositionMeasure);
+            Assert.Equal(14, positionsF[2].PositionWrap);
+            Assert.Equal(13, positionsF[2].PositionMeasure);
+            Assert.Equal(24, positionsF[3].PositionWrap);
+            Assert.Equal(21, positionsF[3].PositionMeasure);
+        }
+
+        /// <summary>Drains <paramref name="lineBreaker"/> and returns every break it produced, in order.</summary>
+        private static List<LineBreak> GetBreaks(LineBreakEnumerator lineBreaker)
+        {
+            var breaks = new List<LineBreak>();
+
+            while (lineBreaker.MoveNext())
+            {
+                breaks.Add(lineBreaker.Current);
+            }
+
+            return breaks;
+        }
+
+        /// <summary>Same words without outer whitespace; the last break sits at the end of the text.</summary>
+        [Fact]
+        public void ForwardTest()
+        {
+            var lineBreaker = new LineBreakEnumerator("Apples Pears Bananas".AsMemory());
+
+            var positionsF = GetBreaks(lineBreaker);
+
+            Assert.Equal(7, positionsF[0].PositionWrap);
+            Assert.Equal(6, positionsF[0].PositionMeasure);
+            Assert.Equal(13, positionsF[1].PositionWrap);
+            Assert.Equal(12, positionsF[1].PositionMeasure);
+            Assert.Equal(20, positionsF[2].PositionWrap);
+            Assert.Equal(20, positionsF[2].PositionMeasure);
+        }
+
+        /// <summary>
+        /// Runs one case from LineBreakTest.txt and compares the reported wrap positions with the expected ones.
+        /// </summary>
+        /// <param name="lineNumber">1-based line of the case in the data file (diagnostics only).</param>
+        /// <param name="codePoints">Code points making up the text under test.</param>
+        /// <param name="breakPoints">Expected break offsets, in UTF-16 code units.</param>
+        [Theory(Skip = "Only run when the Unicode spec changes.")]
+        [ClassData(typeof(LineBreakTestDataGenerator))]
+        public void ShouldFindBreaks(int lineNumber, int[] codePoints, int[] breakPoints)
+        {
+            var text = string.Join(null, codePoints.Select(char.ConvertFromUtf32));
+
+            var lineBreaker = new LineBreakEnumerator(text.AsMemory());
+
+            var foundBreaks = new List<int>();
+
+            while (lineBreaker.MoveNext())
+            {
+                foundBreaks.Add(lineBreaker.Current.PositionWrap);
+            }
+
+            // Pass only when both sequences have the same length and the same offsets in the same order.
+            var pass = foundBreaks.SequenceEqual(breakPoints);
+
+            if (!pass)
+            {
+                _outputHelper.WriteLine($"Failed test on line {lineNumber}");
+                _outputHelper.WriteLine("");
+                _outputHelper.WriteLine($" Code Points: {string.Join(" ", codePoints)}");
+                _outputHelper.WriteLine($"Expected Breaks: {string.Join(" ", breakPoints)}");
+                _outputHelper.WriteLine($" Actual Breaks: {string.Join(" ", foundBreaks)}");
+                _outputHelper.WriteLine($" Text: {text}");
+                _outputHelper.WriteLine($" Char Props: {string.Join(" ", codePoints.Select(x => new Codepoint(x).LineBreakClass))}");
+                _outputHelper.WriteLine("");
+            }
+
+            Assert.True(pass);
+        }
+
+        /// <summary>
+        /// xUnit class-data source that downloads LineBreakTest.txt and yields one
+        /// { lineNumber, codePoints, breakPoints } row per test line.
+        /// </summary>
+        private class LineBreakTestDataGenerator : IEnumerable<object[]>
+        {
+            private readonly List<object[]> _testData;
+
+            public LineBreakTestDataGenerator()
+            {
+                _testData = GenerateTestData();
+            }
+
+            public IEnumerator<object[]> GetEnumerator()
+            {
+                return _testData.GetEnumerator();
+            }
+
+            IEnumerator IEnumerable.GetEnumerator()
+            {
+                return GetEnumerator();
+            }
+
+            /// <summary>
+            /// Downloads and parses the data file. Returns an empty list when the request does not succeed.
+            /// </summary>
+            private static List<object[]> GenerateTestData()
+            {
+                var tests = new List<object[]>();
+
+                // This is a URL, not a file-system path: join with '/' explicitly instead of Path.Combine,
+                // which may insert the platform's directory separator.
+                var url = UnicodeDataGenerator.Ucd.TrimEnd('/') + "/auxiliary/LineBreakTest.txt";
+
+                // Blocking on the async calls is deliberate: this runs from the constructor, which cannot await.
+                using (var client = new HttpClient())
+                using (var result = client.GetAsync(url).GetAwaiter().GetResult())
+                {
+                    if (!result.IsSuccessStatusCode)
+                    {
+                        return tests;
+                    }
+
+                    using (var stream = result.Content.ReadAsStreamAsync().GetAwaiter().GetResult())
+                    using (var reader = new StreamReader(stream))
+                    {
+                        var lineNumber = 0;
+                        string line;
+
+                        while ((line = reader.ReadLine()) != null)
+                        {
+                            lineNumber++;
+
+                            // Drop the trailing '#' comment and surrounding white space.
+                            line = line.Split('#')[0].Trim();
+
+                            // Blank and comment-only lines carry no test.
+                            if (line.Length == 0)
+                            {
+                                continue;
+                            }
+
+                            tests.Add(ParseTestLine(lineNumber, line));
+                        }
+                    }
+                }
+
+                return tests;
+            }
+
+            /// <summary>
+            /// Parses one data line: hexadecimal code points separated by '×' (no break) or '÷' (break).
+            /// </summary>
+            /// <exception cref="FormatException">The line contains a character that is none of the above.</exception>
+            private static object[] ParseTestLine(int lineNumber, string line)
+            {
+                var codePoints = new List<int>();
+                var breakPoints = new List<int>();
+
+                // UTF-16 length of the code points read so far; kept as a running total so each '÷' is O(1).
+                var charOffset = 0;
+                var p = 0;
+
+                while (p < line.Length)
+                {
+                    var ch = line[p];
+
+                    // White space and '×' only separate code points.
+                    if (char.IsWhiteSpace(ch) || ch == '×')
+                    {
+                        p++;
+                        continue;
+                    }
+
+                    if (ch == '÷')
+                    {
+                        breakPoints.Add(charOffset);
+                        p++;
+                        continue;
+                    }
+
+                    var codePointPos = p;
+
+                    while (p < line.Length && IsHexDigit(line[p]))
+                    {
+                        p++;
+                    }
+
+                    if (p == codePointPos)
+                    {
+                        throw new FormatException(
+                            $"Unexpected character '{ch}' at column {p + 1} of LineBreakTest.txt line {lineNumber}.");
+                    }
+
+                    var codePoint = Convert.ToInt32(line.Substring(codePointPos, p - codePointPos), 16);
+                    codePoints.Add(codePoint);
+
+                    // Code points above U+FFFF occupy two UTF-16 code units (a surrogate pair).
+                    charOffset += codePoint > ushort.MaxValue ? 2 : 1;
+                }
+
+                return new object[] { lineNumber, codePoints.ToArray(), breakPoints.ToArray() };
+            }
+
+            /// <summary>True for ASCII 0-9, A-F and a-f only.</summary>
+            private static bool IsHexDigit(char ch)
+            {
+                return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
+            }
+        }
+    }
+}
diff --git a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakerTests.cs b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakerTests.cs
deleted file mode 100644
index 3d489af3a2..0000000000
--- a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakerTests.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-using System;
-using Avalonia.Media.TextFormatting.Unicode;
-using Avalonia.Utilities;
-using Xunit;
-
-namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
-{
-    public class LineBreakerTests
-    {
-        [Fact]
-        public void Should_Split_Text_By_Explicit_Breaks()
-        {
-            //ABC [0 3]
-            //DEF\r[4 7]
-            //\r[8]
-            //Hello\r\n[9 15]
-            const string text = "ABC DEF\r\rHELLO\r\n";
-
-            var buffer = new ReadOnlySlice(text.AsMemory());
-
-            var lineBreaker = new LineBreakEnumerator(buffer);
-
-            var current = 0;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var a = text.Substring(current, lineBreaker.Current.PositionMeasure - current + 1);
-
-            Assert.Equal("ABC ", a);
-
-            current += a.Length;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var b = text.Substring(current, lineBreaker.Current.PositionMeasure - current + 1);
-
-            Assert.Equal("DEF\r", b);
-
-            current += b.Length;
-
-            Assert.True(lineBreaker.MoveNext());
-
-            var c = text.Substring(current,
lineBreaker.Current.PositionMeasure - current + 1); - - Assert.Equal("\r", c); - - current += c.Length; - - Assert.True(lineBreaker.MoveNext()); - - var d = text.Substring(current, text.Length - current); - - Assert.Equal("HELLO\r\n", d); - } - } -} diff --git a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs index 5c705ba0c7..47aef84533 100644 --- a/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs +++ b/tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs @@ -1,6 +1,4 @@ -using System; -using Avalonia.Media.TextFormatting.Unicode; -using Xunit; +using Xunit; namespace Avalonia.Visuals.UnitTests.Media.TextFormatting { @@ -15,26 +13,5 @@ namespace Avalonia.Visuals.UnitTests.Media.TextFormatting { UnicodeDataGenerator.Execute(); } - [Theory(Skip = "Only run when we update the trie.")] - [ClassData(typeof(LineBreakTestDataGenerator))] - - public void Should_Enumerate_LineBreaks(string text, int expectedLength) - { - var textMemory = text.AsMemory(); - - var enumerator = new LineBreakEnumerator(textMemory); - - Assert.True(enumerator.MoveNext()); - - Assert.Equal(expectedLength, enumerator.Current.PositionWrap); - } - - private class LineBreakTestDataGenerator : TestDataGenerator - { - public LineBreakTestDataGenerator() - : base("auxiliary/LineBreakTest.txt") - { - } - } } }