Browse Source

Merge pull request #5410 from Gillibald/fixes/LineBreakEnumerator

Replace LineBreakEnumerator implementation
pull/5462/head
Benedikt Stebner 5 years ago
committed by GitHub
parent
commit
6ff478bdb1
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 56
      src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs
  2. 27
      src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs
  3. 540
      src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs
  4. 74
      src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs
  5. 4
      tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt
  6. 259
      tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs
  7. 56
      tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakerTests.cs
  8. 25
      tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs

56
src/Avalonia.Visuals/Media/TextFormatting/Unicode/BreakPairTable.cs

@ -1,56 +0,0 @@
namespace Avalonia.Media.TextFormatting.Unicode
{
/// <summary>
/// Static lookup of the break behaviour between two adjacent line break classes.
/// </summary>
internal static class BreakPairTable
{
    // Jagged matrix of raw PairBreakType values. The outer index is the class of the
    // first character of the pair, the inner index is the class of the second one.
    private static readonly byte[][] s_pairs =
    {
        new byte[] {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4},
        new byte[] {0,4,4,1,1,4,4,4,4,1,1,0,0,0,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,4,4,4,4,1,1,1,1,1,0,4,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {4,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,0,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,1,0,1,1,0,0,4,2,4,1,1,1,1,1,0,1,1,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,0,0,1,1,1,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,0,1,4,4,4,0,0,1,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,0,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,4,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,1,1,1,1,1,1,4,2,4,1,1,1,1,1,1,1,1,1,1},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,1,1,1,1,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,1,1,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,1,0,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,0,0,0,0,0,0,1,1,0,0,4,2,4,0,0,0,0,0,1,0,0,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,1,1,0},
        new byte[] {0,4,4,1,1,1,4,4,4,0,1,0,0,0,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {1,4,4,1,1,1,4,4,4,1,1,1,1,1,0,1,1,1,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
        new byte[] {0,4,4,1,1,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,4,2,4,0,0,0,0,0,0,0,0,1,0},
    };

    /// <summary>
    /// Looks up the break type for a pair of adjacent line break classes.
    /// </summary>
    /// <param name="first">Class of the first character; its numeric value selects the row.</param>
    /// <param name="second">Class of the second character; its numeric value selects the column.</param>
    /// <returns>The table cell for the pair, cast to <see cref="PairBreakType"/>.</returns>
    public static PairBreakType Map(LineBreakClass first, LineBreakClass second)
    {
        var row = s_pairs[(int)first];
        var cell = row[(int)second];

        return (PairBreakType)cell;
    }
}
/// <summary>
/// Break behaviour between two adjacent line break classes, as returned by
/// <see cref="BreakPairTable.Map"/>. The numeric values are the raw cell values of the pair table.
/// </summary>
internal enum PairBreakType : byte
{
/// <summary>Direct break opportunity.</summary>
DI = 0,
/// <summary>Indirect break opportunity.</summary>
IN = 1,
/// <summary>Indirect break opportunity for combining marks.</summary>
CI = 2,
/// <summary>Prohibited break for combining marks.</summary>
CP = 3,
/// <summary>Prohibited break.</summary>
PR = 4
}
}

27
src/Avalonia.Visuals/Media/TextFormatting/Unicode/Codepoint.cs

@ -9,37 +9,40 @@ namespace Avalonia.Media.TextFormatting.Unicode
/// </summary>
public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD');
private readonly int _value;
public Codepoint(int value)
{
_value = value;
Value = value;
}
/// <summary>
/// Get the codepoint's value.
/// </summary>
public int Value { get; }
/// <summary>
/// Gets the <see cref="Unicode.GeneralCategory"/>.
/// </summary>
public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(_value);
public GeneralCategory GeneralCategory => UnicodeData.GetGeneralCategory(Value);
/// <summary>
/// Gets the <see cref="Unicode.Script"/>.
/// </summary>
public Script Script => UnicodeData.GetScript(_value);
public Script Script => UnicodeData.GetScript(Value);
/// <summary>
/// Gets the <see cref="Unicode.BiDiClass"/>.
/// </summary>
public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(_value);
public BiDiClass BiDiClass => UnicodeData.GetBiDiClass(Value);
/// <summary>
/// Gets the <see cref="Unicode.LineBreakClass"/>.
/// </summary>
public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(_value);
public LineBreakClass LineBreakClass => UnicodeData.GetLineBreakClass(Value);
/// <summary>
/// Gets the <see cref="GraphemeBreakClass"/>.
/// </summary>
public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(_value);
public GraphemeBreakClass GraphemeBreakClass => UnicodeData.GetGraphemeClusterBreak(Value);
/// <summary>
/// Determines whether this <see cref="Codepoint"/> is a break char.
@ -51,7 +54,7 @@ namespace Avalonia.Media.TextFormatting.Unicode
{
get
{
switch (_value)
switch (Value)
{
case '\u000A':
case '\u000B':
@ -93,12 +96,12 @@ namespace Avalonia.Media.TextFormatting.Unicode
public static implicit operator int(Codepoint codepoint)
{
return codepoint._value;
return codepoint.Value;
}
public static implicit operator uint(Codepoint codepoint)
{
return (uint)codepoint._value;
return (uint)codepoint.Value;
}
/// <summary>
@ -112,7 +115,7 @@ namespace Avalonia.Media.TextFormatting.Unicode
{
count = 1;
if (index > text.Length)
if (index >= text.Length)
{
return ReplacementCodepoint;
}

540
src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakEnumerator.cs

@ -1,160 +1,460 @@
// RichTextKit
// Copyright © 2019 Topten Software. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this product except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
// Ported from: https://github.com/foliojs/linebreak
// Copied from: https://github.com/toptensoftware/RichTextKit
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
// Ported from: https://github.com/SixLabors/Fonts/
using Avalonia.Utilities;
namespace Avalonia.Media.TextFormatting.Unicode
{
/// <summary>
/// Implementation of the Unicode Line Break Algorithm
/// Implementation of the Unicode Line Break Algorithm. UAX:14
/// <see href="https://www.unicode.org/reports/tr14/tr14-37.html"/>
/// </summary>
public ref struct LineBreakEnumerator
{
// State
private readonly ReadOnlySlice<char> _text;
private int _pos;
private int _lastPos;
private LineBreakClass? _curClass;
private LineBreakClass? _nextClass;
private int _position;
private int _lastPosition;
private LineBreakClass _currentClass;
private LineBreakClass _nextClass;
private bool _first;
private int _alphaNumericCount;
private bool _lb8a;
private bool _lb21a;
private bool _lb22ex;
private bool _lb24ex;
private bool _lb25ex;
private bool _lb30;
private int _lb30a;
private bool _lb31;
public LineBreakEnumerator(ReadOnlySlice<char> text)
: this()
{
_text = text;
_pos = 0;
_lastPos = 0;
_curClass = null;
_nextClass = null;
Current = default;
_position = 0;
_currentClass = LineBreakClass.Unknown;
_nextClass = LineBreakClass.Unknown;
_first = true;
_lb8a = false;
_lb21a = false;
_lb22ex = false;
_lb24ex = false;
_lb25ex = false;
_alphaNumericCount = 0;
_lb31 = false;
_lb30 = false;
_lb30a = 0;
}
public LineBreak Current { get; private set; }
public bool MoveNext()
{
// get the first char if we're at the beginning of the string
if (!_curClass.HasValue)
// Get the first char if we're at the beginning of the string.
if (_first)
{
_curClass = PeekCharClass() == LineBreakClass.Space ? LineBreakClass.WordJoiner : MapFirst(ReadCharClass());
var firstClass = NextCharClass();
_first = false;
_currentClass = MapFirst(firstClass);
_nextClass = firstClass;
_lb8a = firstClass == LineBreakClass.ZWJ;
_lb30a = 0;
}
while (_pos < _text.Length)
while (_position < _text.Length)
{
_lastPos = _pos;
_lastPosition = _position;
var lastClass = _nextClass;
_nextClass = ReadCharClass();
_nextClass = NextCharClass();
// explicit newline
if (_curClass.HasValue && (_curClass == LineBreakClass.MandatoryBreak || _curClass == LineBreakClass.CarriageReturn && _nextClass != LineBreakClass.LineFeed))
// Explicit newline
switch (_currentClass)
{
_curClass = MapFirst(MapClass(_nextClass.Value));
Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true);
case LineBreakClass.MandatoryBreak:
case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed:
{
_currentClass = MapFirst(_nextClass);
Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, true);
return true;
}
}
var shouldBreak = GetSimpleBreak() ?? (bool?)GetPairTableBreak(lastClass);
// Rule LB8a
_lb8a = _nextClass == LineBreakClass.ZWJ;
if (shouldBreak.Value)
{
Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition);
return true;
}
}
// handle classes not handled by the pair table
LineBreakClass? cur = null;
switch (_nextClass.Value)
if (_position >= _text.Length)
{
if (_lastPosition < _text.Length)
{
case LineBreakClass.Space:
cur = _curClass;
break;
_lastPosition = _text.Length;
var required = false;
switch (_currentClass)
{
case LineBreakClass.MandatoryBreak:
case LineBreakClass.CarriageReturn when _nextClass != LineBreakClass.LineFeed:
required = true;
break;
}
Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, required);
return true;
}
}
Current = default;
return false;
}
/// <summary>
/// Resolves the raw line break class of a codepoint to the class the enumerator works with (rule LB1).
/// </summary>
/// <param name="cp">The codepoint to classify.</param>
/// <returns>The resolved line break class.</returns>
private static LineBreakClass MapClass(Codepoint cp)
{
    // The single codepoint 327685 (0x50005) is always treated as alphabetic, before any lookup.
    // NOTE(review): the reason for this special case is not visible here - confirm against the
    // upstream implementation this file was ported from.
    if (cp.Value == 327685)
    {
        return LineBreakClass.Alphabetic;
    }

    // LB 1
    // ==========================================
    // Resolved Original General_Category
    // ==========================================
    // AL AI, SG, XX Any
    // CM SA Only Mn or Mc
    // AL SA Any except Mn and Mc
    // NS CJ Any
    var original = cp.LineBreakClass;

    if (original == LineBreakClass.Ambiguous
        || original == LineBreakClass.Surrogate
        || original == LineBreakClass.Unknown)
    {
        return LineBreakClass.Alphabetic;
    }

    if (original == LineBreakClass.ConditionalJapaneseStarter)
    {
        return LineBreakClass.Nonstarter;
    }

    if (original != LineBreakClass.ComplexContext)
    {
        // Every other class is used as is.
        return original;
    }

    // Complex context: marks combine, everything else behaves like a letter.
    var category = cp.GeneralCategory;
    var isMark = category == GeneralCategory.NonspacingMark || category == GeneralCategory.SpacingMark;

    return isMark ? LineBreakClass.CombiningMark : LineBreakClass.Alphabetic;
}
/// <summary>
/// Maps a class that becomes the current class at the start of the text or right after a break.
/// Line feed and next line become a mandatory break, a space becomes a word joiner, and
/// every other class is returned unchanged.
/// </summary>
private static LineBreakClass MapFirst(LineBreakClass c)
{
    if (c == LineBreakClass.LineFeed || c == LineBreakClass.NextLine)
    {
        return LineBreakClass.MandatoryBreak;
    }

    return c == LineBreakClass.Space ? LineBreakClass.WordJoiner : c;
}
/// <summary>
/// Returns true when the class is alphabetic, a Hebrew letter or numeric.
/// </summary>
private static bool IsAlphaNumeric(LineBreakClass cls)
{
    switch (cls)
    {
        case LineBreakClass.Alphabetic:
        case LineBreakClass.HebrewLetter:
        case LineBreakClass.Numeric:
            return true;
        default:
            return false;
    }
}
// Returns the resolved class of the codepoint at the current position without advancing it.
private LineBreakClass PeekNextCharClass()
    => MapClass(Codepoint.ReadAt(_text, _position, out _));
// Get the next character class
private LineBreakClass NextCharClass()
{
var cp = Codepoint.ReadAt(_text, _position, out var count);
var cls = MapClass(cp);
_position += count;
// Keep track of alphanumeric + any combining marks.
// This is used for LB22 and LB30.
if (IsAlphaNumeric(_currentClass) || _alphaNumericCount > 0 && cls == LineBreakClass.CombiningMark)
{
_alphaNumericCount++;
}
// Track combining mark exceptions. LB22
if (cls == LineBreakClass.CombiningMark)
{
switch (_currentClass)
{
case LineBreakClass.MandatoryBreak:
case LineBreakClass.ContingentBreak:
case LineBreakClass.Exclamation:
case LineBreakClass.LineFeed:
case LineBreakClass.NextLine:
cur = LineBreakClass.MandatoryBreak;
break;
case LineBreakClass.Space:
case LineBreakClass.ZWSpace:
case LineBreakClass.CarriageReturn:
cur = LineBreakClass.CarriageReturn;
_lb22ex = true;
break;
}
}
// Track combining mark exceptions. LB31
if (_first && cls == LineBreakClass.CombiningMark)
{
_lb31 = true;
}
if (cls == LineBreakClass.CombiningMark)
{
switch (_currentClass)
{
case LineBreakClass.MandatoryBreak:
case LineBreakClass.ContingentBreak:
cur = LineBreakClass.BreakAfter;
case LineBreakClass.Exclamation:
case LineBreakClass.LineFeed:
case LineBreakClass.NextLine:
case LineBreakClass.Space:
case LineBreakClass.ZWSpace:
case LineBreakClass.CarriageReturn:
case LineBreakClass.ZWJ:
_lb31 = true;
break;
}
}
if (_first
&& (cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space))
{
_lb31 = true;
}
if (_currentClass == LineBreakClass.Alphabetic &&
(cls == LineBreakClass.PostfixNumeric || cls == LineBreakClass.PrefixNumeric || cls == LineBreakClass.Space))
{
_lb31 = true;
}
// Reset LB31 if next is U+0028 (Left Opening Parenthesis)
if (_lb31
&& _currentClass != LineBreakClass.PostfixNumeric
&& _currentClass != LineBreakClass.PrefixNumeric
&& cls == LineBreakClass.OpenPunctuation && cp.Value == 0x0028)
{
_lb31 = false;
}
// Rule LB24
if (_first && (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.CloseParenthesis))
{
_lb24ex = true;
}
if (cur != null)
// Rule LB25
if (_first
&& (cls == LineBreakClass.ClosePunctuation || cls == LineBreakClass.InfixNumeric || cls == LineBreakClass.BreakSymbols))
{
_lb25ex = true;
}
if (cls == LineBreakClass.Space || cls == LineBreakClass.WordJoiner || cls == LineBreakClass.Alphabetic)
{
var next = PeekNextCharClass();
if (next == LineBreakClass.ClosePunctuation || next == LineBreakClass.InfixNumeric || next == LineBreakClass.BreakSymbols)
{
_curClass = cur;
_lb25ex = true;
}
}
// AlphaNumeric + and combining marks can break for OP except.
// - U+0028 (Left Opening Parenthesis)
// - U+005B (Opening Square Bracket)
// - U+007B (Left Curly Bracket)
// See custom columns|rules in the text pair table.
// https://www.unicode.org/Public/13.0.0/ucd/auxiliary/LineBreakTest.html
_lb30 = _alphaNumericCount > 0
&& cls == LineBreakClass.OpenPunctuation
&& cp.Value != 0x0028
&& cp.Value != 0x005B
&& cp.Value != 0x007B;
return cls;
}
private bool? GetSimpleBreak()
{
// handle classes not handled by the pair table
switch (_nextClass)
{
case LineBreakClass.Space:
return false;
if (_nextClass.Value == LineBreakClass.MandatoryBreak)
case LineBreakClass.MandatoryBreak:
case LineBreakClass.LineFeed:
case LineBreakClass.NextLine:
_currentClass = LineBreakClass.MandatoryBreak;
return false;
case LineBreakClass.CarriageReturn:
_currentClass = LineBreakClass.CarriageReturn;
return false;
}
return null;
}
private bool GetPairTableBreak(LineBreakClass lastClass)
{
// If not handled already, use the pair table
bool shouldBreak = false;
switch (LineBreakPairTable.Table[(int)_currentClass][(int)_nextClass])
{
case LineBreakPairTable.DIBRK: // Direct break
shouldBreak = true;
break;
// TODO: Rewrite this so that it defaults to true and rules are set as exceptions.
case LineBreakPairTable.INBRK: // Possible indirect break
// LB31
if (_lb31 && _nextClass == LineBreakClass.OpenPunctuation)
{
_lastPos = _pos;
Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos, true);
return true;
shouldBreak = true;
_lb31 = false;
break;
}
continue;
}
// if not handled already, use the pair table
var shouldBreak = false;
switch (BreakPairTable.Map(_curClass.Value,_nextClass.Value))
{
case PairBreakType.DI: // Direct break
// LB30
if (_lb30)
{
shouldBreak = true;
_lb30 = false;
_alphaNumericCount = 0;
break;
}
case PairBreakType.IN: // possible indirect break
shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space;
// LB25
if (_lb25ex && (_nextClass == LineBreakClass.PrefixNumeric || _nextClass == LineBreakClass.Numeric))
{
shouldBreak = true;
_lb25ex = false;
break;
}
case PairBreakType.CI:
shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.Space;
if (!shouldBreak)
{
continue;
}
// LB24
if (_lb24ex && (_nextClass == LineBreakClass.PostfixNumeric || _nextClass == LineBreakClass.PrefixNumeric))
{
shouldBreak = true;
_lb24ex = false;
break;
}
// LB18
shouldBreak = lastClass == LineBreakClass.Space;
break;
case LineBreakPairTable.CIBRK:
shouldBreak = lastClass == LineBreakClass.Space;
if (!shouldBreak)
{
return false;
}
case PairBreakType.CP: // prohibited for combining marks
if (!lastClass.HasValue || lastClass.Value != LineBreakClass.Space)
break;
case LineBreakPairTable.CPBRK: // prohibited for combining marks
if (lastClass != LineBreakClass.Space)
{
return false;
}
break;
case LineBreakPairTable.PRBRK:
break;
}
// Rule LB22
if (_nextClass == LineBreakClass.Inseparable)
{
switch (lastClass)
{
case LineBreakClass.MandatoryBreak:
case LineBreakClass.ContingentBreak:
case LineBreakClass.Exclamation:
case LineBreakClass.LineFeed:
case LineBreakClass.NextLine:
case LineBreakClass.Space:
case LineBreakClass.ZWSpace:
// Allow break
break;
case LineBreakClass.CombiningMark:
if (_lb22ex)
{
continue;
// Allow break
_lb22ex = false;
break;
}
shouldBreak = false;
break;
default:
shouldBreak = false;
break;
}
}
_curClass = _nextClass;
if (_lb8a)
{
shouldBreak = false;
}
if (shouldBreak)
{
Current = new LineBreak(FindPriorNonWhitespace(_lastPos), _lastPos);
return true;
}
// Rule LB21a
if (_lb21a && (_currentClass == LineBreakClass.Hyphen || _currentClass == LineBreakClass.BreakAfter))
{
shouldBreak = false;
_lb21a = false;
}
else
{
_lb21a = _currentClass == LineBreakClass.HebrewLetter;
}
if (_pos >= _text.Length)
// Rule LB30a
if (_currentClass == LineBreakClass.RegionalIndicator)
{
if (_lastPos < _text.Length)
_lb30a++;
if (_lb30a == 2 && _nextClass == LineBreakClass.RegionalIndicator)
{
_lastPos = _text.Length;
var cls = Codepoint.ReadAt(_text, _text.Length - 1, out _).LineBreakClass;
bool required = cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn;
Current = new LineBreak(FindPriorNonWhitespace(_text.Length), _text.Length, required);
return true;
shouldBreak = true;
_lb30a = 0;
}
}
else
{
_lb30a = 0;
}
return false;
}
_currentClass = _nextClass;
return shouldBreak;
}
private int FindPriorNonWhitespace(int from)
{
if (from > 0)
@ -163,7 +463,8 @@ namespace Avalonia.Media.TextFormatting.Unicode
var cls = cp.LineBreakClass;
if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed || cls == LineBreakClass.CarriageReturn)
if (cls == LineBreakClass.MandatoryBreak || cls == LineBreakClass.LineFeed ||
cls == LineBreakClass.CarriageReturn)
{
from -= count;
}
@ -184,61 +485,8 @@ namespace Avalonia.Media.TextFormatting.Unicode
break;
}
}
return from;
}
// Reads the codepoint at the current position, moves the position past it
// and returns the codepoint's mapped line break class.
private LineBreakClass ReadCharClass()
{
    var codepoint = Codepoint.ReadAt(_text, _pos, out var length);

    _pos += length;

    var rawClass = codepoint.LineBreakClass;

    return MapClass(rawClass);
}
// Returns the mapped line break class of the codepoint at the current position
// without moving the position.
private LineBreakClass PeekCharClass()
{
    var codepoint = Codepoint.ReadAt(_text, _pos, out _);

    return MapClass(codepoint.LineBreakClass);
}
// Resolves a raw line break class: ambiguous, complex context, surrogate and unknown
// become alphabetic, conditional Japanese starter becomes nonstarter, and every other
// class is returned unchanged.
private static LineBreakClass MapClass(LineBreakClass c)
{
    var resolvesToAlphabetic = c == LineBreakClass.Ambiguous
                               || c == LineBreakClass.ComplexContext
                               || c == LineBreakClass.Surrogate
                               || c == LineBreakClass.Unknown;

    if (resolvesToAlphabetic)
    {
        return LineBreakClass.Alphabetic;
    }

    return c == LineBreakClass.ConditionalJapaneseStarter ? LineBreakClass.Nonstarter : c;
}
private static LineBreakClass MapFirst(LineBreakClass c)
{
switch (c)
{
case LineBreakClass.LineFeed:
case LineBreakClass.NextLine:
return LineBreakClass.MandatoryBreak;
case LineBreakClass.ContingentBreak:
return LineBreakClass.BreakAfter;
case LineBreakClass.Space:
return LineBreakClass.WordJoiner;
default:
return c;
}
return from;
}
}
}

74
src/Avalonia.Visuals/Media/TextFormatting/Unicode/LineBreakPairTable.cs

@ -0,0 +1,74 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
// Ported from: https://github.com/SixLabors/Fonts/
namespace Avalonia.Media.TextFormatting.Unicode
{
/// <summary>
/// Pair table for the Unicode line breaking algorithm. <see cref="Table"/> is indexed first by the
/// line break class before a potential break and then by the class after it; each cell holds one
/// of the break constants declared in this class.
/// </summary>
internal static class LineBreakPairTable
{
/// <summary>
/// Direct break opportunity
/// </summary>
public const byte DIBRK = 0;
/// <summary>
/// Indirect break opportunity
/// </summary>
public const byte INBRK = 1;
/// <summary>
/// Indirect break opportunity for combining marks
/// </summary>
public const byte CIBRK = 2;
/// <summary>
/// Prohibited break for combining marks
/// </summary>
public const byte CPBRK = 3;
/// <summary>
/// Prohibited break
/// </summary>
public const byte PRBRK = 4;
// Based on example pair table from https://www.unicode.org/reports/tr14/tr14-37.html#Table2
// - ZWJ special processing for LB8a
// - CB manually added as per Rule LB20
/// <summary>
/// The pair table. Each row is labelled with its class in the trailing comment and the columns
/// follow the order of the header comment on the first line of the initializer.
/// </summary>
// NOTE(review): the rows are exposed as mutable arrays; callers are expected to only read them.
public static byte[][] Table { get; } = {
// . OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI EB EM ZWJ CB
new[] { PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, CPBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK, PRBRK }, // OP
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CL
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CP
new[] { PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // QU
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // GL
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NS
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EX
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // SY
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IS
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK }, // PR
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // PO
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // NU
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // AL
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HL
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ID
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // IN
new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // HY
new[] { DIBRK, PRBRK, PRBRK, INBRK, DIBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // BA
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK }, // BB
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // B2
new[] { DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK }, // ZW
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // CM
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK, INBRK }, // WJ
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H2
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // H3
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JL
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JV
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // JT
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK, DIBRK, INBRK, DIBRK }, // RI
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, DIBRK }, // EB
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, DIBRK, INBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // EM
new[] { INBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, PRBRK, PRBRK, PRBRK, INBRK, INBRK, INBRK, INBRK, INBRK, DIBRK, INBRK, INBRK, INBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK }, // ZWJ
new[] { DIBRK, PRBRK, PRBRK, INBRK, INBRK, DIBRK, PRBRK, PRBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, PRBRK, CIBRK, PRBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, DIBRK, INBRK, DIBRK } // CB
};
}
}

4
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/BreakPairTable.txt

@ -1,7 +1,7 @@
OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI EB EM ZWJ CB
OP ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ @ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
CL _ ^ ^ % % ^ ^ ^ ^ % % _ _ _ _ ^ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
CP _ ^ ^ % % ^ ^ ^ ^ % % % % % _ ^ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
CL _ ^ ^ % % ^ ^ ^ ^ % % _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
CP _ ^ ^ % % ^ ^ ^ ^ % % % % % _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _
QU ^ ^ ^ % % % ^ ^ ^ % % % % % % % % % % % ^ # ^ % % % % % % % % % %
GL % ^ ^ % % % ^ ^ ^ % % % % % % % % % % % ^ # ^ % % % % % % % % % %
NS _ ^ ^ % % % ^ ^ ^ _ _ _ _ _ _ _ % % _ _ ^ # ^ _ _ _ _ _ _ _ _ % _

259
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakEnumuratorTests.cs

@ -0,0 +1,259 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using Avalonia.Media.TextFormatting.Unicode;
using Xunit;
using Xunit.Abstractions;
namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
{
public class LineBreakEnumeratorTests
{
private readonly ITestOutputHelper _outputHelper;
// Keeps the xUnit output helper so that tests can write diagnostic output.
public LineBreakEnumeratorTests(ITestOutputHelper outputHelper) => _outputHelper = outputHelper;
// Walks the enumerator over a two-line Latin text and checks every break it reports.
[Fact]
public void BasicLatinTest()
{
    var lineBreaker = new LineBreakEnumerator("Hello World\r\nThis is a test.".AsMemory());

    // Expected wrap position and "required" flag of each break, in enumeration order.
    var expectedBreaks = new[]
    {
        (Position: 6, Required: false),
        (Position: 13, Required: true),
        (Position: 18, Required: false),
        (Position: 21, Required: false),
        (Position: 23, Required: false),
        (Position: 28, Required: false),
    };

    foreach (var expected in expectedBreaks)
    {
        Assert.True(lineBreaker.MoveNext());
        Assert.Equal(expected.Position, lineBreaker.Current.PositionWrap);

        if (expected.Required)
        {
            Assert.True(lineBreaker.Current.Required);
        }
        else
        {
            Assert.False(lineBreaker.Current.Required);
        }
    }

    // No further break may be reported after the end of the text.
    Assert.False(lineBreaker.MoveNext());
}
// Checks wrap and measure positions for text that starts and ends with whitespace.
[Fact]
public void ForwardTextWithOuterWhitespace()
{
    // One leading and three trailing spaces, 24 chars in total. The last assertions expect the
    // final break at the end of the text (24) with the preceding non-whitespace ending at 21,
    // which only holds with exactly three trailing spaces.
    var lineBreaker = new LineBreakEnumerator(" Apples Pears Bananas   ".AsMemory());
    var positionsF = GetBreaks(lineBreaker);

    // Break after the leading space.
    Assert.Equal(1, positionsF[0].PositionWrap);
    Assert.Equal(0, positionsF[0].PositionMeasure);

    // Break after "Apples ".
    Assert.Equal(8, positionsF[1].PositionWrap);
    Assert.Equal(7, positionsF[1].PositionMeasure);

    // Break after "Pears ".
    Assert.Equal(14, positionsF[2].PositionWrap);
    Assert.Equal(13, positionsF[2].PositionMeasure);

    // Break at the end of the text; the measure position excludes the trailing spaces.
    Assert.Equal(24, positionsF[3].PositionWrap);
    Assert.Equal(21, positionsF[3].PositionMeasure);
}
/// <summary>
/// Drains the given enumerator and returns every line break it yields, in order.
/// </summary>
/// <param name="lineBreaker">Enumerator to advance until <c>MoveNext</c> returns false.</param>
/// <returns>The collected breaks; empty when the enumerator yields nothing.</returns>
private static List<LineBreak> GetBreaks(LineBreakEnumerator lineBreaker)
{
    var collected = new List<LineBreak>();

    for (var hasBreak = lineBreaker.MoveNext(); hasBreak; hasBreak = lineBreaker.MoveNext())
    {
        collected.Add(lineBreaker.Current);
    }

    return collected;
}
/// <summary>
/// Checks the wrap and measure positions of the three breaks in "Apples Pears Bananas".
/// </summary>
[Fact]
public void ForwardTest()
{
    // Index i holds the expectation for the i-th break.
    var expectedWrap = new[] { 7, 13, 20 };
    var expectedMeasure = new[] { 6, 12, 20 };

    var breaks = GetBreaks(new LineBreakEnumerator("Apples Pears Bananas".AsMemory()));

    for (var i = 0; i < expectedWrap.Length; i++)
    {
        Assert.Equal(expectedWrap[i], breaks[i].PositionWrap);
        Assert.Equal(expectedMeasure[i], breaks[i].PositionMeasure);
    }
}
/// <summary>
/// Runs one case from the generated test data: builds the text from its code points, collects the
/// wrap positions the enumerator reports and compares them with the expected break offsets.
/// </summary>
/// <param name="lineNumber">Line of the test file the case came from; used only in the failure report.</param>
/// <param name="codePoints">Code points that make up the text.</param>
/// <param name="breakPoints">Expected wrap positions, in order.</param>
[Theory(Skip = "Only run when the Unicode spec changes.")]
[ClassData(typeof(LineBreakTestDataGenerator))]
public void ShouldFindBreaks(int lineNumber, int[] codePoints, int[] breakPoints)
{
    var text = string.Concat(codePoints.Select(char.ConvertFromUtf32));

    var actualBreaks = new List<int>();
    var lineBreaker = new LineBreakEnumerator(text.AsMemory());

    while (lineBreaker.MoveNext())
    {
        actualBreaks.Add(lineBreaker.Current.PositionWrap);
    }

    // Same length and same value at every index.
    var matches = actualBreaks.SequenceEqual(breakPoints);

    if (!matches)
    {
        // Dump everything needed to reproduce the case before failing.
        var lineBreakClasses = codePoints.Select(x => new Codepoint(x).LineBreakClass);

        _outputHelper.WriteLine($"Failed test on line {lineNumber}");
        _outputHelper.WriteLine("");
        _outputHelper.WriteLine($" Code Points: {string.Join(" ", codePoints)}");
        _outputHelper.WriteLine($"Expected Breaks: {string.Join(" ", breakPoints)}");
        _outputHelper.WriteLine($" Actual Breaks: {string.Join(" ", actualBreaks)}");
        _outputHelper.WriteLine($" Text: {text}");
        _outputHelper.WriteLine($" Char Props: {string.Join(" ", lineBreakClasses)}");
        _outputHelper.WriteLine("");
    }

    Assert.True(matches);
}
private class LineBreakTestDataGenerator : IEnumerable<object[]>
{
private readonly List<object[]> _testData;
/// <summary>
/// Builds the full set of test rows once, at construction time.
/// </summary>
public LineBreakTestDataGenerator() => _testData = GenerateTestData();
/// <summary>
/// Enumerates the rows that were generated in the constructor.
/// </summary>
public IEnumerator<object[]> GetEnumerator() => _testData.GetEnumerator();
/// <summary>
/// Non-generic enumeration; forwards to the generic <see cref="GetEnumerator"/>.
/// </summary>
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
/// <summary>
/// Downloads <c>auxiliary/LineBreakTest.txt</c> from the location in <c>UnicodeDataGenerator.Ucd</c>
/// and turns every test line into a data row of <c>{ lineNumber, codePoints, breakPoints }</c>.
/// </summary>
/// <remarks>
/// Each line is a sequence of hexadecimal code points separated by <c>×</c> (no break) and
/// <c>÷</c> (break) markers; anything after <c>#</c> is a comment. A break is recorded as the number
/// of UTF-16 code units that precede it, so code points above U+FFFF count as two.
/// </remarks>
/// <returns>
/// One row per non-empty line, or an empty list when the server answers with a non-success status.
/// </returns>
/// <exception cref="FormatException">
/// A line contains a character that is neither whitespace, a marker nor a hexadecimal digit.
/// </exception>
private static List<object[]> GenerateTestData()
{
    var tests = new List<object[]>();

    // Ucd is a URL, not a file-system path. Join with '/' explicitly so the result does not
    // depend on the platform's directory separator.
    var url = UnicodeDataGenerator.Ucd.TrimEnd('/') + "/auxiliary/LineBreakTest.txt";

    // This runs from the constructor, which cannot await, hence the blocking waits.
    using (var client = new HttpClient())
    using (var result = client.GetAsync(url).GetAwaiter().GetResult())
    {
        if (!result.IsSuccessStatusCode)
        {
            return tests;
        }

        using (var stream = result.Content.ReadAsStreamAsync().GetAwaiter().GetResult())
        using (var reader = new StreamReader(stream))
        {
            var lineNumber = 0;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                // 1-based number of the physical line, counted for blank and comment lines too.
                lineNumber++;

                // Drop the trailing comment and surrounding whitespace.
                line = line.Split('#')[0].Trim();

                if (line.Length == 0)
                {
                    continue;
                }

                var codePoints = new List<int>();
                var breakPoints = new List<int>();

                // Running UTF-16 length of the code points parsed so far.
                var utf16Length = 0;
                var p = 0;

                while (p < line.Length)
                {
                    var ch = line[p];

                    // Whitespace and "no break" markers carry no data.
                    if (char.IsWhiteSpace(ch) || ch == '×')
                    {
                        p++;
                        continue;
                    }

                    if (ch == '÷')
                    {
                        breakPoints.Add(utf16Length);
                        p++;
                        continue;
                    }

                    var codePointPos = p;

                    while (p < line.Length && IsHexDigit(line[p]))
                    {
                        p++;
                    }

                    if (p == codePointPos)
                    {
                        // Fail with a useful message instead of passing an empty string to Convert.ToInt32.
                        throw new FormatException(
                            $"Unexpected character '{ch}' at column {p + 1} on line {lineNumber} of LineBreakTest.txt.");
                    }

                    var codePoint = Convert.ToInt32(line.Substring(codePointPos, p - codePointPos), 16);

                    codePoints.Add(codePoint);
                    utf16Length += codePoint > ushort.MaxValue ? 2 : 1;
                }

                tests.Add(new object[] { lineNumber, codePoints.ToArray(), breakPoints.ToArray() });
            }
        }
    }

    return tests;
}
/// <summary>
/// Tells whether <paramref name="ch"/> is an ASCII hexadecimal digit (0-9, A-F or a-f).
/// </summary>
/// <param name="ch">Character to classify.</param>
/// <returns><c>true</c> for ASCII hexadecimal digits; otherwise <c>false</c>.</returns>
private static bool IsHexDigit(char ch)
{
    // char.IsDigit is not used here: it also accepts non-ASCII decimal digits (for example
    // Arabic-Indic ones), which a base-16 parse does not understand.
    return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
}
}
}
}

56
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/LineBreakerTests.cs

@ -1,56 +0,0 @@
using System;
using Avalonia.Media.TextFormatting.Unicode;
using Avalonia.Utilities;
using Xunit;
namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
{
public class LineBreakerTests
{
/// <summary>
/// Cuts the text at each reported measure position and checks that the pieces are the
/// expected segments, each ending with its break character.
/// </summary>
[Fact]
public void Should_Split_Text_By_Explicit_Breaks()
{
    // Segment layout as [first index, last index]:
    //   "ABC "      [0 3]
    //   "DEF\r"     [4 7]
    //   "\r"        [8]
    //   "HELLO\r\n" [9 15]
    const string text = "ABC DEF\r\rHELLO\r\n";

    var lineBreaker = new LineBreakEnumerator(new ReadOnlySlice<char>(text.AsMemory()));
    var current = 0;

    // The first three segments end at the measure position of the corresponding break.
    foreach (var expectedSegment in new[] { "ABC ", "DEF\r", "\r" })
    {
        Assert.True(lineBreaker.MoveNext());

        var segmentLength = lineBreaker.Current.PositionMeasure - current + 1;
        var segment = text.Substring(current, segmentLength);

        Assert.Equal(expectedSegment, segment);

        current += segment.Length;
    }

    // The last segment is simply whatever remains once a fourth break has been reported.
    Assert.True(lineBreaker.MoveNext());

    var remainder = text.Substring(current);

    Assert.Equal("HELLO\r\n", remainder);
}
}
}

25
tests/Avalonia.Visuals.UnitTests/Media/TextFormatting/UnicodeDataGeneratorTests.cs

@ -1,6 +1,4 @@
using System;
using Avalonia.Media.TextFormatting.Unicode;
using Xunit;
using Xunit;
namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
{
@ -15,26 +13,5 @@ namespace Avalonia.Visuals.UnitTests.Media.TextFormatting
{
UnicodeDataGenerator.Execute();
}
/// <summary>
/// Checks that the first break reported for <paramref name="text"/> has the expected wrap position.
/// </summary>
/// <param name="text">Text to enumerate.</param>
/// <param name="expectedLength">Expected <c>PositionWrap</c> of the first break.</param>
[Theory(Skip = "Only run when we update the trie.")]
[ClassData(typeof(LineBreakTestDataGenerator))]
public void Should_Enumerate_LineBreaks(string text, int expectedLength)
{
    var lineBreaker = new LineBreakEnumerator(text.AsMemory());

    var hasBreak = lineBreaker.MoveNext();

    Assert.True(hasBreak);

    var firstBreak = lineBreaker.Current;

    Assert.Equal(expectedLength, firstBreak.PositionWrap);
}
// Data source referenced by the [ClassData] attribute of Should_Enumerate_LineBreaks.
// It only forwards the relative path "auxiliary/LineBreakTest.txt" to the base constructor.
// NOTE(review): TestDataGenerator is declared elsewhere; presumably it loads and parses that
// file into (text, expectedLength) rows - confirm against its implementation.
private class LineBreakTestDataGenerator : TestDataGenerator
{
public LineBreakTestDataGenerator()
: base("auxiliary/LineBreakTest.txt")
{
}
}
}
}

Loading…
Cancel
Save