From 10a3b79d128e2053161833de95e8029727614962 Mon Sep 17 00:00:00 2001 From: Julien Lebosquain Date: Fri, 20 Jan 2023 12:18:38 +0100 Subject: [PATCH] Perf: various misc text layout optimizations --- src/Avalonia.Base/Avalonia.Base.csproj | 1 + .../Media/Fonts/FamilyNameCollection.cs | 74 ++---- .../Media/TextFormatting/TextCharacters.cs | 9 +- .../Media/TextFormatting/TextFormatterImpl.cs | 11 +- .../TextFormatting/Unicode/BiDiAlgorithm.cs | 41 ++-- .../Media/TextFormatting/Unicode/Codepoint.cs | 117 +++++----- .../Media/TextFormatting/Unicode/Grapheme.cs | 4 + .../Unicode/GraphemeEnumerator.cs | 217 ++++++++---------- src/Skia/Avalonia.Skia/TextShaperImpl.cs | 6 +- .../Media/TextShaperImpl.cs | 6 +- .../Text/HugeTextLayout.cs | 12 +- .../HarfBuzzTextShaperImpl.cs | 6 +- .../Avalonia.UnitTests/MockTextShaperImpl.cs | 3 +- 13 files changed, 241 insertions(+), 266 deletions(-) diff --git a/src/Avalonia.Base/Avalonia.Base.csproj b/src/Avalonia.Base/Avalonia.Base.csproj index 4a67191132..35a453ce59 100644 --- a/src/Avalonia.Base/Avalonia.Base.csproj +++ b/src/Avalonia.Base/Avalonia.Base.csproj @@ -30,6 +30,7 @@ + diff --git a/src/Avalonia.Base/Media/Fonts/FamilyNameCollection.cs b/src/Avalonia.Base/Media/Fonts/FamilyNameCollection.cs index eb42f6443b..f2350f5aea 100644 --- a/src/Avalonia.Base/Media/Fonts/FamilyNameCollection.cs +++ b/src/Avalonia.Base/Media/Fonts/FamilyNameCollection.cs @@ -1,13 +1,14 @@ using System; using System.Collections; using System.Collections.Generic; -using System.Text; using Avalonia.Utilities; namespace Avalonia.Media.Fonts { public sealed class FamilyNameCollection : IReadOnlyList { + private readonly string[] _names; + /// /// Initializes a new instance of the class. 
/// @@ -20,13 +21,20 @@ namespace Avalonia.Media.Fonts throw new ArgumentNullException(nameof(familyNames)); } - Names = Array.ConvertAll(familyNames.Split(','), p => p.Trim()); + _names = SplitNames(familyNames); - PrimaryFamilyName = Names[0]; + PrimaryFamilyName = _names[0]; - HasFallbacks = Names.Count > 1; + HasFallbacks = _names.Length > 1; } + private static string[] SplitNames(string names) +#if NET6_0_OR_GREATER + => names.Split(',', StringSplitOptions.TrimEntries); +#else + => Array.ConvertAll(names.Split(','), p => p.Trim()); +#endif + /// /// Gets the primary family name. /// @@ -43,14 +51,6 @@ namespace Avalonia.Media.Fonts /// public bool HasFallbacks { get; } - /// - /// Gets the internal collection of names. - /// - /// - /// The names. - /// - internal IReadOnlyList Names { get; } - /// /// Returns an enumerator for the name collection. /// @@ -76,23 +76,7 @@ namespace Avalonia.Media.Fonts /// A that represents this instance. /// public override string ToString() - { - var builder = StringBuilderCache.Acquire(); - - for (var index = 0; index < Names.Count; index++) - { - builder.Append(Names[index]); - - if (index == Names.Count - 1) - { - break; - } - - builder.Append(", "); - } - - return StringBuilderCache.GetStringAndRelease(builder); - } + => String.Join(", ", _names); /// /// Returns a hash code for this instance. @@ -102,7 +86,7 @@ namespace Avalonia.Media.Fonts /// public override int GetHashCode() { - if (Count == 0) + if (_names.Length == 0) { return 0; } @@ -111,9 +95,9 @@ namespace Avalonia.Media.Fonts { int hash = 17; - for (var i = 0; i < Names.Count; i++) + for (var i = 0; i < _names.Length; i++) { - string name = Names[i]; + string name = _names[i]; hash = hash * 23 + name.GetHashCode(); } @@ -145,30 +129,10 @@ namespace Avalonia.Media.Fonts /// true if the specified is equal to this instance; otherwise, false. /// public override bool Equals(object? 
obj) - { - if (!(obj is FamilyNameCollection other)) - { - return false; - } - - if (other.Count != Count) - { - return false; - } - - for (int i = 0; i < Count; i++) - { - if (Names[i] != other.Names[i]) - { - return false; - } - } - - return true; - } + => obj is FamilyNameCollection other && _names.AsSpan().SequenceEqual(other._names); - public int Count => Names.Count; + public int Count => _names.Length; - public string this[int index] => Names[index]; + public string this[int index] => _names[index]; } } diff --git a/src/Avalonia.Base/Media/TextFormatting/TextCharacters.cs b/src/Avalonia.Base/Media/TextFormatting/TextCharacters.cs index c1f3816e54..9e76418ac9 100644 --- a/src/Avalonia.Base/Media/TextFormatting/TextCharacters.cs +++ b/src/Avalonia.Base/Media/TextFormatting/TextCharacters.cs @@ -47,13 +47,13 @@ namespace Avalonia.Media.TextFormatting /// /// The shapeable text characters. internal void GetShapeableCharacters(ReadOnlyMemory text, sbyte biDiLevel, - ref TextRunProperties? previousProperties, RentedList results) + FontManager fontManager, ref TextRunProperties? previousProperties, RentedList results) { var properties = Properties; while (!text.IsEmpty) { - var shapeableRun = CreateShapeableRun(text, properties, biDiLevel, ref previousProperties); + var shapeableRun = CreateShapeableRun(text, properties, biDiLevel, fontManager, ref previousProperties); results.Add(shapeableRun); @@ -72,7 +72,8 @@ namespace Avalonia.Media.TextFormatting /// /// A list of shapeable text runs. private static UnshapedTextRun CreateShapeableRun(ReadOnlyMemory text, - TextRunProperties defaultProperties, sbyte biDiLevel, ref TextRunProperties? previousProperties) + TextRunProperties defaultProperties, sbyte biDiLevel, FontManager fontManager, + ref TextRunProperties? 
previousProperties) { var defaultTypeface = defaultProperties.Typeface; var currentTypeface = defaultTypeface; @@ -121,7 +122,7 @@ namespace Avalonia.Media.TextFormatting //ToDo: Fix FontFamily fallback var matchFound = - FontManager.Current.TryMatchCharacter(codepoint, defaultTypeface.Style, defaultTypeface.Weight, + fontManager.TryMatchCharacter(codepoint, defaultTypeface.Style, defaultTypeface.Weight, defaultTypeface.Stretch, defaultTypeface.FontFamily, defaultProperties.CultureInfo, out currentTypeface); diff --git a/src/Avalonia.Base/Media/TextFormatting/TextFormatterImpl.cs b/src/Avalonia.Base/Media/TextFormatting/TextFormatterImpl.cs index bf9f6f77f8..b0242be87e 100644 --- a/src/Avalonia.Base/Media/TextFormatting/TextFormatterImpl.cs +++ b/src/Avalonia.Base/Media/TextFormatting/TextFormatterImpl.cs @@ -393,6 +393,7 @@ namespace Avalonia.Media.TextFormatting TextRunProperties? previousProperties = null; TextCharacters? currentRun = null; ReadOnlyMemory runText = default; + var fontManager = FontManager.Current; for (var i = 0; i < textCharacters.Count; i++) { @@ -427,8 +428,8 @@ namespace Avalonia.Media.TextFormatting if (j == runTextSpan.Length) { - currentRun.GetShapeableCharacters(runText.Slice(0, j), runLevel, ref previousProperties, - processedRuns); + currentRun.GetShapeableCharacters(runText.Slice(0, j), runLevel, fontManager, + ref previousProperties, processedRuns); runLevel = levels[levelIndex]; @@ -441,8 +442,8 @@ namespace Avalonia.Media.TextFormatting } // End of this run - currentRun.GetShapeableCharacters(runText.Slice(0, j), runLevel, ref previousProperties, - processedRuns); + currentRun.GetShapeableCharacters(runText.Slice(0, j), runLevel, fontManager, + ref previousProperties, processedRuns); runText = runText.Slice(j); runTextSpan = runText.Span; @@ -459,7 +460,7 @@ namespace Avalonia.Media.TextFormatting return; } - currentRun.GetShapeableCharacters(runText, runLevel, ref previousProperties, processedRuns); + 
currentRun.GetShapeableCharacters(runText, runLevel, fontManager, ref previousProperties, processedRuns); } /// diff --git a/src/Avalonia.Base/Media/TextFormatting/Unicode/BiDiAlgorithm.cs b/src/Avalonia.Base/Media/TextFormatting/Unicode/BiDiAlgorithm.cs index 3a81784152..36e9e6eb79 100644 --- a/src/Avalonia.Base/Media/TextFormatting/Unicode/BiDiAlgorithm.cs +++ b/src/Avalonia.Base/Media/TextFormatting/Unicode/BiDiAlgorithm.cs @@ -343,6 +343,17 @@ namespace Avalonia.Media.TextFormatting.Unicode return 0; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsIsolateStart(BidiClass type) + { + const uint mask = + (1U << (int)BidiClass.LeftToRightIsolate) | + (1U << (int)BidiClass.RightToLeftIsolate) | + (1U << (int)BidiClass.FirstStrongIsolate); + + return ((1U << (int)type) & mask) != 0U; + } + /// /// Build a list of matching isolates for a directionality slice /// Implements BD9 @@ -701,28 +712,19 @@ namespace Avalonia.Media.TextFormatting.Unicode var lastType = _workingClasses[lastCharIndex]; int nextLevel; - switch (lastType) + if (IsIsolateStart(lastType)) { - case BidiClass.LeftToRightIsolate: - case BidiClass.RightToLeftIsolate: - case BidiClass.FirstStrongIsolate: + nextLevel = _paragraphEmbeddingLevel; + } + else + { + i = lastCharIndex + 1; + while (i < _originalClasses.Length && IsRemovedByX9(_originalClasses[i])) { - nextLevel = _paragraphEmbeddingLevel; - - break; + i++; } - default: - { - i = lastCharIndex + 1; - while (i < _originalClasses.Length && IsRemovedByX9(_originalClasses[i])) - { - i++; - } - nextLevel = i >= _originalClasses.Length ? _paragraphEmbeddingLevel : _resolvedLevels[i]; - - break; - } + nextLevel = i >= _originalClasses.Length ? 
_paragraphEmbeddingLevel : _resolvedLevels[i]; } var eos = DirectionFromLevel(Math.Max(nextLevel, level)); @@ -831,8 +833,7 @@ namespace Avalonia.Media.TextFormatting.Unicode // PDI and concatenate that run to this one var lastCharacterIndex = _isolatedRunMapping[_isolatedRunMapping.Length - 1]; var lastType = _originalClasses[lastCharacterIndex]; - if ((lastType == BidiClass.LeftToRightIsolate || lastType == BidiClass.RightToLeftIsolate || lastType == BidiClass.FirstStrongIsolate) && - _isolatePairs.TryGetValue(lastCharacterIndex, out var nextRunIndex)) + if (IsIsolateStart(lastType) && _isolatePairs.TryGetValue(lastCharacterIndex, out var nextRunIndex)) { // Find the continuing run index runIndex = FindRunForIndex(nextRunIndex); diff --git a/src/Avalonia.Base/Media/TextFormatting/Unicode/Codepoint.cs b/src/Avalonia.Base/Media/TextFormatting/Unicode/Codepoint.cs index 22f7b50fd4..6433a37b22 100644 --- a/src/Avalonia.Base/Media/TextFormatting/Unicode/Codepoint.cs +++ b/src/Avalonia.Base/Media/TextFormatting/Unicode/Codepoint.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Generic; using System.Runtime.CompilerServices; namespace Avalonia.Media.TextFormatting.Unicode @@ -11,13 +10,19 @@ namespace Avalonia.Media.TextFormatting.Unicode /// /// The replacement codepoint that is used for non supported values. /// - public static readonly Codepoint ReplacementCodepoint = new Codepoint('\uFFFD'); - - public Codepoint(uint value) + public static Codepoint ReplacementCodepoint { - _value = value; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => new('\uFFFD'); } + /// + /// Creates a new instance of with the specified value. + /// + /// The codepoint value. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Codepoint(uint value) => _value = value; + /// /// Get the codepoint's value. 
/// @@ -87,19 +92,17 @@ namespace Avalonia.Media.TextFormatting.Unicode /// public bool IsWhiteSpace { + [MethodImpl(MethodImplOptions.AggressiveInlining)] get { - switch (GeneralCategory) - { - case GeneralCategory.Control: - case GeneralCategory.NonspacingMark: - case GeneralCategory.Format: - case GeneralCategory.SpaceSeparator: - case GeneralCategory.SpacingMark: - return true; - } - - return false; + const ulong whiteSpaceMask = + (1UL << (int)GeneralCategory.Control) | + (1UL << (int)GeneralCategory.NonspacingMark) | + (1UL << (int)GeneralCategory.Format) | + (1UL << (int)GeneralCategory.SpaceSeparator) | + (1UL << (int)GeneralCategory.SpacingMark); + + return ((1UL << (int)GeneralCategory) & whiteSpaceMask) != 0L; } } @@ -166,56 +169,62 @@ namespace Avalonia.Media.TextFormatting.Unicode /// The index to read at. /// The count of character that were read. /// +#if NET6_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] +#else + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif public static Codepoint ReadAt(ReadOnlySpan text, int index, out int count) { + // Perf note: this method is performance critical for text layout, modify with care! 
+ count = 1; - if (index >= text.Length) + // Perf note: uint check allows the JIT to elide the next bounds check + if ((uint)index >= (uint)text.Length) { return ReplacementCodepoint; } - var code = text[index]; - - ushort hi, low; + uint code = text[index]; - //# High surrogate - if (0xD800 <= code && code <= 0xDBFF) + //# Surrogate + if (IsInRangeInclusive(code, 0xD800U, 0xDFFFU)) { - hi = code; - - if (index + 1 == text.Length) - { - return ReplacementCodepoint; - } - - low = text[index + 1]; - - if (0xDC00 <= low && low <= 0xDFFF) - { - count = 2; - return new Codepoint((uint)((hi - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000)); - } - - return ReplacementCodepoint; - } + uint hi, low; - //# Low surrogate - if (0xDC00 <= code && code <= 0xDFFF) - { - if (index == 0) + //# High surrogate + if (code <= 0xDBFF) { - return ReplacementCodepoint; + if ((uint)(index + 1) < (uint)text.Length) + { + hi = code; + low = text[index + 1]; + + if (IsInRangeInclusive(low, 0xDC00U, 0xDFFFU)) + { + count = 2; + // Perf note: the code is written as below to become just two instructions: shl, lea. 
+ // See https://github.com/dotnet/runtime/blob/7ec3634ee579d89b6024f72b595bfd7118093fc5/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeUtility.cs#L38 + return new Codepoint((hi << 10) + low - ((0xD800U << 10) + 0xDC00U - (1 << 16))); + } + } } - hi = text[index - 1]; - - low = code; - - if (0xD800 <= hi && hi <= 0xDBFF) + //# Low surrogate + else { - count = 2; - return new Codepoint((uint)((hi - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000)); + if (index > 0) + { + low = code; + hi = text[index - 1]; + + if (IsInRangeInclusive(hi, 0xD800U, 0xDBFFU)) + { + count = 2; + return new Codepoint((hi << 10) + low - ((0xD800U << 10) + 0xDC00U - (1 << 16))); + } + } } return ReplacementCodepoint; @@ -224,12 +233,16 @@ namespace Avalonia.Media.TextFormatting.Unicode return new Codepoint(code); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound) + => value - lowerBound <= upperBound - lowerBound; + /// /// Returns if is between /// and , inclusive. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsInRangeInclusive(Codepoint cp, uint lowerBound, uint upperBound) - => (cp._value - lowerBound) <= (upperBound - lowerBound); + => IsInRangeInclusive(cp._value, lowerBound, upperBound); } } diff --git a/src/Avalonia.Base/Media/TextFormatting/Unicode/Grapheme.cs b/src/Avalonia.Base/Media/TextFormatting/Unicode/Grapheme.cs index fa8e8ac976..5a4d891917 100644 --- a/src/Avalonia.Base/Media/TextFormatting/Unicode/Grapheme.cs +++ b/src/Avalonia.Base/Media/TextFormatting/Unicode/Grapheme.cs @@ -22,5 +22,9 @@ namespace Avalonia.Media.TextFormatting.Unicode /// The text of the grapheme cluster /// public ReadOnlySpan Text { get; } + + /// + public override string ToString() + => Text.ToString(); } } diff --git a/src/Avalonia.Base/Media/TextFormatting/Unicode/GraphemeEnumerator.cs b/src/Avalonia.Base/Media/TextFormatting/Unicode/GraphemeEnumerator.cs index 812bb99d99..a6a9453b8a 100644 --- a/src/Avalonia.Base/Media/TextFormatting/Unicode/GraphemeEnumerator.cs +++ b/src/Avalonia.Base/Media/TextFormatting/Unicode/GraphemeEnumerator.cs @@ -4,57 +4,79 @@ // Licensed to The Avalonia Project under MIT License, courtesy of The .NET Foundation. using System; -using System.Runtime.InteropServices; namespace Avalonia.Media.TextFormatting.Unicode { public ref struct GraphemeEnumerator { - private ReadOnlySpan _text; + private readonly ReadOnlySpan _text; + private int _currentCodeUnitOffset; + private int _codeUnitLengthOfCurrentCodepoint; + private Codepoint _currentCodepoint; + + /// + /// Will be if invalid data or EOF reached. + /// Caller shouldn't need to special-case this since the normal rules will halt on this condition. 
+ /// + private GraphemeBreakClass _currentType; public GraphemeEnumerator(ReadOnlySpan text) { _text = text; - Current = default; + _currentCodeUnitOffset = 0; + _codeUnitLengthOfCurrentCodepoint = 0; + _currentCodepoint = Codepoint.ReplacementCodepoint; + _currentType = GraphemeBreakClass.Other; } - /// - /// Gets the current . - /// - public Grapheme Current { get; private set; } - /// /// Moves to the next . /// /// - public bool MoveNext() + public bool MoveNext(out Grapheme grapheme) { - if (_text.IsEmpty) + var startOffset = _currentCodeUnitOffset; + + if ((uint)startOffset >= (uint)_text.Length) { + grapheme = default; return false; } // Algorithm given at https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules. - var processor = new Processor(_text); - - processor.MoveNext(); + if (startOffset == 0) + { + ReadNextCodepoint(); + } - var firstCodepoint = processor.CurrentCodepoint; + var firstCodepoint = _currentCodepoint; // First, consume as many Prepend scalars as we can (rule GB9b). - while (processor.CurrentType == GraphemeBreakClass.Prepend) + if (_currentType == GraphemeBreakClass.Prepend) { - processor.MoveNext(); + do + { + ReadNextCodepoint(); + } while (_currentType == GraphemeBreakClass.Prepend); + + // There were only Prepend scalars in the text + if ((uint)_currentCodeUnitOffset >= (uint)_text.Length) + { + goto Return; + } } // Next, make sure we're not about to violate control character restrictions. // Essentially, if we saw Prepend data, we can't have Control | CR | LF data afterward (rule GB5). 
- if (processor.CurrentCodeUnitOffset > 0) + if (_currentCodeUnitOffset > startOffset) { - if (processor.CurrentType == GraphemeBreakClass.Control - || processor.CurrentType == GraphemeBreakClass.CR - || processor.CurrentType == GraphemeBreakClass.LF) + const uint controlCrLfMask = + (1U << (int)GraphemeBreakClass.Control) | + (1U << (int)GraphemeBreakClass.CR) | + (1U << (int)GraphemeBreakClass.LF); + + if (((1U << (int)_currentType) & controlCrLfMask) != 0U) { goto Return; } @@ -62,19 +84,19 @@ namespace Avalonia.Media.TextFormatting.Unicode // Now begin the main state machine. - var previousClusterBreakType = processor.CurrentType; + var previousClusterBreakType = _currentType; - processor.MoveNext(); + ReadNextCodepoint(); switch (previousClusterBreakType) { case GraphemeBreakClass.CR: - if (processor.CurrentType != GraphemeBreakClass.LF) + if (_currentType != GraphemeBreakClass.LF) { goto Return; // rules GB3 & GB4 (only can follow ) } - processor.MoveNext(); + ReadNextCodepoint(); goto case GraphemeBreakClass.LF; case GraphemeBreakClass.Control: @@ -82,53 +104,57 @@ namespace Avalonia.Media.TextFormatting.Unicode goto Return; // rule GB4 (no data after Control | LF) case GraphemeBreakClass.L: - if (processor.CurrentType == GraphemeBreakClass.L) + { + if (_currentType == GraphemeBreakClass.L) { - processor.MoveNext(); // rule GB6 (L x L) + ReadNextCodepoint(); // rule GB6 (L x L) goto case GraphemeBreakClass.L; } - else if (processor.CurrentType == GraphemeBreakClass.V) + else if (_currentType == GraphemeBreakClass.V) { - processor.MoveNext(); // rule GB6 (L x V) + ReadNextCodepoint(); // rule GB6 (L x V) goto case GraphemeBreakClass.V; } - else if (processor.CurrentType == GraphemeBreakClass.LV) + else if (_currentType == GraphemeBreakClass.LV) { - processor.MoveNext(); // rule GB6 (L x LV) + ReadNextCodepoint(); // rule GB6 (L x LV) goto case GraphemeBreakClass.LV; } - else if (processor.CurrentType == GraphemeBreakClass.LVT) + else if (_currentType == 
GraphemeBreakClass.LVT) { - processor.MoveNext(); // rule GB6 (L x LVT) + ReadNextCodepoint(); // rule GB6 (L x LVT) goto case GraphemeBreakClass.LVT; } else { break; } + } case GraphemeBreakClass.LV: case GraphemeBreakClass.V: - if (processor.CurrentType == GraphemeBreakClass.V) + { + if (_currentType == GraphemeBreakClass.V) { - processor.MoveNext(); // rule GB7 (LV | V x V) + ReadNextCodepoint(); // rule GB7 (LV | V x V) goto case GraphemeBreakClass.V; } - else if (processor.CurrentType == GraphemeBreakClass.T) + else if (_currentType == GraphemeBreakClass.T) { - processor.MoveNext(); // rule GB7 (LV | V x T) + ReadNextCodepoint(); // rule GB7 (LV | V x T) goto case GraphemeBreakClass.T; } else { break; } + } case GraphemeBreakClass.LVT: case GraphemeBreakClass.T: - if (processor.CurrentType == GraphemeBreakClass.T) + if (_currentType == GraphemeBreakClass.T) { - processor.MoveNext(); // rule GB8 (LVT | T x T) + ReadNextCodepoint(); // rule GB8 (LVT | T x T) goto case GraphemeBreakClass.T; } else @@ -139,123 +165,76 @@ namespace Avalonia.Media.TextFormatting.Unicode case GraphemeBreakClass.ExtendedPictographic: // Attempt processing extended pictographic (rules GB11, GB9). // First, drain any Extend scalars that might exist - while (processor.CurrentType == GraphemeBreakClass.Extend) + while (_currentType == GraphemeBreakClass.Extend) { - processor.MoveNext(); + ReadNextCodepoint(); } // Now see if there's a ZWJ + extended pictograph again. - if (processor.CurrentType != GraphemeBreakClass.ZWJ) + if (_currentType != GraphemeBreakClass.ZWJ) { break; } - processor.MoveNext(); - if (processor.CurrentType != GraphemeBreakClass.ExtendedPictographic) + ReadNextCodepoint(); + if (_currentType != GraphemeBreakClass.ExtendedPictographic) { break; } - processor.MoveNext(); + ReadNextCodepoint(); goto case GraphemeBreakClass.ExtendedPictographic; case GraphemeBreakClass.RegionalIndicator: // We've consumed a single RI scalar. Try to consume another (to make it a pair). 
- if (processor.CurrentType == GraphemeBreakClass.RegionalIndicator) + if (_currentType == GraphemeBreakClass.RegionalIndicator) { - processor.MoveNext(); + ReadNextCodepoint(); } // Standlone RI scalars (or a single pair of RI scalars) can only be followed by trailers. break; // nothing but trailers after the final RI - - default: - break; } // rules GB9, GB9a - while (processor.CurrentType == GraphemeBreakClass.Extend - || processor.CurrentType == GraphemeBreakClass.ZWJ - || processor.CurrentType == GraphemeBreakClass.SpacingMark) + while (_currentType is GraphemeBreakClass.Extend + or GraphemeBreakClass.ZWJ + or GraphemeBreakClass.SpacingMark) { - processor.MoveNext(); + ReadNextCodepoint(); } Return: - Current = new Grapheme(firstCodepoint, _text.Slice(0, processor.CurrentCodeUnitOffset)); - - _text = _text.Slice(processor.CurrentCodeUnitOffset); + var graphemeLength = _currentCodeUnitOffset - startOffset; + grapheme = new Grapheme(firstCodepoint, startOffset, graphemeLength); return true; // rules GB2, GB999 } - [StructLayout(LayoutKind.Auto)] - private ref struct Processor + private void ReadNextCodepoint() { - private readonly ReadOnlySpan _buffer; - private int _codeUnitLengthOfCurrentScalar; - - internal Processor(ReadOnlySpan buffer) - { - _buffer = buffer; - _codeUnitLengthOfCurrentScalar = 0; - CurrentCodepoint = Codepoint.ReplacementCodepoint; - CurrentType = GraphemeBreakClass.Other; - CurrentCodeUnitOffset = 0; - } - - public int CurrentCodeUnitOffset { get; private set; } - - /// - /// Will be if invalid data or EOF reached. - /// Caller shouldn't need to special-case this since the normal rules will halt on this condition. - /// - public GraphemeBreakClass CurrentType { get; private set; } - - /// - /// Get the currently processed . 
- /// - public Codepoint CurrentCodepoint { get; private set; } - - public void MoveNext() - { - // For ill-formed subsequences (like unpaired UTF-16 surrogate code points), we rely on - // the decoder's default behavior of interpreting these ill-formed subsequences as - // equivalent to U+FFFD REPLACEMENT CHARACTER. This code point has a boundary property - // of Other (XX), which matches the modifications made to UAX#29, Rev. 35. - // See: https://www.unicode.org/reports/tr29/tr29-35.html#Modifications - // This change is also reflected in the UCD files. For example, Unicode 11.0's UCD file - // https://www.unicode.org/Public/11.0.0/ucd/auxiliary/GraphemeBreakProperty.txt - // has the line "D800..DFFF ; Control # Cs [2048] ..", - // but starting with Unicode 12.0 that line has been removed. - // - // If a later version of the Unicode Standard further modifies this guidance we should reflect - // that here. - - if (CurrentCodeUnitOffset == _buffer.Length) - { - CurrentCodepoint = Codepoint.ReplacementCodepoint; - } - else - { - CurrentCodeUnitOffset += _codeUnitLengthOfCurrentScalar; - - if (CurrentCodeUnitOffset < _buffer.Length) - { - CurrentCodepoint = Codepoint.ReadAt(_buffer, CurrentCodeUnitOffset, - out _codeUnitLengthOfCurrentScalar); - } - else - { - CurrentCodepoint = Codepoint.ReplacementCodepoint; - } - } - - CurrentType = CurrentCodepoint.GraphemeBreakClass; - } + // For ill-formed subsequences (like unpaired UTF-16 surrogate code points), we rely on + // the decoder's default behavior of interpreting these ill-formed subsequences as + // equivalent to U+FFFD REPLACEMENT CHARACTER. This code point has a boundary property + // of Other (XX), which matches the modifications made to UAX#29, Rev. 35. + // See: https://www.unicode.org/reports/tr29/tr29-35.html#Modifications + // This change is also reflected in the UCD files. 
For example, Unicode 11.0's UCD file + // https://www.unicode.org/Public/11.0.0/ucd/auxiliary/GraphemeBreakProperty.txt + // has the line "D800..DFFF ; Control # Cs [2048] ..", + // but starting with Unicode 12.0 that line has been removed. + // + // If a later version of the Unicode Standard further modifies this guidance we should reflect + // that here. + + _currentCodeUnitOffset += _codeUnitLengthOfCurrentCodepoint; + + _currentCodepoint = Codepoint.ReadAt(_text, _currentCodeUnitOffset, + out _codeUnitLengthOfCurrentCodepoint); + + _currentType = _currentCodepoint.GraphemeBreakClass; } } } diff --git a/src/Skia/Avalonia.Skia/TextShaperImpl.cs b/src/Skia/Avalonia.Skia/TextShaperImpl.cs index def2482af3..e1a6b93692 100644 --- a/src/Skia/Avalonia.Skia/TextShaperImpl.cs +++ b/src/Skia/Avalonia.Skia/TextShaperImpl.cs @@ -52,6 +52,8 @@ namespace Avalonia.Skia var shapedBuffer = new ShapedBuffer(text, bufferLength, typeface, fontRenderingEmSize, bidiLevel); + var targetInfos = shapedBuffer.GlyphInfos; + var glyphInfos = buffer.GetGlyphInfoSpan(); var glyphPositions = buffer.GetGlyphPositionSpan(); @@ -77,9 +79,7 @@ namespace Avalonia.Skia 4 * typeface.GetGlyphAdvance(glyphIndex) * textScale; } - var targetInfo = new Media.TextFormatting.GlyphInfo(glyphIndex, glyphCluster, glyphAdvance, glyphOffset); - - shapedBuffer[i] = targetInfo; + targetInfos[i] = new Media.TextFormatting.GlyphInfo(glyphIndex, glyphCluster, glyphAdvance, glyphOffset); } return shapedBuffer; diff --git a/src/Windows/Avalonia.Direct2D1/Media/TextShaperImpl.cs b/src/Windows/Avalonia.Direct2D1/Media/TextShaperImpl.cs index ac441108e3..ff0fff6b14 100644 --- a/src/Windows/Avalonia.Direct2D1/Media/TextShaperImpl.cs +++ b/src/Windows/Avalonia.Direct2D1/Media/TextShaperImpl.cs @@ -52,6 +52,8 @@ namespace Avalonia.Direct2D1.Media var shapedBuffer = new ShapedBuffer(text, bufferLength, typeface, fontRenderingEmSize, bidiLevel); + var targetInfos = shapedBuffer.GlyphInfos; + var glyphInfos = 
buffer.GetGlyphInfoSpan(); var glyphPositions = buffer.GetGlyphPositionSpan(); @@ -77,9 +79,7 @@ namespace Avalonia.Direct2D1.Media 4 * typeface.GetGlyphAdvance(glyphIndex) * textScale; } - var targetInfo = new Avalonia.Media.TextFormatting.GlyphInfo(glyphIndex, glyphCluster, glyphAdvance, glyphOffset); - - shapedBuffer[i] = targetInfo; + targetInfos[i] = new Avalonia.Media.TextFormatting.GlyphInfo(glyphIndex, glyphCluster, glyphAdvance, glyphOffset); } return shapedBuffer; diff --git a/tests/Avalonia.Benchmarks/Text/HugeTextLayout.cs b/tests/Avalonia.Benchmarks/Text/HugeTextLayout.cs index c96edbef5c..0adabc75f1 100644 --- a/tests/Avalonia.Benchmarks/Text/HugeTextLayout.cs +++ b/tests/Avalonia.Benchmarks/Text/HugeTextLayout.cs @@ -77,7 +77,17 @@ In respect that the structure of the sufficient amount poses problems and challe public TextLayout BuildEmojisTextLayout() => MakeLayout(Emojis); [Benchmark] - public TextLayout[] BuildManySmallTexts() => _manySmallStrings.Select(MakeLayout).ToArray(); + public TextLayout[] BuildManySmallTexts() + { + var results = new TextLayout[_manySmallStrings.Length]; + + for (var i = 0; i < _manySmallStrings.Length; i++) + { + results[i] = MakeLayout(_manySmallStrings[i]); + } + + return results; + } [Benchmark] public void VirtualizeTextBlocks() diff --git a/tests/Avalonia.UnitTests/HarfBuzzTextShaperImpl.cs b/tests/Avalonia.UnitTests/HarfBuzzTextShaperImpl.cs index baf5ffb07c..0448ecd41f 100644 --- a/tests/Avalonia.UnitTests/HarfBuzzTextShaperImpl.cs +++ b/tests/Avalonia.UnitTests/HarfBuzzTextShaperImpl.cs @@ -52,6 +52,8 @@ namespace Avalonia.UnitTests var shapedBuffer = new ShapedBuffer(text, bufferLength, typeface, fontRenderingEmSize, bidiLevel); + var targetInfos = shapedBuffer.GlyphInfos; + var glyphInfos = buffer.GetGlyphInfoSpan(); var glyphPositions = buffer.GetGlyphPositionSpan(); @@ -77,9 +79,7 @@ namespace Avalonia.UnitTests 4 * typeface.GetGlyphAdvance(glyphIndex) * textScale; } - var targetInfo = new 
Media.TextFormatting.GlyphInfo(glyphIndex, glyphCluster, glyphAdvance, glyphOffset); - - shapedBuffer[i] = targetInfo; + targetInfos[i] = new Media.TextFormatting.GlyphInfo(glyphIndex, glyphCluster, glyphAdvance, glyphOffset); } return shapedBuffer; diff --git a/tests/Avalonia.UnitTests/MockTextShaperImpl.cs b/tests/Avalonia.UnitTests/MockTextShaperImpl.cs index b5f4777192..b810caabd9 100644 --- a/tests/Avalonia.UnitTests/MockTextShaperImpl.cs +++ b/tests/Avalonia.UnitTests/MockTextShaperImpl.cs @@ -13,6 +13,7 @@ namespace Avalonia.UnitTests var fontRenderingEmSize = options.FontRenderingEmSize; var bidiLevel = options.BidiLevel; var shapedBuffer = new ShapedBuffer(text, text.Length, typeface, fontRenderingEmSize, bidiLevel); + var targetInfos = shapedBuffer.GlyphInfos; var textSpan = text.Span; var textStartIndex = TextTestHelper.GetStartCharIndex(text); @@ -26,7 +27,7 @@ namespace Avalonia.UnitTests for (var j = 0; j < count; ++j) { - shapedBuffer[i + j] = new GlyphInfo(glyphIndex, glyphCluster, 10); + targetInfos[i + j] = new GlyphInfo(glyphIndex, glyphCluster, 10); } i += count;