A cross-platform UI framework for .NET
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

1701 lines
58 KiB

using System;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
namespace Avalonia.Media.TextFormatting.Unicode
{
public ref struct LineBreakEnumerator
{
// U+25CC DOTTED CIRCLE; LB28a compares codepoints against it.
private const char DotCircle = '\u25CC';
// Sentinel unit flagged as start of text; every slot of a fresh LineBreakState is initialised with it.
private static readonly BreakUnit s_sot = new() { StartOfText = true };
// Sentinel unit flagged as end of text; returned when peeking at or beyond the end of the text.
private static readonly BreakUnit s_eot = new() { EndOfText = true };
// The text that is scanned for line break opportunities.
public readonly ReadOnlySpan<char> _text;
// Mutable scanning state. It is a reference type, so the readonly MoveNext can still advance it.
private readonly LineBreakState _state;
/// <summary>
/// Creates an enumerator over the line break opportunities of <paramref name="text"/>.
/// </summary>
/// <param name="text">The text to analyse.</param>
public LineBreakEnumerator(ReadOnlySpan<char> text)
{
_text = text;
_state = new LineBreakState();
}
/// <summary>
/// Advances to the next line break opportunity.
/// </summary>
/// <param name="lineBreak">
/// Receives the break that was found, or <c>default</c> when the method returns <c>false</c>.
/// </param>
/// <returns>
/// <c>true</c> when a break was produced; <c>false</c> when the text is empty or the unit that
/// was read last is flagged as end of text.
/// </returns>
public readonly bool MoveNext([NotNullWhen(true)] out LineBreak lineBreak)
{
    lineBreak = default;

    // For an empty span the state only ever peeks a zero-length unit that is not flagged
    // EndOfText, so the end-of-text rule can never fire and the scan below would not stop.
    if (_text.IsEmpty || _state.Current.EndOfText)
    {
        return false;
    }

    // Consume one unit at a time until the rule table reports a break after the current unit.
    LineBreak? result;

    do
    {
        _state.Read(_text);
        result = ExecuteRules(_text, _state);
    }
    while (result is null);

    lineBreak = result.Value;

    return true;
}
/// <summary>
/// Tests whether <paramref name="cls"/> is one of the hard break classes (BK, LF, CR, NL).
/// </summary>
/// <param name="cls">The class to test.</param>
/// <returns><c>true</c> when the class bit is contained in the hard break bit set.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsBreakClass(LineBreakClass cls)
{
    // One bit per hard break class, indexed by the enum's numeric value.
    const ulong hardBreakBits =
        (1UL << (int)LineBreakClass.MandatoryBreak) |
        (1UL << (int)LineBreakClass.LineFeed) |
        (1UL << (int)LineBreakClass.CarriageReturn) |
        (1UL << (int)LineBreakClass.NextLine);

    var classBit = 1UL << (int)cls;

    return (classBit & hardBreakBits) != 0UL;
}
/// <summary>
/// Builds the <see cref="LineBreak"/> that follows the current unit.
/// </summary>
/// <param name="text">The text being analysed.</param>
/// <param name="state">The scanning state; its current unit ends at the wrap position.</param>
/// <param name="isRequired">Whether the break is mandatory.</param>
/// <returns>
/// A break whose wrap position is the end of the current unit. When the current unit is a
/// space, CR or LF, the measure position is moved back via <see cref="FindPriorNonWhitespace"/>.
/// </returns>
private static LineBreak GetLineBreak(ReadOnlySpan<char> text, LineBreakState state, bool isRequired)
{
    var current = state.Current;
    var wrapPosition = current.Start + current.Length;

    if (current.LineBreakClass is not (LineBreakClass.Space or LineBreakClass.CarriageReturn or LineBreakClass.LineFeed))
    {
        return new LineBreak(wrapPosition, wrapPosition, isRequired);
    }

    // After a CR (e.g. the LF of a CR LF pair) start the backwards search in front of the CR.
    var searchFrom = state.Previous.LineBreakClass == LineBreakClass.CarriageReturn
        ? state.Previous.Start
        : wrapPosition;

    return new LineBreak(FindPriorNonWhitespace(text, searchFrom), wrapPosition, isRequired);
}
/// <summary>
/// Walks backwards from <paramref name="from"/> over at most one hard break codepoint and then
/// over any run of codepoints whose class is Space.
/// </summary>
/// <param name="text">The text being analysed.</param>
/// <param name="from">The exclusive end index to start from.</param>
/// <returns>The index reached after skipping.</returns>
private static int FindPriorNonWhitespace(ReadOnlySpan<char> text, int from)
{
    var position = from;

    // Step 1: drop a single trailing BK / CR / LF / NL codepoint.
    if (position > 0)
    {
        var trailing = Codepoint.ReadAt(text, position - 1, out var trailingLength);

        if (IsBreakClass(trailing.LineBreakClass))
        {
            position -= trailingLength;
        }
    }

    // Step 2: drop every space directly in front of it.
    while (position > 0)
    {
        var candidate = Codepoint.ReadAt(text, position - 1, out var candidateLength);

        if (candidate.LineBreakClass != LineBreakClass.Space)
        {
            break;
        }

        position -= candidateLength;
    }

    return position;
}
/// <summary>
/// Runs the rule table in order against the current state until one rule decides.
/// </summary>
/// <param name="text">The text being analysed.</param>
/// <param name="state">The scanning state.</param>
/// <returns>
/// The break after the current unit when a rule answers MayBreak or MustBreak; <c>null</c> when a
/// rule answers NoBreak or when every rule passes.
/// </returns>
/// <exception cref="InvalidOperationException">A rule returned an undefined result.</exception>
private static LineBreak? ExecuteRules(ReadOnlySpan<char> text, LineBreakState state)
{
    for (var index = 0; index < s_rules.Length; index++)
    {
        var outcome = s_rules[index](text, state);

        if (outcome == RuleResult.Pass)
        {
            // The rule has no opinion; ask the next one.
            continue;
        }

        if (outcome == RuleResult.NoBreak)
        {
            return null;
        }

        if (outcome is RuleResult.MayBreak or RuleResult.MustBreak)
        {
            // The "required" flag is derived from the class of the current unit, not from the result kind.
            return GetLineBreak(text, state, IsBreakClass(state.Current.LineBreakClass));
        }

        throw new InvalidOperationException("Invalid state.");
    }

    return null;
}
/// <summary>
/// Bookkeeping step that runs before the actual rules: maintains the regional indicator and
/// quotation counters on <paramref name="state"/>. It never decides a break itself.
/// </summary>
/// <param name="text">The text being analysed (unused).</param>
/// <param name="state">The scanning state whose counters are updated.</param>
/// <returns>Always <see cref="RuleResult.Pass"/>.</returns>
private static RuleResult QuotationAndRegionalIndicator(ReadOnlySpan<char> text, LineBreakState state)
{
    // Units that inherited their class from a preceding base belong to that base and are not counted again.
    if (state.Current.Inherited)
    {
        return RuleResult.Pass;
    }

    switch (state.Current.LineBreakClass)
    {
        case LineBreakClass.RegionalIndicator:
            {
                // Keep the counter at 0 or 1: 1 means "an unpaired indicator precedes the break position".
                if (++state.RegionalIndicator % 2 == 0)
                {
                    state.RegionalIndicator = 0;
                }

                break;
            }
        case LineBreakClass.Quotation:
            {
                if (++state.Quotation % 2 == 0)
                {
                    state.Quotation -= 2;
                }

                // A quotation mark interrupts a run of regional indicators.
                state.RegionalIndicator = 0;

                break;
            }
        default:
            {
                // LB30a pairs indicators only within an uninterrupted run ([^RI] (RI RI)* RI × RI),
                // so any other unit starts a new run. Without this reset "RI a RI RI" would split the pair.
                state.RegionalIndicator = 0;

                break;
            }
    }

    return RuleResult.Pass;
}
/// <summary>
/// LB3: Always break at the end of text.
/// </summary>
/// <returns>MustBreak when the current unit is flagged as end of text; otherwise Pass.</returns>
private static RuleResult LB03(ReadOnlySpan<char> text, LineBreakState state)
{
    // !eot
    return state.Current.EndOfText ? RuleResult.MustBreak : RuleResult.Pass;
}
/// <summary>
/// LB4: Always break after hard line breaks.
/// </summary>
/// <returns>MustBreak when the current unit has class BK; otherwise Pass.</returns>
private static RuleResult LB04(ReadOnlySpan<char> text, LineBreakState state)
{
    // BK !
    var isMandatory = state.Current.LineBreakClass == LineBreakClass.MandatoryBreak;

    return isMandatory ? RuleResult.MustBreak : RuleResult.Pass;
}
/// <summary>
/// LB5: Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
/// </summary>
/// <returns>
/// NoBreak between CR and LF, MustBreak after any other CR, LF or NL; otherwise Pass.
/// </returns>
private static RuleResult LB05(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current.LineBreakClass;

    if (current == LineBreakClass.CarriageReturn)
    {
        // CR × LF, otherwise CR !
        return state.Next(text).LineBreakClass == LineBreakClass.LineFeed
            ? RuleResult.NoBreak
            : RuleResult.MustBreak;
    }

    // LF ! and NL !
    if (current is LineBreakClass.LineFeed or LineBreakClass.NextLine)
    {
        return RuleResult.MustBreak;
    }

    return RuleResult.Pass;
}
/// <summary>
/// LB6: Do not break before hard line breaks.
/// </summary>
/// <returns>NoBreak when the next unit is BK, CR, LF or NL; otherwise Pass.</returns>
private static RuleResult LB06(ReadOnlySpan<char> text, LineBreakState state)
{
    // × ( BK | CR | LF | NL )
    var upcoming = state.Next(text).LineBreakClass;

    return IsBreakClass(upcoming) ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB7: Do not break before spaces or zero width space.
/// </summary>
/// <returns>NoBreak when the next unit is SP or ZW; otherwise Pass.</returns>
private static RuleResult LB07(ReadOnlySpan<char> text, LineBreakState state)
{
    // × SP
    // × ZW
    var upcoming = state.Next(text).LineBreakClass;

    return (upcoming is LineBreakClass.Space or LineBreakClass.ZWSpace)
        ? RuleResult.NoBreak
        : RuleResult.Pass;
}
/// <summary>
/// LB8: Break before any character following a zero-width space, even if one or more spaces intervene.
/// </summary>
/// <returns>
/// MayBreak when the last non-space unit is ZW and the next unit is not a space; otherwise Pass.
/// </returns>
private static RuleResult LB08(ReadOnlySpan<char> text, LineBreakState state)
{
    // ZW SP* ÷
    if (state.LastBeforeSpace.LineBreakClass != LineBreakClass.ZWSpace)
    {
        return RuleResult.Pass;
    }

    // While more spaces follow, the break is deferred to the end of the space run.
    return state.Next(text).LineBreakClass == LineBreakClass.Space
        ? RuleResult.Pass
        : RuleResult.MayBreak;
}
/// <summary>
/// LB8a: Do not break after a zero width joiner.
/// </summary>
/// <returns>NoBreak when the current unit is ZWJ; otherwise Pass.</returns>
private static RuleResult LB08a(ReadOnlySpan<char> text, LineBreakState state)
{
    // ZWJ ×
    var isJoiner = state.Current.LineBreakClass == LineBreakClass.ZWJ;

    return isJoiner ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB9: Do not break a combining character sequence;
/// treat it as if it has the line breaking class of the base character in all of the following rules.
/// Treat ZWJ as if it were CM.
/// </summary>
/// <returns>
/// NoBreak (after marking the next unit as ignored) when a CM or ZWJ follows an eligible base;
/// otherwise Pass.
/// </returns>
private static RuleResult LB09(ReadOnlySpan<char> text, LineBreakState state)
{
    // Treat X (CM | ZWJ)* as if it were X.
    // where X is any line break class except BK, CR, LF, NL, SP, or ZW.
    var baseClass = state.Current.LineBreakClass;

    var cannotCarryMarks = IsBreakClass(baseClass)
        || baseClass == LineBreakClass.Space
        || baseClass == LineBreakClass.ZWSpace;

    if (cannotCarryMarks)
    {
        return RuleResult.Pass;
    }

    if (state.Next(text).LineBreakClass is not (LineBreakClass.CombiningMark or LineBreakClass.ZWJ))
    {
        return RuleResult.Pass;
    }

    // Flag the mark so that, once read, it takes over the class of the unit in front of it.
    state.IgnoreNext(text);

    return RuleResult.NoBreak;
}
/// <summary>
/// LB10: Treat any remaining combining mark or ZWJ as AL.
/// </summary>
/// <remarks>
/// This implementation rewrites units of class CM only: the current unit and the next unit
/// are each re-classed as AL and flagged as inherited.
/// </remarks>
/// <returns>Always Pass; the rule only rewrites classes.</returns>
private static RuleResult LB10(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current;

    if (current.LineBreakClass == LineBreakClass.CombiningMark)
    {
        state.Current = current with { LineBreakClass = LineBreakClass.Alphabetic, Inherited = true };
    }

    var upcoming = state.Next(text);

    if (upcoming.LineBreakClass == LineBreakClass.CombiningMark)
    {
        state.ReplaceNext(upcoming with { LineBreakClass = LineBreakClass.Alphabetic, Inherited = true });
    }

    return RuleResult.Pass;
}
/// <summary>
/// LB11: Do not break before or after Word joiner and related characters.
/// </summary>
/// <returns>NoBreak when the next or the current unit is WJ; otherwise Pass.</returns>
private static RuleResult LB11(ReadOnlySpan<char> text, LineBreakState state)
{
    // × WJ
    // WJ ×
    var touchesWordJoiner =
        state.Next(text).LineBreakClass == LineBreakClass.WordJoiner
        || state.Current.LineBreakClass == LineBreakClass.WordJoiner;

    return touchesWordJoiner ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB12: Do not break after NBSP and related characters.
/// </summary>
/// <returns>NoBreak when the current unit is GL; otherwise Pass.</returns>
private static RuleResult LB12(ReadOnlySpan<char> text, LineBreakState state)
{
    // GL ×
    var isGlue = state.Current.LineBreakClass == LineBreakClass.Glue;

    return isGlue ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB12a: Do not break before NBSP and related characters, except after spaces and hyphens.
/// </summary>
/// <returns>
/// NoBreak when the next unit is GL and the current unit is not SP, BA or HY; otherwise Pass.
/// </returns>
private static RuleResult LB12a(ReadOnlySpan<char> text, LineBreakState state)
{
    // [^SP BA HY] × GL
    if (state.Next(text).LineBreakClass != LineBreakClass.Glue)
    {
        return RuleResult.Pass;
    }

    var isException = state.Current.LineBreakClass
        is LineBreakClass.Space or LineBreakClass.BreakAfter or LineBreakClass.Hyphen;

    return isException ? RuleResult.Pass : RuleResult.NoBreak;
}
/// <summary>
/// LB13: Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
/// </summary>
/// <returns>NoBreak when the next unit is CL, CP, EX or SY; otherwise Pass.</returns>
private static RuleResult LB13(ReadOnlySpan<char> text, LineBreakState state)
{
    // × CL
    // × CP
    // × EX
    // × SY
    var isClosing = state.Next(text).LineBreakClass
        is LineBreakClass.ClosePunctuation
        or LineBreakClass.CloseParenthesis
        or LineBreakClass.Exclamation
        or LineBreakClass.BreakSymbols;

    return isClosing ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB14: Do not break after ‘[’, even after spaces.
/// </summary>
/// <returns>NoBreak when the last non-whitespace unit is OP; otherwise Pass.</returns>
private static RuleResult LB14(ReadOnlySpan<char> text, LineBreakState state)
{
    // OP SP* ×
    var followsOpener = state.LastBeforeWhitespace.LineBreakClass == LineBreakClass.OpenPunctuation;

    return followsOpener ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB15a: Do not break after an unresolved initial punctuation that lies at the start of the line,
/// after a space, after opening punctuation, or after an unresolved quotation mark, even after spaces.
/// </summary>
/// <returns>NoBreak when one of the contexts below applies; otherwise Pass.</returns>
private static RuleResult LB15a(ReadOnlySpan<char> text, LineBreakState state)
{
    // (sot | BK | CR | LF | NL | OP | QU | GL | SP | ZW) [\p{Pi}&QU] SP* ×
    var opener = state.LastBeforeWhitespace;

    var hasOpenInitialQuote = state.Quotation > 0
        && opener.Codepoint.GeneralCategory == GeneralCategory.InitialPunctuation
        && opener.LineBreakClass == LineBreakClass.Quotation;

    if (!hasOpenInitialQuote)
    {
        return RuleResult.Pass;
    }

    var current = state.Current;

    // At the start of the line: start of text, or directly after a hard break.
    if (current.StartOfText || IsBreakClass(state.Previous.LineBreakClass))
    {
        return RuleResult.NoBreak;
    }

    // A unit that inherited its class is handled like the LineBreakClass.Glue case.
    if (current.Inherited)
    {
        return RuleResult.NoBreak;
    }

    // After a space: checked on the current unit first, then on the previous one.
    if (IsSpacing(current.LineBreakClass) || IsSpacing(state.Previous.LineBreakClass))
    {
        return RuleResult.NoBreak;
    }

    // After opening punctuation.
    if (LineBreakState.Before(text, opener).LineBreakClass == LineBreakClass.OpenPunctuation)
    {
        return RuleResult.NoBreak;
    }

    // After an unresolved quotation mark.
    return state.Quotation - 1 > 0 ? RuleResult.NoBreak : RuleResult.Pass;

    static bool IsSpacing(LineBreakClass cls)
    {
        return cls is LineBreakClass.Glue or LineBreakClass.Space or LineBreakClass.ZWSpace;
    }
}
/// <summary>
/// LB15b: Do not break before an unresolved final punctuation that lies at the end of the line,
/// before a space, before a prohibited break, or before an unresolved quotation mark, even after spaces.
/// </summary>
/// <returns>
/// NoBreak when the next unit is a final-punctuation quotation mark followed by one of the
/// listed contexts; otherwise Pass.
/// </returns>
private static RuleResult LB15b(ReadOnlySpan<char> text, LineBreakState state)
{
    // × [\p{Pf}&QU] ( SP | GL | WJ | CL | QU | CP | EX | IS | SY | BK | CR | LF | NL | ZW | eot)
    var closer = state.Next(text);

    if (closer.Codepoint.GeneralCategory != GeneralCategory.FinalPunctuation
        || closer.LineBreakClass != LineBreakClass.Quotation)
    {
        return RuleResult.Pass;
    }

    var after = LineBreakState.After(text, closer);

    // The unit after the quote is flagged as end of text, or is a hard break.
    if (after.EndOfText || IsBreakClass(after.LineBreakClass))
    {
        return RuleResult.NoBreak;
    }

    var isListedFollower = after.LineBreakClass
        is LineBreakClass.Space
        or LineBreakClass.Glue
        or LineBreakClass.WordJoiner
        or LineBreakClass.ClosePunctuation
        or LineBreakClass.Quotation
        or LineBreakClass.CloseParenthesis
        or LineBreakClass.Exclamation
        or LineBreakClass.InfixNumeric
        or LineBreakClass.BreakSymbols
        or LineBreakClass.ZWSpace;

    return isListedFollower ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB15c: Break before a decimal mark that follows a space, for instance, in ‘subtract .5’.
/// </summary>
/// <returns>MayBreak for SP followed by IS NU; otherwise Pass.</returns>
private static RuleResult LB15c(ReadOnlySpan<char> text, LineBreakState state)
{
    // SP ÷ IS NU
    if (state.Current.LineBreakClass != LineBreakClass.Space)
    {
        return RuleResult.Pass;
    }

    var upcoming = state.Next(text);

    var isDecimalMark = upcoming.LineBreakClass == LineBreakClass.InfixNumeric
        && LineBreakState.After(text, upcoming).LineBreakClass == LineBreakClass.Numeric;

    return isDecimalMark ? RuleResult.MayBreak : RuleResult.Pass;
}
/// <summary>
/// LB15d: Otherwise, do not break before ‘;’, ‘,’, or ‘.’, even after spaces.
/// </summary>
/// <returns>NoBreak when the next unit is IS; otherwise Pass.</returns>
private static RuleResult LB15d(ReadOnlySpan<char> text, LineBreakState state)
{
    // × IS
    var beforeInfix = state.Next(text).LineBreakClass == LineBreakClass.InfixNumeric;

    return beforeInfix ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB16: Do not break between closing punctuation and a nonstarter (lb=NS),
/// even with intervening spaces.
/// </summary>
/// <returns>
/// NoBreak when the last non-whitespace unit is CL or CP and the class found after the
/// following spaces is NS or CJ; otherwise Pass.
/// </returns>
private static RuleResult LB16(ReadOnlySpan<char> text, LineBreakState state)
{
    // (CL | CP) SP* × NS
    if (state.LastBeforeWhitespace.LineBreakClass
        is not (LineBreakClass.ClosePunctuation or LineBreakClass.CloseParenthesis))
    {
        return RuleResult.Pass;
    }

    // ClassAfterSpaces yields the raw class, so CJ (which resolves to NS) is matched explicitly.
    var follower = LineBreakState.ClassAfterSpaces(text, state.Current);

    return (follower is LineBreakClass.ConditionalJapaneseStarter or LineBreakClass.Nonstarter)
        ? RuleResult.NoBreak
        : RuleResult.Pass;
}
/// <summary>
/// LB17: Do not break within ‘——’, even with intervening spaces.
/// </summary>
/// <returns>
/// NoBreak when the last non-whitespace unit is B2 and the class after the following spaces
/// is B2 as well; otherwise Pass.
/// </returns>
private static RuleResult LB17(ReadOnlySpan<char> text, LineBreakState state)
{
    // B2 SP* × B2
    var insideDashPair =
        state.LastBeforeWhitespace.LineBreakClass == LineBreakClass.BreakBoth
        && LineBreakState.ClassAfterSpaces(text, state.Current) == LineBreakClass.BreakBoth;

    return insideDashPair ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB18: Break after spaces.
/// </summary>
/// <returns>MayBreak when the current unit is SP; otherwise Pass.</returns>
private static RuleResult LB18(ReadOnlySpan<char> text, LineBreakState state)
{
    // SP ÷
    var afterSpace = state.Current.LineBreakClass == LineBreakClass.Space;

    return afterSpace ? RuleResult.MayBreak : RuleResult.Pass;
}
/// <summary>
/// LB19: Do not break before non-initial unresolved quotation marks, nor after non-final
/// unresolved quotation marks.
/// LB19a: Unless surrounded by East Asian characters, do not break on either side of any
/// unresolved quotation mark.
/// </summary>
/// <returns>NoBreak when one of the LB19 / LB19a contexts applies; otherwise Pass.</returns>
private static RuleResult LB19(ReadOnlySpan<char> text, LineBreakState state)
{
    var next = state.Next(text);

    if (next.LineBreakClass == LineBreakClass.Quotation)
    {
        // LB19: × [QU - \p{Pi}]
        if (next.Codepoint.GeneralCategory != GeneralCategory.InitialPunctuation)
        {
            return RuleResult.NoBreak;
        }

        // LB19a: [^$EastAsian] × QU
        if (!state.LastBeforeWhitespace.Codepoint.IsEastAsian)
        {
            return RuleResult.NoBreak;
        }

        var after = LineBreakState.After(text, next);

        // LB19a: × QU ( [^$EastAsian] | eot )
        if (!after.Codepoint.IsEastAsian || after.EndOfText)
        {
            return RuleResult.NoBreak;
        }
    }

    // LB19: [QU - \p{Pf}] ×
    // Only FINAL quotation marks are exempt from the "no break after" rule. Testing for initial
    // punctuation here would allow a break right after an opening quote in East Asian text and
    // forbid one after a closing quote.
    if (state.LastBeforeWhitespace.LineBreakClass == LineBreakClass.Quotation
        && state.LastBeforeWhitespace.Codepoint.GeneralCategory != GeneralCategory.FinalPunctuation)
    {
        return RuleResult.NoBreak;
    }

    if (state.LastBeforeSpace.LineBreakClass == LineBreakClass.Quotation)
    {
        // LB19a: QU × [^$EastAsian]
        if (!state.Next(text).Codepoint.IsEastAsian)
        {
            return RuleResult.NoBreak;
        }

        var before = LineBreakState.Before(text, state.LastBeforeSpace);

        // LB19a: ( sot | [^$EastAsian] ) QU ×
        if (before.StartOfText || !before.Codepoint.IsEastAsian)
        {
            return RuleResult.NoBreak;
        }
    }

    return RuleResult.Pass;
}
/// <summary>
/// LB20: Break before and after unresolved CB.
/// </summary>
/// <returns>MayBreak when the current or the next unit is CB; otherwise Pass.</returns>
private static RuleResult LB20(ReadOnlySpan<char> text, LineBreakState state)
{
    // ÷ CB
    // CB ÷
    var touchesContingentBreak =
        state.Current.LineBreakClass == LineBreakClass.ContingentBreak
        || state.Next(text).LineBreakClass == LineBreakClass.ContingentBreak;

    return touchesContingentBreak ? RuleResult.MayBreak : RuleResult.Pass;
}
/// <summary>
/// LB20a: Do not break after a word-initial hyphen.
/// </summary>
/// <returns>
/// NoBreak when the previous unit is a word boundary context, the next unit is AL and the last
/// non-whitespace unit is HY or U+2010; otherwise Pass.
/// </returns>
private static RuleResult LB20a(ReadOnlySpan<char> text, LineBreakState state)
{
    // (sot | BK | CR | LF | NL | SP | ZW | CB | GL)(HY | [\u2010]) × AL
    if (!IsWordBoundary(state.Previous) || state.Next(text).LineBreakClass != LineBreakClass.Alphabetic)
    {
        return RuleResult.Pass;
    }

    var hyphen = state.LastBeforeWhitespace;

    // 0x2010 (decimal 8208) is HYPHEN.
    var isHyphen = hyphen.LineBreakClass == LineBreakClass.Hyphen || hyphen.Codepoint.Value == 0x2010;

    return isHyphen ? RuleResult.NoBreak : RuleResult.Pass;

    // (sot | BK | CR | LF | NL | SP | ZW | CB | GL)
    static bool IsWordBoundary(BreakUnit unit)
    {
        return unit.StartOfText
            || IsBreakClass(unit.LineBreakClass)
            || unit.LineBreakClass
                is LineBreakClass.Space
                or LineBreakClass.ZWSpace
                or LineBreakClass.ContingentBreak
                or LineBreakClass.Glue;
    }
}
/// <summary>
/// LB21: Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents.
/// </summary>
/// <returns>
/// NoBreak when the next unit is BA, HY or NS, or when the current unit is BB; otherwise Pass.
/// </returns>
private static RuleResult LB21(ReadOnlySpan<char> text, LineBreakState state)
{
    // [21.01] × (BA | HY | NS)
    if (state.Next(text).LineBreakClass
        is LineBreakClass.BreakAfter or LineBreakClass.Hyphen or LineBreakClass.Nonstarter)
    {
        return RuleResult.NoBreak;
    }

    // [21.04] BB ×
    return state.Current.LineBreakClass == LineBreakClass.BreakBefore
        ? RuleResult.NoBreak
        : RuleResult.Pass;
}
/// <summary>
/// LB21a: Don't break after Hebrew + Hyphen.
/// </summary>
/// <remarks>
/// As written, the East Asian exclusion is applied to the current unit whether it is HY or BA.
/// </remarks>
/// <returns>
/// NoBreak when the next unit is not HL, the previous unit is HL and the current unit is a
/// non-East-Asian HY or BA; otherwise Pass.
/// </returns>
private static RuleResult LB21a(ReadOnlySpan<char> text, LineBreakState state)
{
    // [21.1] HL(HY|NonEastAsianBA) × [^HL]
    if (state.Next(text).LineBreakClass == LineBreakClass.HebrewLetter)
    {
        return RuleResult.Pass;
    }

    var followsHebrewHyphen =
        state.Previous.LineBreakClass == LineBreakClass.HebrewLetter
        && (state.Current.LineBreakClass is LineBreakClass.Hyphen or LineBreakClass.BreakAfter)
        && !state.Current.Codepoint.IsEastAsian;

    return followsHebrewHyphen ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB21b: Don’t break between Solidus and Hebrew letters.
/// </summary>
/// <returns>NoBreak for SY followed by HL; otherwise Pass.</returns>
private static RuleResult LB21b(ReadOnlySpan<char> text, LineBreakState state)
{
    // [21.2] SY × HL
    var solidusBeforeHebrew =
        state.Current.LineBreakClass == LineBreakClass.BreakSymbols
        && state.Next(text).LineBreakClass == LineBreakClass.HebrewLetter;

    return solidusBeforeHebrew ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB22: Do not break before ellipses.
/// </summary>
/// <returns>NoBreak when the next unit is IN; otherwise Pass.</returns>
private static RuleResult LB22(ReadOnlySpan<char> text, LineBreakState state)
{
    // × IN
    var beforeInseparable = state.Next(text).LineBreakClass == LineBreakClass.Inseparable;

    return beforeInseparable ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB23: Do not break between digits and letters.
/// </summary>
/// <returns>NoBreak for (AL | HL) × NU and NU × (AL | HL); otherwise Pass.</returns>
private static RuleResult LB23(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current.LineBreakClass;

    // (AL | HL) × NU
    if (current is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter)
    {
        return state.Next(text).LineBreakClass == LineBreakClass.Numeric
            ? RuleResult.NoBreak
            : RuleResult.Pass;
    }

    // NU × (AL | HL)
    if (current == LineBreakClass.Numeric)
    {
        return (state.Next(text).LineBreakClass is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter)
            ? RuleResult.NoBreak
            : RuleResult.Pass;
    }

    return RuleResult.Pass;
}
/// <summary>
/// LB23a: Do not break between numeric prefixes and ideographs, or between
/// ideographs and numeric postfixes.
/// </summary>
/// <returns>NoBreak for PR × (ID | EB | EM) and (ID | EB | EM) × PO; otherwise Pass.</returns>
private static RuleResult LB23a(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current.LineBreakClass;

    // PR × (ID | EB | EM)
    if (current == LineBreakClass.PrefixNumeric && IsIdeographLike(state.Next(text).LineBreakClass))
    {
        return RuleResult.NoBreak;
    }

    // (ID | EB | EM) × PO
    if (state.Next(text).LineBreakClass == LineBreakClass.PostfixNumeric && IsIdeographLike(current))
    {
        return RuleResult.NoBreak;
    }

    return RuleResult.Pass;

    // (ID | EB | EM)
    static bool IsIdeographLike(LineBreakClass cls)
    {
        return cls is LineBreakClass.Ideographic or LineBreakClass.EBase or LineBreakClass.EModifier;
    }
}
/// <summary>
/// LB24: Do not break between numeric prefix/postfix and letters, or between
/// letters and prefix/postfix.
/// </summary>
/// <returns>NoBreak for (PR | PO) × (AL | HL) and (AL | HL) × (PR | PO); otherwise Pass.</returns>
private static RuleResult LB24(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current.LineBreakClass;

    var currentIsAffix = current is LineBreakClass.PrefixNumeric or LineBreakClass.PostfixNumeric;
    var currentIsLetter = current is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter;

    if (!currentIsAffix && !currentIsLetter)
    {
        return RuleResult.Pass;
    }

    var upcoming = state.Next(text).LineBreakClass;

    // (PR | PO) × (AL | HL)
    if (currentIsAffix && (upcoming is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter))
    {
        return RuleResult.NoBreak;
    }

    // (AL | HL) × (PR | PO)
    if (currentIsLetter && (upcoming is LineBreakClass.PrefixNumeric or LineBreakClass.PostfixNumeric))
    {
        return RuleResult.NoBreak;
    }

    return RuleResult.Pass;
}
/// <summary>
/// LB25: Do not break between the following pairs of classes relevant to numbers.
/// </summary>
/// <remarks>
/// The "NU (SY | IS)*" prefix of the rule is approximated: at most one SY or IS between the
/// number and the break position is recognised.
/// </remarks>
/// <returns>NoBreak when one of the numeric contexts below applies; otherwise Pass.</returns>
private static RuleResult LB25(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current.LineBreakClass;
    var next = state.Next(text);

    switch (next.LineBreakClass)
    {
        case LineBreakClass.PrefixNumeric:
        case LineBreakClass.PostfixNumeric:
            {
                switch (current)
                {
                    // [25.01] NU(SY|IS)* CL × PO
                    // [25.02] NU(SY|IS)* CP × PO
                    // [25.03] NU(SY|IS)* CL × PR
                    // [25.04] NU(SY|IS)* CP × PR
                    case LineBreakClass.ClosePunctuation:
                    case LineBreakClass.CloseParenthesis:
                        {
                            if (EndsNumber(text, state.Previous))
                            {
                                return RuleResult.NoBreak;
                            }

                            break;
                        }
                    // [25.05] NU(SY|IS)* × PO
                    // [25.06] NU(SY|IS)* × PR
                    case LineBreakClass.Numeric:
                        {
                            return RuleResult.NoBreak;
                        }
                    case LineBreakClass.BreakSymbols:
                    case LineBreakClass.InfixNumeric:
                        {
                            if (state.Previous.LineBreakClass == LineBreakClass.Numeric)
                            {
                                return RuleResult.NoBreak;
                            }

                            break;
                        }
                }

                break;
            }
        // [25.15] NU(SY|IS)* × NU
        case LineBreakClass.Numeric:
            {
                switch (current)
                {
                    case LineBreakClass.Numeric:
                        {
                            return RuleResult.NoBreak;
                        }
                    case LineBreakClass.BreakSymbols:
                    case LineBreakClass.InfixNumeric:
                        {
                            if (state.Previous.LineBreakClass == LineBreakClass.Numeric)
                            {
                                return RuleResult.NoBreak;
                            }

                            break;
                        }
                }

                break;
            }
    }

    if (current is LineBreakClass.PrefixNumeric or LineBreakClass.PostfixNumeric)
    {
        switch (next.LineBreakClass)
        {
            case LineBreakClass.OpenPunctuation:
                {
                    var afterNext = LineBreakState.After(text, next);

                    // (PR | PO) × OP NU
                    if (afterNext.LineBreakClass == LineBreakClass.Numeric)
                    {
                        return RuleResult.NoBreak;
                    }

                    // (PR | PO) × OP IS NU
                    if (afterNext.LineBreakClass == LineBreakClass.InfixNumeric
                        && LineBreakState.After(text, afterNext).LineBreakClass == LineBreakClass.Numeric)
                    {
                        return RuleResult.NoBreak;
                    }

                    break;
                }
            // (PR | PO) × NU
            case LineBreakClass.Numeric:
                {
                    return RuleResult.NoBreak;
                }
        }
    }

    // HY × NU
    // [25.14] IS × NU
    if ((current is LineBreakClass.Hyphen or LineBreakClass.InfixNumeric)
        && next.LineBreakClass == LineBreakClass.Numeric)
    {
        return RuleResult.NoBreak;
    }

    return RuleResult.Pass;

    // True when the unit is NU, or is a SY / IS whose preceding unit is NU.
    static bool EndsNumber(ReadOnlySpan<char> text, BreakUnit unit)
    {
        switch (unit.LineBreakClass)
        {
            case LineBreakClass.Numeric:
                return true;
            case LineBreakClass.BreakSymbols:
            case LineBreakClass.InfixNumeric:
                // The number sits one unit further back. Re-testing the SY / IS unit itself
                // against NU could never succeed.
                return LineBreakState.Before(text, unit).LineBreakClass == LineBreakClass.Numeric;
            default:
                return false;
        }
    }
}
/// <summary>
/// LB26: Do not break a Korean syllable.
/// </summary>
/// <returns>NoBreak inside a Hangul syllable sequence; otherwise Pass.</returns>
private static RuleResult LB26(ReadOnlySpan<char> text, LineBreakState state)
{
    var upcoming = state.Next(text).LineBreakClass;

    var insideSyllable = state.Current.LineBreakClass switch
    {
        // JL × (JL | JV | H2 | H3)
        LineBreakClass.JL => upcoming is LineBreakClass.JL or LineBreakClass.JV or LineBreakClass.H2 or LineBreakClass.H3,
        // (JV | H2) × (JV | JT)
        LineBreakClass.JV or LineBreakClass.H2 => upcoming is LineBreakClass.JV or LineBreakClass.JT,
        // (JT | H3) × JT
        LineBreakClass.JT or LineBreakClass.H3 => upcoming == LineBreakClass.JT,
        _ => false
    };

    return insideSyllable ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB27: Treat a Korean Syllable Block the same as ID.
/// </summary>
/// <returns>
/// NoBreak for a Hangul class followed by PO and for PR followed by a Hangul class; otherwise Pass.
/// </returns>
private static RuleResult LB27(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current.LineBreakClass;

    // (JL | JV | JT | H2 | H3) × PO
    if (IsHangul(current))
    {
        return state.Next(text).LineBreakClass == LineBreakClass.PostfixNumeric
            ? RuleResult.NoBreak
            : RuleResult.Pass;
    }

    // PR × (JL | JV | JT | H2 | H3)
    if (current == LineBreakClass.PrefixNumeric && IsHangul(state.Next(text).LineBreakClass))
    {
        return RuleResult.NoBreak;
    }

    return RuleResult.Pass;

    // (JL | JV | JT | H2 | H3)
    static bool IsHangul(LineBreakClass cls)
    {
        return cls
            is LineBreakClass.JL
            or LineBreakClass.JV
            or LineBreakClass.JT
            or LineBreakClass.H2
            or LineBreakClass.H3;
    }
}
/// <summary>
/// LB28: Do not break between alphabetics (“at”).
/// </summary>
/// <returns>NoBreak for (AL | HL) × (AL | HL); otherwise Pass.</returns>
private static RuleResult LB28(ReadOnlySpan<char> text, LineBreakState state)
{
    // [28.0] (AL | HL) × (AL | HL)
    if (state.Current.LineBreakClass is not (LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter))
    {
        return RuleResult.Pass;
    }

    return (state.Next(text).LineBreakClass is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter)
        ? RuleResult.NoBreak
        : RuleResult.Pass;
}
/// <summary>
/// LB28a: Do not break inside the orthographic syllables of Brahmic scripts.
/// </summary>
/// <returns>NoBreak when one of the four sub-rules matches; otherwise Pass.</returns>
private static RuleResult LB28a(ReadOnlySpan<char> text, LineBreakState state)
{
    var upcoming = state.Next(text);

    // [28.11] AP × (AK | DottedCircle | AS)
    if (state.Current.LineBreakClass == LineBreakClass.AksaraPrebase && IsSyllableBase(upcoming))
    {
        return RuleResult.NoBreak;
    }

    // [28.12] (AK | DottedCircle | AS) × (VF | VI)
    if (IsSyllableBase(state.LastBeforeWhitespace)
        && (upcoming.LineBreakClass == LineBreakClass.ViramaFinal || upcoming.LineBreakClass == LineBreakClass.Virama))
    {
        return RuleResult.NoBreak;
    }

    // [28.13] (AK | DottedCircle| AS) VI × (AK | DottedCircle)
    if (IsSyllableBase(state.Previous)
        && state.Current.LineBreakClass == LineBreakClass.Virama
        && (upcoming.LineBreakClass == LineBreakClass.Aksara || upcoming.Codepoint == DotCircle))
    {
        return RuleResult.NoBreak;
    }

    // [28.14] (AK | DottedCircle | AS) × (AK | DottedCircle | AS) VF
    if (IsSyllableBase(state.LastBeforeWhitespace)
        && IsSyllableBase(upcoming)
        && LineBreakState.After(text, upcoming).LineBreakClass == LineBreakClass.ViramaFinal)
    {
        return RuleResult.NoBreak;
    }

    return RuleResult.Pass;

    // (AK | DottedCircle | AS)
    static bool IsSyllableBase(BreakUnit unit)
    {
        return unit.LineBreakClass == LineBreakClass.Aksara
            || unit.Codepoint == DotCircle
            || unit.LineBreakClass == LineBreakClass.AksaraStart;
    }
}
/// <summary>
/// LB29: Do not break between numeric punctuation and alphabetics (“e.g.”).
/// </summary>
/// <returns>NoBreak for IS × (AL | HL); otherwise Pass.</returns>
private static RuleResult LB29(ReadOnlySpan<char> text, LineBreakState state)
{
    // IS × (AL | HL)
    if (state.Current.LineBreakClass != LineBreakClass.InfixNumeric)
    {
        return RuleResult.Pass;
    }

    return (state.Next(text).LineBreakClass is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter)
        ? RuleResult.NoBreak
        : RuleResult.Pass;
}
/// <summary>
/// LB30: Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
/// </summary>
/// <returns>
/// NoBreak between AL / HL / NU and a non-East-Asian OP, and between a non-East-Asian CP and
/// AL / HL / NU; otherwise Pass.
/// </returns>
private static RuleResult LB30(ReadOnlySpan<char> text, LineBreakState state)
{
    var current = state.Current;

    // (AL | HL | NU) × [OP-[\p{ea=F}\p{ea=W}\p{ea=H}]]
    if (IsLetterOrDigit(current.LineBreakClass))
    {
        var opener = state.Next(text);

        return opener.LineBreakClass == LineBreakClass.OpenPunctuation && !opener.Codepoint.IsEastAsian
            ? RuleResult.NoBreak
            : RuleResult.Pass;
    }

    // [CP-[\p{ea=F}\p{ea=W}\p{ea=H}]] × (AL | HL | NU)
    if (current.LineBreakClass == LineBreakClass.CloseParenthesis
        && !current.Codepoint.IsEastAsian
        && IsLetterOrDigit(state.Next(text).LineBreakClass))
    {
        return RuleResult.NoBreak;
    }

    return RuleResult.Pass;

    // (AL | HL | NU)
    static bool IsLetterOrDigit(LineBreakClass cls)
    {
        return cls is LineBreakClass.Alphabetic or LineBreakClass.HebrewLetter or LineBreakClass.Numeric;
    }
}
/// <summary>
/// LB30a: Break between two regional indicator symbols if and only if there
/// are an even number of regional indicators preceding the position of the
/// break.
/// </summary>
/// <returns>
/// NoBreak when the regional indicator counter is exactly one and the next unit is RI;
/// otherwise Pass.
/// </returns>
private static RuleResult LB30a(ReadOnlySpan<char> text, LineBreakState state)
{
    // An odd count means the current indicator is the first half of a pair.
    var splitsPair =
        state.RegionalIndicator > 0
        && state.Next(text).LineBreakClass == LineBreakClass.RegionalIndicator
        && state.RegionalIndicator + 1 == 2;

    return splitsPair ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB30b: Do not break between an emoji base (or potential emoji) and an emoji modifier.
/// </summary>
/// <returns>
/// NoBreak when the next unit is EM and the current unit is EB or an unassigned
/// Extended_Pictographic codepoint; otherwise Pass.
/// </returns>
private static RuleResult LB30b(ReadOnlySpan<char> text, LineBreakState state)
{
    // Both sub-rules require an emoji modifier to follow.
    if (state.Next(text).LineBreakClass != LineBreakClass.EModifier)
    {
        return RuleResult.Pass;
    }

    var current = state.Current;

    // EB × EM
    if (current.LineBreakClass == LineBreakClass.EBase)
    {
        return RuleResult.NoBreak;
    }

    // [\p{Extended_Pictographic}&&\p{Cn}] × EM
    //
    // Unassigned codepoints that carry the Extended_Pictographic property are reserved for
    // future emoji, so they are kept together with a following modifier as well.
    var isReservedPictograph =
        current.Codepoint.GraphemeBreakClass == GraphemeBreakClass.ExtendedPictographic
        && current.Codepoint.GeneralCategory == GeneralCategory.Unassigned;

    return isReservedPictograph ? RuleResult.NoBreak : RuleResult.Pass;
}
/// <summary>
/// LB31: Break everywhere else.
/// </summary>
/// <returns>Always MayBreak; this is the final fallback of the rule table.</returns>
private static RuleResult LB31(ReadOnlySpan<char> text, LineBreakState state)
    => RuleResult.MayBreak;
/// <summary>
/// The verdict a single rule returns to ExecuteRules.
/// </summary>
private enum RuleResult
{
// The rule does not apply; ExecuteRules continues with the next rule.
Pass,
// No break after the current unit; ExecuteRules returns null.
NoBreak,
// A break after the current unit; ExecuteRules returns a LineBreak.
MayBreak,
// Handled exactly like MayBreak by ExecuteRules; the "required" flag is derived from the current unit's class.
MustBreak
}
/// <summary>
/// One codepoint of the text together with its position, its resolved line break class and
/// the flags the rules attach to it.
/// </summary>
private readonly struct BreakUnit
{
    /// <summary>Creates an empty unit with class Unknown; used for the sot / eot sentinels.</summary>
    public BreakUnit()
    {
        LineBreakClass = LineBreakClass.Unknown;
    }

    /// <summary>
    /// Copies codepoint, position and the end-of-text flag of <paramref name="other"/> and assigns
    /// a different class.
    /// </summary>
    /// <remarks>
    /// NOTE(review): StartOfText, Ignored and Inherited are not copied — confirm that this is intended.
    /// </remarks>
    public BreakUnit(BreakUnit other, LineBreakClass lineBreakClass)
    {
        Codepoint = other.Codepoint;
        Start = other.Start;
        Length = other.Length;
        LineBreakClass = lineBreakClass;
        EndOfText = other.EndOfText;
    }

    /// <summary>Creates a unit for a codepoint read from the text and resolves its class (LB1).</summary>
    /// <param name="codepoint">The codepoint.</param>
    /// <param name="start">The index the codepoint was read at.</param>
    /// <param name="length">The number of chars the codepoint occupies.</param>
    public BreakUnit(Codepoint codepoint, int start, int length)
    {
        Codepoint = codepoint;
        Start = start;
        Length = length;
        LineBreakClass = MapClass(codepoint);
    }

    /// <summary>Index of the unit in the text.</summary>
    public int Start { get; }

    /// <summary>Length of the unit in chars.</summary>
    public int Length { get; }

    /// <summary>The underlying codepoint.</summary>
    public Codepoint Codepoint { get; }

    /// <summary>Set on the eot sentinel and on the unit that ends exactly at the end of the text.</summary>
    public bool EndOfText { get; init; }

    /// <summary>Set on the sot sentinel and on the unit at index 0.</summary>
    public bool StartOfText { get; init; }

    /// <summary>The resolved line break class; rules may overwrite it via <c>with</c>.</summary>
    public LineBreakClass LineBreakClass { get; init; }

    /// <summary>Set by LB9 on a CM / ZWJ that attaches to the preceding base.</summary>
    public bool Ignored { get; init; }

    /// <summary>Set when the class was taken over from another unit or rewritten by LB10.</summary>
    public bool Inherited { get; init; }

    /// <summary>
    /// LB1: resolves the classes AI, SG, XX, SA and CJ to the classes the remaining rules work with.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static LineBreakClass MapClass(Codepoint cp)
    {
        // NOTE(review): U+50005 (decimal 327685) is forced to AL; the reason is not visible here.
        if (cp.Value == 327685)
        {
            return LineBreakClass.Alphabetic;
        }

        // LB 1
        // ==========================================
        // Resolved Original General_Category
        // ==========================================
        // AL AI, SG, XX Any
        // CM SA Only Mn or Mc
        // AL SA Any except Mn and Mc
        // NS CJ Any
        var cls = cp.LineBreakClass;

        // Fast path: a single bit test lets every class that needs no resolution skip the switch.
        const ulong specialMask =
            (1UL << (int)LineBreakClass.Ambiguous) |
            (1UL << (int)LineBreakClass.Surrogate) |
            (1UL << (int)LineBreakClass.Unknown) |
            (1UL << (int)LineBreakClass.ComplexContext) |
            (1UL << (int)LineBreakClass.ConditionalJapaneseStarter);

        if (((1UL << (int)cls) & specialMask) != 0UL)
        {
            switch (cls)
            {
                case LineBreakClass.Ambiguous:
                case LineBreakClass.Surrogate:
                case LineBreakClass.Unknown:
                    return LineBreakClass.Alphabetic;
                case LineBreakClass.ComplexContext:
                    return cp.GeneralCategory is GeneralCategory.NonspacingMark or GeneralCategory.SpacingMark
                        ? LineBreakClass.CombiningMark
                        : LineBreakClass.Alphabetic;
                case LineBreakClass.ConditionalJapaneseStarter:
                    return LineBreakClass.Nonstarter;
            }
        }

        return cls;
    }
}
/// <summary>
/// Mutable cursor over the text: tracks the current, previous and (lazily peeked) next unit,
/// the last unit in front of whitespace / spaces, and the counters used by the rules.
/// </summary>
private class LineBreakState
{
// Cached look-ahead unit; null until Next(text) is called after a Read.
private BreakUnit? _next;
// The unit that was current before the last Read.
private BreakUnit _previous;
/// <summary>Creates a state positioned in front of the first codepoint; all unit slots hold the sot sentinel.</summary>
public LineBreakState()
{
_next = null;
_previous = s_sot;
Current = s_sot;
LastBeforeSpace = s_sot;
LastBeforeWhitespace = s_sot;
}
/// <summary>The unit that was consumed last. Rules may replace it (see LB10).</summary>
public BreakUnit Current { get; set; }
/// <summary>
/// The unit in front of <see cref="Current"/>. When that unit is flagged Ignored or Inherited,
/// it is replaced by <see cref="LastBeforeWhitespace"/>; note that the getter stores the replacement.
/// </summary>
public BreakUnit Previous
{
get
{
if (_previous.Ignored || _previous.Inherited)
{
_previous = LastBeforeWhitespace;
}
return _previous;
}
}
/// <summary>Returns the unit that the next Read will consume, peeking and caching it on first use.</summary>
public BreakUnit Next(ReadOnlySpan<char> text)
{
return _next ??= Peek(text);
}
/// <summary>Returns the unit that starts right after <paramref name="current"/>, or the eot sentinel.</summary>
public static BreakUnit After(ReadOnlySpan<char> text, BreakUnit current)
{
if (current.EndOfText)
{
return s_eot;
}
return PeekAt(text, current.Start + current.Length);
}
/// <summary>Returns the unit read at the index directly in front of <paramref name="current"/>, or the sot sentinel.</summary>
public static BreakUnit Before(ReadOnlySpan<char> text, BreakUnit current)
{
if (current.StartOfText)
{
return s_sot;
}
// NOTE(review): the read starts one char back; presumably Codepoint.ReadAt resolves a trailing
// surrogate to the full pair — confirm, since Start / StartOfText of the result derive from this index.
var position = current.Start - 1;
var unit = PeekAt(text, position);
return unit;
}
/// <summary>Flags the look-ahead unit as Ignored so that Read lets it inherit the preceding class (LB9).</summary>
public void IgnoreNext(ReadOnlySpan<char> text)
{
_next = Next(text) with { Ignored = true };
}
/// <summary>Replaces the cached look-ahead unit (LB10).</summary>
public void ReplaceNext(BreakUnit next)
{
_next = next;
}
/// <summary>Index of the first char that has not been consumed yet.</summary>
public int Position { get; private set; }
/// <summary>Regional indicator counter maintained by QuotationAndRegionalIndicator and read by LB30a.</summary>
public int RegionalIndicator { get; set; }
/// <summary>Quotation mark counter maintained by QuotationAndRegionalIndicator and read by LB15a.</summary>
public int Quotation { get; set; }
/// <summary>The most recently read unit whose codepoint is not white space.</summary>
public BreakUnit LastBeforeWhitespace { get; set; }
/// <summary>The most recently read unit whose class is not Space.</summary>
public BreakUnit LastBeforeSpace { get; set; }
// Builds the unit at the given index without changing any state.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BreakUnit PeekAt(ReadOnlySpan<char> text, int index)
{
// Empty text: a zero-length replacement unit flagged as start of text (it is NOT flagged as end of text).
if (text.Length == 0)
{
return new BreakUnit(Codepoint.ReplacementCodepoint, index, 0)
{
StartOfText = true
};
}
if (index >= text.Length)
{
return s_eot;
}
var codepoint = Codepoint.ReadAt(text, index, out var count);
return new BreakUnit(codepoint, index, count)
{
// EndOfText marks the LAST unit of the text, not a position past it.
EndOfText = index + count == text.Length,
StartOfText = index == 0
};
}
/// <summary>Builds the unit at <see cref="Position"/> without consuming it.</summary>
public BreakUnit Peek(ReadOnlySpan<char> text)
{
return PeekAt(text, Position);
}
/// <summary>
/// Consumes the look-ahead unit: it becomes <see cref="Current"/>, the old current unit becomes
/// the previous one, and <see cref="Position"/> advances by the unit's length.
/// </summary>
/// <returns>The unit as it was peeked, i.e. before an Ignored unit inherits its class.</returns>
public BreakUnit Read(ReadOnlySpan<char> text)
{
_previous = Current;
var next = Next(text);
if (!next.Codepoint.IsWhiteSpace)
{
LastBeforeWhitespace = next;
}
if (next.LineBreakClass != LineBreakClass.Space)
{
LastBeforeSpace = next;
}
Current = next;
Position += next.Length;
_next = null;
// LB9: a unit flagged as Ignored takes over the class of the unit in front of it.
if (Current.Ignored)
{
Current = Current with { LineBreakClass = Previous.LineBreakClass, Inherited = true };
}
return next;
}
/// <summary>
/// Returns the raw (unmapped) class of the first codepoint after <paramref name="current"/> that
/// is not a space. When <paramref name="current"/> ends the text, its own class is returned.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static LineBreakClass ClassAfterSpaces(ReadOnlySpan<char> text, BreakUnit current)
{
var position = current.Start + current.Length;
if (position >= text.Length)
{
return current.LineBreakClass;
}
var enumerator = new CodepointEnumerator(text.Slice(position));
Codepoint cp;
// Skip the space run; the loop body is intentionally empty.
while (enumerator.MoveNext(out cp) && cp.LineBreakClass == LineBreakClass.Space)
{ }
// NOTE(review): when only spaces remain, cp is whatever MoveNext assigned on its final call — confirm.
return cp.LineBreakClass;
}
}
// Signature shared by all rules: inspect (and possibly adjust) the state, then return a verdict.
private delegate RuleResult BreakUnitDelegate(ReadOnlySpan<char> text, LineBreakState state);
// The rule table. ExecuteRules evaluates the entries top to bottom and stops at the first one
// that does not return Pass, so the order is significant.
private static readonly BreakUnitDelegate[] s_rules = [
// Bookkeeping only: updates the counters and always passes.
QuotationAndRegionalIndicator,
LB03,
LB04,
LB05,
LB06,
LB07,
LB08,
LB08a,
LB09,
LB10,
LB11,
LB12,
LB12a,
LB13,
LB14,
LB15a,
LB15b,
LB15c,
LB15d,
LB16,
LB17,
LB18,
LB19,
LB20,
LB20a,
LB21a, // Must be before LB21
LB21,
LB21b,
LB22,
LB23,
LB23a,
LB24,
LB25,
LB26,
LB27,
LB28,
LB28a,
LB29,
LB30,
LB30a,
LB30b,
// Fallback: always answers MayBreak.
LB31,
];
}
}