tannergooding/Lexer.cs

## Lexer.cs
// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using TerraFX.CodeAnalysis.Source;
using TerraFX.CodeAnalysis.Tokens;

namespace TerraFX.CodeAnalysis
{
    /// <summary>Defines a lexical analyzer which tokenizes a source text.</summary>
    /// <remarks>
    ///   <para>The lexer is a forward-only enumerator: each successful call to <see cref="MoveNext" /> classifies the rune at the current position, greedily consumes every following rune accepted by the matching continuation predicate, and exposes the result through <see cref="Current" />.</para>
    ///   <para>Instances cannot be created directly; use <see cref="Tokenize(string)" /> or <see cref="Tokenize(SourceText)" />.</para>
    /// </remarks>
    public sealed partial class Lexer : IEnumerator<Token>
    {
        // The token produced by the most recent MoveNext call, or default when there is none.
        private Token _currentToken;

        // The index, in runes, of the first rune that has not yet been consumed.
        private nuint _sourceTextIndex;

        /// <summary>Initializes a new instance of the <see cref="Lexer" /> class positioned at the start of <paramref name="sourceText" />.</summary>
        /// <param name="sourceText">The source text to tokenize.</param>
        private Lexer(SourceText sourceText)
        {
            SourceText = sourceText;
            Reset();
        }

        /// <inheritdoc />
        public Token Current => _currentToken;

        /// <summary>Gets the source text which is being tokenized.</summary>
        public SourceText SourceText { get; }

        /// <summary>Tokenizes the source text.</summary>
        /// <param name="text">The source text to tokenize.</param>
        /// <returns>An immutable array of the tokens that comprise <paramref name="text" />.</returns>
        public static ImmutableArray<Token> Tokenize(string text)
        {
            var sourceText = new SourceText(text);
            return Tokenize(sourceText);
        }

        /// <summary>Tokenizes the source text.</summary>
        /// <param name="sourceText">The source text to tokenize.</param>
        /// <returns>An immutable array of the tokens that comprise <paramref name="sourceText" />; the array is empty when <paramref name="sourceText" /> contains no runes.</returns>
        public static ImmutableArray<Token> Tokenize(SourceText sourceText)
        {
            var tokensBuilder = ImmutableArray.CreateBuilder<Token>();
            var lexer = new Lexer(sourceText);

            // Only record tokens that MoveNext actually produced. Priming the lexer and then
            // using a do/while loop would add a default (Unknown, zero-length) token when the
            // source text is empty, because Current is reset to default once MoveNext fails.

            while (lexer.MoveNext())
            {
                tokensBuilder.Add(lexer.Current);
            }

            return tokensBuilder.ToImmutable();
        }

        /// <summary>Advances the lexer to the next token in the source text.</summary>
        /// <returns><c>true</c> if a token was produced and is available from <see cref="Current" />; <c>false</c> if the end of the source text has been reached, in which case <see cref="Current" /> is reset to its default value.</returns>
        public unsafe bool MoveNext()
        {
            var sourceTextIndex = _sourceTextIndex;

            if (sourceTextIndex >= SourceText.Length)
            {
                _currentToken = default;
                return false;
            }

            TokenKind tokenKind;
            nuint sourceSpanLength = 0;

            var rune = GetRune(sourceTextIndex);
            delegate*<Rune, bool> isContinuationRune;

            // The order of these checks matters: identifier start runes exclude pattern syntax
            // and pattern whitespace, and newlines are tested before whitespace so that they
            // always form their own token kind.

            if (IsIdentifierStartRune(rune))
            {
                tokenKind = TokenKind.Identifier;
                isContinuationRune = &IsIdentifierContinuationRune;
            }
            else if (IsIntegerRune(rune))
            {
                tokenKind = TokenKind.Integer;
                isContinuationRune = &IsIntegerContinuationRune;
            }
            else if (IsNewlineRune(rune))
            {
                tokenKind = TokenKind.Newline;
                isContinuationRune = &IsNewlineContinuationRune;
            }
            else if (IsSyntaxRune(rune))
            {
                tokenKind = TokenKind.Syntax;
                isContinuationRune = &IsSyntaxContinuationRune;
            }
            else if (IsWhitespaceRune(rune))
            {
                tokenKind = TokenKind.Whitespace;
                isContinuationRune = &IsWhitespaceContinuationRune;
            }
            else
            {
                tokenKind = TokenKind.Unknown;
                isContinuationRune = &IsUnknownContinuationRune;
            }

            do
            {
                // Loop until we no longer have continuation runes. The first rune is always
                // consumed, so every token is at least one rune long. Past the end of the
                // source text GetRune yields U+0000, which none of the continuation
                // predicates accept, so the loop also terminates at the end of the input.

                sourceSpanLength += 1;
                sourceTextIndex += 1;

                rune = GetRune(sourceTextIndex);
            }
            while (isContinuationRune(rune));

            var sourceSpan = new SourceSpan(SourceText, _sourceTextIndex, sourceSpanLength);
            _currentToken = new Token(tokenKind, sourceSpan);

            _sourceTextIndex += sourceSpanLength;
            return true;
        }

        /// <summary>Resets the lexer to the start of the source text and clears <see cref="Current" />.</summary>
        public void Reset()
        {
            _currentToken = default;
            _sourceTextIndex = 0;
        }

        // Matches the ASCII digits '0' through '9'.
        private static bool IsASCII_Digit(Rune rune)
            => IsInRangeInclusive(rune, '0', '9');

        // Matches the Unicode general category Pc.
        private static bool IsConnector_Punctuation(UnicodeCategory unicodeCategory)
            => unicodeCategory == UnicodeCategory.ConnectorPunctuation;

        // Matches the Unicode general category Nd.
        private static bool IsDecimal_Number(UnicodeCategory unicodeCategory)
            => unicodeCategory == UnicodeCategory.DecimalDigitNumber;

        // A rune continues an identifier when it satisfies IsID_Continue.
        private static bool IsIdentifierContinuationRune(Rune rune) => IsID_Continue(rune);

        // A rune starts an identifier when it satisfies IsID_Start.
        private static bool IsIdentifierStartRune(Rune rune) => IsID_Start(rune);

        // Determines whether a rune may appear after the first rune of an identifier.
        private static bool IsID_Continue(Rune rune)
        {
            var unicodeCategory = Rune.GetUnicodeCategory(rune);

            // \p{ID_Start} is manually inlined and the order is modified slightly
            // to allow for a better early exit chance based on how common each
            // unicode category is expected to be. The right-hand column below
            // marks the terms that come from \p{ID_Start}.

            var result = IsLetter(unicodeCategory)                  //  \p{ID_Start}    \p{L}
                      || IsDecimal_Number(unicodeCategory)          //  \p{Nd}
                      || IsLetter_Number(unicodeCategory)           //                  \p{Nl}
                      || IsNonspacing_Mark(unicodeCategory)         //  \p{Mn}
                      || IsSpacing_Mark(unicodeCategory)            //  \p{Mc}
                      || IsConnector_Punctuation(unicodeCategory)   //  \p{Pc}
                      || IsOther_ID_Start(rune)                     //                  \p{Other_ID_Start}
                      || IsOther_ID_Continue(rune);                 //  \p{Other_ID_Continue}

            return result
                && !IsPattern_Syntax(rune)                          // -\p{Pattern_Syntax}
                && !IsPattern_White_Space(rune);                    // -\p{Pattern_White_Space}
        }

        // Determines whether a rune may appear as the first rune of an identifier.
        private static bool IsID_Start(Rune rune)
        {
            var unicodeCategory = Rune.GetUnicodeCategory(rune);
            return IsID_Start(rune, unicodeCategory);
        }

        // Same as IsID_Start(Rune) but reuses an already computed unicode category.
        private static bool IsID_Start(Rune rune, UnicodeCategory unicodeCategory)
        {
            var result = IsLetter(unicodeCategory)          //  \p{L}
                      || IsLetter_Number(unicodeCategory)   //  \p{Nl}
                      || IsOther_ID_Start(rune);            //  \p{Other_ID_Start}

            return result
                && !IsPattern_Syntax(rune)                  // -\p{Pattern_Syntax}
                && !IsPattern_White_Space(rune);            // -\p{Pattern_White_Space}
        }

        // Range check over the numeric values of the UnicodeCategory enumeration.
        private static bool IsInRangeInclusive(UnicodeCategory value, UnicodeCategory lowerBound, UnicodeCategory upperBound)
            => IsInRangeInclusive((uint)(value), (uint)(lowerBound), (uint)(upperBound));

        // Range check over the scalar value of a rune; the bounds are UTF-16 code units.
        private static bool IsInRangeInclusive(Rune value, char lowerBound, char upperBound)
            => IsInRangeInclusive((uint)(value.Value), lowerBound, upperBound);

        // Determines whether lowerBound <= value <= upperBound using a single comparison.
        private static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound)
        {
            Debug.Assert(lowerBound < upperBound);

            // When value is below lowerBound the unsigned subtraction wraps around to a
            // very large number, so one comparison covers both ends of the range.
            return unchecked((value - lowerBound) <= (upperBound - lowerBound));
        }

        // Integers start with an ASCII digit.
        private static bool IsIntegerRune(Rune rune) => IsASCII_Digit(rune);

        // Integers continue for as long as ASCII digits follow.
        private static bool IsIntegerContinuationRune(Rune rune) => IsIntegerRune(rune);

        // Matches the Unicode general categories from UppercaseLetter through OtherLetter.
        private static bool IsLetter(UnicodeCategory unicodeCategory)
            => IsInRangeInclusive(unicodeCategory, UnicodeCategory.UppercaseLetter, UnicodeCategory.OtherLetter);

        // Matches the Unicode general category Nl.
        private static bool IsLetter_Number(UnicodeCategory unicodeCategory)
            => unicodeCategory == UnicodeCategory.LetterNumber;

        // Matches U+000A..U+000D, U+0085 and U+2028..U+2029.
        private static bool IsNewlineRune(Rune rune)
            => IsInRangeInclusive(rune, '\u000A', '\u000D')
            || (rune.Value == '\u0085')
            || IsInRangeInclusive(rune, '\u2028', '\u2029');

        // Consecutive newline runes are merged into a single newline token.
        private static bool IsNewlineContinuationRune(Rune rune) => IsNewlineRune(rune);

        // Matches the Unicode general category Mn.
        private static bool IsNonspacing_Mark(UnicodeCategory unicodeCategory)
            => unicodeCategory == UnicodeCategory.NonSpacingMark;

        // Hard-coded code points treated as \p{Other_ID_Continue}.
        private static bool IsOther_ID_Continue(Rune rune)
            => (rune.Value == '\u00B7')
            || (rune.Value == '\u0387')
            || IsInRangeInclusive(rune, '\u1369', '\u1371')
            || (rune.Value == '\u19DA');

        // Hard-coded code points treated as \p{Other_ID_Start}.
        private static bool IsOther_ID_Start(Rune rune)
            => IsInRangeInclusive(rune, '\u1885', '\u1886')
            || (rune.Value == '\u2118')
            || (rune.Value == '\u212E')
            || IsInRangeInclusive(rune, '\u309B', '\u309C');

        // Hard-coded code point ranges treated as \p{Pattern_Syntax}.
        private static bool IsPattern_Syntax(Rune rune)
            => IsInRangeInclusive(rune, '\u0021', '\u002F')
            || IsInRangeInclusive(rune, '\u003A', '\u0040')
            || IsInRangeInclusive(rune, '\u005B', '\u005E')
            || (rune.Value == '\u0060')
            || IsInRangeInclusive(rune, '\u007B', '\u007E')
            || IsInRangeInclusive(rune, '\u00A1', '\u00A7')
            || (rune.Value == '\u00A9')
            || IsInRangeInclusive(rune, '\u00AB', '\u00AC')
            || (rune.Value == '\u00AE')
            || IsInRangeInclusive(rune, '\u00B0', '\u00B1')
            || (rune.Value == '\u00B6')
            || (rune.Value == '\u00BB')
            || (rune.Value == '\u00BF')
            || (rune.Value == '\u00D7')
            || (rune.Value == '\u00F7')
            || IsInRangeInclusive(rune, '\u2010', '\u2027')
            || IsInRangeInclusive(rune, '\u2030', '\u203E')
            || IsInRangeInclusive(rune, '\u2041', '\u2053')
            || IsInRangeInclusive(rune, '\u2055', '\u205E')
            || IsInRangeInclusive(rune, '\u2190', '\u245F')
            || IsInRangeInclusive(rune, '\u2500', '\u2775')
            || IsInRangeInclusive(rune, '\u2794', '\u2BFF')
            || IsInRangeInclusive(rune, '\u2E00', '\u2E7F')
            || IsInRangeInclusive(rune, '\u3001', '\u3003')
            || IsInRangeInclusive(rune, '\u3008', '\u3020')
            || (rune.Value == '\u3030')
            || IsInRangeInclusive(rune, '\uFD3E', '\uFD3F')
            || IsInRangeInclusive(rune, '\uFE45', '\uFE46');

        // Hard-coded code point ranges treated as \p{Pattern_White_Space}.
        private static bool IsPattern_White_Space(Rune rune)
            => IsInRangeInclusive(rune, '\u0009', '\u000D')
            || (rune.Value == '\u0020')
            || (rune.Value == '\u0085')
            || IsInRangeInclusive(rune, '\u200E', '\u200F')
            || IsInRangeInclusive(rune, '\u2028', '\u2029');

        // Matches the Unicode general category Zs.
        private static bool IsSpace_Separator(UnicodeCategory unicodeCategory)
            => unicodeCategory == UnicodeCategory.SpaceSeparator;

        // Matches the Unicode general category Mc.
        private static bool IsSpacing_Mark(UnicodeCategory unicodeCategory)
            => unicodeCategory == UnicodeCategory.SpacingCombiningMark;

        // A syntax token starts with any pattern syntax rune.
        private static bool IsSyntaxRune(Rune rune) => IsPattern_Syntax(rune);

        // Syntax tokens never continue, so each syntax rune is its own token.
        private static bool IsSyntaxContinuationRune(Rune rune) => false;

        // Determines whether a rune is horizontal (non-newline) whitespace.
        private static bool IsWhitespaceRune(Rune rune)
        {
            var unicodeCategory = Rune.GetUnicodeCategory(rune);

            // This would normally be something like:
            //    \p{Zs}
            //    \p{Pattern_White_Space}
            //    \p{White_Space}
            //
            // However, Pattern_White_Space and White_Space have a lot
            // of overlap, additionally they include various newline
            // characters that we don't want included and Zs covers
            // basically everything else, so we simplify the logic here
            // instead.

            return IsSpace_Separator(unicodeCategory)
                || (rune.Value == '\u0009')
                || IsInRangeInclusive(rune, '\u200E', '\u200F');
        }

        // Consecutive whitespace runes are merged into a single whitespace token.
        private static bool IsWhitespaceContinuationRune(Rune rune) => IsWhitespaceRune(rune);

        // Unknown tokens never continue, so each unrecognized rune is its own token.
        private static bool IsUnknownContinuationRune(Rune rune) => false;

        // Gets the rune at the given index, or the default rune (U+0000) when the index is past the end.
        private Rune GetRune(nuint index) => (index < SourceText.Length) ? SourceText[index] : default;

        // The lexer holds no resources, so disposal is a no-op.
        void IDisposable.Dispose() { }

        object IEnumerator.Current => Current;
    }
}

## SourceSpan.cs
// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

using System;
using System.Text;

namespace TerraFX.CodeAnalysis.Source
{
    /// <summary>Defines a span within a source.</summary>
    /// <remarks>Both <see cref="Start" /> and <see cref="Length" /> are measured in runes, not in UTF-16 code units.</remarks>
    public readonly struct SourceSpan
    {
        /// <summary>Initializes a new instance of the <see cref="SourceSpan" /> struct.</summary>
        /// <param name="sourceText">The source text for the span.</param>
        /// <param name="start">The start of the span, in runes.</param>
        /// <param name="length">The length of the span, in runes.</param>
        public SourceSpan(SourceText sourceText, nuint start, nuint length)
        {
            SourceText = sourceText;
            Start = start;
            Length = length;
        }

        /// <summary>Gets the length of the span, in runes.</summary>
        public nuint Length { get; }

        /// <summary>Gets the source text for the span.</summary>
        public SourceText SourceText { get; }

        /// <summary>Gets the start of the span, in runes.</summary>
        public nuint Start { get; }

        /// <summary>Gets the rune at the specified index.</summary>
        /// <param name="index">The index of the rune to get, relative to <see cref="Start" />.</param>
        /// <returns>The rune at the specified index.</returns>
        public Rune this[nuint index] => SourceText[Start + index];

        /// <summary>Gets the text covered by the span.</summary>
        /// <returns>A string built from the runes in the span; an empty string when <see cref="Length" /> is zero.</returns>
        /// <remarks>The text is rebuilt from the runes rather than sliced out of the original string. <see cref="Start" /> and <see cref="Length" /> count runes, while a string is indexed by UTF-16 code units, so the two only agree when no rune at or before the span needs a surrogate pair.</remarks>
        public override string ToString()
        {
            var length = Length;

            if (length == 0)
            {
                return string.Empty;
            }

            var builder = new StringBuilder();

            for (nuint index = 0; index < length; index++)
            {
                // Rune.ToString yields one or two UTF-16 code units as required.
                _ = builder.Append(this[index].ToString());
            }

            return builder.ToString();
        }
    }
}

## SourceText.cs
// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

using System;
using System.Linq;
using System.Text;

namespace TerraFX.CodeAnalysis.Source
{
    /// <summary>Defines the text for a source.</summary>
    /// <remarks>The text is stored both as the original string and as an array of runes so that it can be indexed by rune. A default instance behaves like an empty source text.</remarks>
    public readonly struct SourceText
    {
        // Both fields are null for default(SourceText); every member goes through the
        // null-tolerant accessors below instead of touching the fields directly.
        private readonly Rune[] _runes;
        private readonly string _value;

        /// <summary>Initializes a new instance of the <see cref="SourceText" /> struct.</summary>
        /// <param name="value">The string used to populate the source text; <c>null</c> is treated as an empty string.</param>
        public SourceText(string value)
        {
            value ??= string.Empty;

            _runes = value.EnumerateRunes().ToArray();
            _value = value;
        }

        /// <summary>Gets the length of the source text, in runes.</summary>
        public nuint Length => (nuint)(Runes.Length);

        /// <summary>The string used to populate the source text.</summary>
        internal string Value => _value ?? string.Empty;

        // The runes of the source text, or an empty array for a default instance.
        private Rune[] Runes => _runes ?? Array.Empty<Rune>();

        /// <summary>Gets the rune at the specified index.</summary>
        /// <param name="index">The index of the rune to get.</param>
        /// <returns>The rune at the specified index.</returns>
        /// <exception cref="IndexOutOfRangeException"><paramref name="index" /> is greater than or equal to <see cref="Length" />.</exception>
        public Rune this[nuint index] => Runes[index];

        /// <inheritdoc />
        public override string ToString() => Value;
    }
}

## Token.cs
// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

using System.Text;
using TerraFX.CodeAnalysis.Source;

namespace TerraFX.CodeAnalysis.Tokens
{
    /// <summary>Represents a single token: a <see cref="TokenKind" /> paired with the span of source it covers.</summary>
    public readonly struct Token
    {
        private readonly TokenKind _kind;
        private readonly SourceSpan _sourceSpan;

        /// <summary>Creates a token of the given kind covering the given span.</summary>
        /// <param name="kind">The classification of the token.</param>
        /// <param name="sourceSpan">The span of source text the token covers.</param>
        public Token(TokenKind kind, SourceSpan sourceSpan)
        {
            _kind = kind;
            _sourceSpan = sourceSpan;
        }

        /// <summary>Gets a value indicating whether <see cref="Kind" /> is <see cref="TokenKind.Identifier" />.</summary>
        public bool IsIdentifier
        {
            get { return _kind is TokenKind.Identifier; }
        }

        /// <summary>Gets a value indicating whether <see cref="Kind" /> is <see cref="TokenKind.Integer" />.</summary>
        public bool IsInteger
        {
            get { return _kind is TokenKind.Integer; }
        }

        /// <summary>Gets a value indicating whether <see cref="Kind" /> is <see cref="TokenKind.Newline" />.</summary>
        public bool IsNewline
        {
            get { return _kind is TokenKind.Newline; }
        }

        /// <summary>Gets a value indicating whether <see cref="Kind" /> is <see cref="TokenKind.Syntax" />.</summary>
        public bool IsSyntax
        {
            get { return _kind is TokenKind.Syntax; }
        }

        /// <summary>Gets a value indicating whether <see cref="Kind" /> is <see cref="TokenKind.Whitespace" />.</summary>
        public bool IsWhitespace
        {
            get { return _kind is TokenKind.Whitespace; }
        }

        /// <summary>Gets the classification of the token.</summary>
        public TokenKind Kind
        {
            get { return _kind; }
        }

        /// <summary>Gets the span of source text the token covers.</summary>
        public SourceSpan SourceSpan
        {
            get { return _sourceSpan; }
        }

        /// <summary>Gets a rune from the token's span.</summary>
        /// <param name="index">The index of the rune, forwarded to the indexer of <see cref="SourceSpan" />.</param>
        /// <returns>The rune at <paramref name="index" /> within the span.</returns>
        public Rune this[nuint index]
        {
            get { return _sourceSpan[index]; }
        }

        /// <summary>Formats the token as its kind, a colon and a space, then the text of its span.</summary>
        /// <returns>The formatted token.</returns>
        public override string ToString()
        {
            var kindText = _kind.ToString();
            var spanText = _sourceSpan.ToString();

            return kindText + ": " + spanText;
        }
    }
}

## TokenKind.cs
// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

namespace TerraFX.CodeAnalysis.Tokens
{
    /// <summary>Enumerates the classifications a token can have.</summary>
    public enum TokenKind : uint
    {
        /// <summary>The token could not be classified; this is also the default value.</summary>
        Unknown = 0,

        /// <summary>The token is an identifier.</summary>
        Identifier = 1,

        /// <summary>The token is an integer.</summary>
        Integer = 2,

        /// <summary>The token is a newline.</summary>
        Newline = 3,

        /// <summary>The token is syntax.</summary>
        Syntax = 4,

        /// <summary>The token is whitespace.</summary>
        Whitespace = 5,
    }
}
	// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

	using System;
	using System.Collections;
	using System.Collections.Generic;
	using System.Collections.Immutable;
	using System.Diagnostics;
	using System.Globalization;
	using System.Text;
	using TerraFX.CodeAnalysis.Source;
	using TerraFX.CodeAnalysis.Tokens;

	namespace TerraFX.CodeAnalysis
	{
	/// <summary>Defines a lexical analyzer which tokenizes a source text.</summary>
	/// <remarks>
	///   <para>The lexer is a forward-only enumerator: each successful call to <see cref="MoveNext" /> classifies the rune at the current position, greedily consumes every following rune accepted by the matching continuation predicate, and exposes the result through <see cref="Current" />.</para>
	///   <para>Instances cannot be created directly; use <see cref="Tokenize(string)" /> or <see cref="Tokenize(SourceText)" />.</para>
	/// </remarks>
	public sealed partial class Lexer : IEnumerator<Token>
	{
		// The token produced by the most recent MoveNext call, or default when there is none.
		private Token _currentToken;

		// The index, in runes, of the first rune that has not yet been consumed.
		private nuint _sourceTextIndex;

		/// <summary>Initializes a new instance of the <see cref="Lexer" /> class positioned at the start of <paramref name="sourceText" />.</summary>
		/// <param name="sourceText">The source text to tokenize.</param>
		private Lexer(SourceText sourceText)
		{
			SourceText = sourceText;
			Reset();
		}

		/// <inheritdoc />
		public Token Current => _currentToken;

		/// <summary>Gets the source text which is being tokenized.</summary>
		public SourceText SourceText { get; }

		/// <summary>Tokenizes the source text.</summary>
		/// <param name="text">The source text to tokenize.</param>
		/// <returns>An immutable array of the tokens that comprise <paramref name="text" />.</returns>
		public static ImmutableArray<Token> Tokenize(string text)
		{
			var sourceText = new SourceText(text);
			return Tokenize(sourceText);
		}

		/// <summary>Tokenizes the source text.</summary>
		/// <param name="sourceText">The source text to tokenize.</param>
		/// <returns>An immutable array of the tokens that comprise <paramref name="sourceText" />; the array is empty when <paramref name="sourceText" /> contains no runes.</returns>
		public static ImmutableArray<Token> Tokenize(SourceText sourceText)
		{
			var tokensBuilder = ImmutableArray.CreateBuilder<Token>();
			var lexer = new Lexer(sourceText);

			// Only record tokens that MoveNext actually produced. Priming the lexer and then
			// using a do/while loop would add a default (Unknown, zero-length) token when the
			// source text is empty, because Current is reset to default once MoveNext fails.

			while (lexer.MoveNext())
			{
				tokensBuilder.Add(lexer.Current);
			}

			return tokensBuilder.ToImmutable();
		}

		/// <summary>Advances the lexer to the next token in the source text.</summary>
		/// <returns><c>true</c> if a token was produced and is available from <see cref="Current" />; <c>false</c> if the end of the source text has been reached, in which case <see cref="Current" /> is reset to its default value.</returns>
		public unsafe bool MoveNext()
		{
			var sourceTextIndex = _sourceTextIndex;

			if (sourceTextIndex >= SourceText.Length)
			{
				_currentToken = default;
				return false;
			}

			TokenKind tokenKind;
			nuint sourceSpanLength = 0;

			var rune = GetRune(sourceTextIndex);
			delegate*<Rune, bool> isContinuationRune;

			// The order of these checks matters: identifier start runes exclude pattern syntax
			// and pattern whitespace, and newlines are tested before whitespace so that they
			// always form their own token kind.

			if (IsIdentifierStartRune(rune))
			{
				tokenKind = TokenKind.Identifier;
				isContinuationRune = &IsIdentifierContinuationRune;
			}
			else if (IsIntegerRune(rune))
			{
				tokenKind = TokenKind.Integer;
				isContinuationRune = &IsIntegerContinuationRune;
			}
			else if (IsNewlineRune(rune))
			{
				tokenKind = TokenKind.Newline;
				isContinuationRune = &IsNewlineContinuationRune;
			}
			else if (IsSyntaxRune(rune))
			{
				tokenKind = TokenKind.Syntax;
				isContinuationRune = &IsSyntaxContinuationRune;
			}
			else if (IsWhitespaceRune(rune))
			{
				tokenKind = TokenKind.Whitespace;
				isContinuationRune = &IsWhitespaceContinuationRune;
			}
			else
			{
				tokenKind = TokenKind.Unknown;
				isContinuationRune = &IsUnknownContinuationRune;
			}

			do
			{
				// Loop until we no longer have continuation runes. The first rune is always
				// consumed, so every token is at least one rune long. Past the end of the
				// source text GetRune yields U+0000, which none of the continuation
				// predicates accept, so the loop also terminates at the end of the input.

				sourceSpanLength += 1;
				sourceTextIndex += 1;

				rune = GetRune(sourceTextIndex);
			}
			while (isContinuationRune(rune));

			var sourceSpan = new SourceSpan(SourceText, _sourceTextIndex, sourceSpanLength);
			_currentToken = new Token(tokenKind, sourceSpan);

			_sourceTextIndex += sourceSpanLength;
			return true;
		}

		/// <summary>Resets the lexer to the start of the source text and clears <see cref="Current" />.</summary>
		public void Reset()
		{
			_currentToken = default;
			_sourceTextIndex = 0;
		}

		// Matches the ASCII digits '0' through '9'.
		private static bool IsASCII_Digit(Rune rune)
			=> IsInRangeInclusive(rune, '0', '9');

		// Matches the Unicode general category Pc.
		private static bool IsConnector_Punctuation(UnicodeCategory unicodeCategory)
			=> unicodeCategory == UnicodeCategory.ConnectorPunctuation;

		// Matches the Unicode general category Nd.
		private static bool IsDecimal_Number(UnicodeCategory unicodeCategory)
			=> unicodeCategory == UnicodeCategory.DecimalDigitNumber;

		// A rune continues an identifier when it satisfies IsID_Continue.
		private static bool IsIdentifierContinuationRune(Rune rune) => IsID_Continue(rune);

		// A rune starts an identifier when it satisfies IsID_Start.
		private static bool IsIdentifierStartRune(Rune rune) => IsID_Start(rune);

		// Determines whether a rune may appear after the first rune of an identifier.
		private static bool IsID_Continue(Rune rune)
		{
			var unicodeCategory = Rune.GetUnicodeCategory(rune);

			// \p{ID_Start} is manually inlined and the order is modified slightly
			// to allow for a better early exit chance based on how common each
			// unicode category is expected to be. Terms marked "(ID_Start)" below
			// are the ones that come from \p{ID_Start}.

			var result = IsLetter(unicodeCategory)                  // \p{L} (ID_Start)
				|| IsDecimal_Number(unicodeCategory)                // \p{Nd}
				|| IsLetter_Number(unicodeCategory)                 // \p{Nl} (ID_Start)
				|| IsNonspacing_Mark(unicodeCategory)               // \p{Mn}
				|| IsSpacing_Mark(unicodeCategory)                  // \p{Mc}
				|| IsConnector_Punctuation(unicodeCategory)         // \p{Pc}
				|| IsOther_ID_Start(rune)                           // \p{Other_ID_Start} (ID_Start)
				|| IsOther_ID_Continue(rune);                       // \p{Other_ID_Continue}

			return result
				&& !IsPattern_Syntax(rune)                          // -\p{Pattern_Syntax}
				&& !IsPattern_White_Space(rune);                    // -\p{Pattern_White_Space}
		}

		// Determines whether a rune may appear as the first rune of an identifier.
		private static bool IsID_Start(Rune rune)
		{
			var unicodeCategory = Rune.GetUnicodeCategory(rune);
			return IsID_Start(rune, unicodeCategory);
		}

		// Same as IsID_Start(Rune) but reuses an already computed unicode category.
		private static bool IsID_Start(Rune rune, UnicodeCategory unicodeCategory)
		{
			var result = IsLetter(unicodeCategory)          // \p{L}
				|| IsLetter_Number(unicodeCategory)         // \p{Nl}
				|| IsOther_ID_Start(rune);                  // \p{Other_ID_Start}

			return result
				&& !IsPattern_Syntax(rune)                  // -\p{Pattern_Syntax}
				&& !IsPattern_White_Space(rune);            // -\p{Pattern_White_Space}
		}

		// Range check over the numeric values of the UnicodeCategory enumeration.
		private static bool IsInRangeInclusive(UnicodeCategory value, UnicodeCategory lowerBound, UnicodeCategory upperBound)
			=> IsInRangeInclusive((uint)(value), (uint)(lowerBound), (uint)(upperBound));

		// Range check over the scalar value of a rune; the bounds are UTF-16 code units.
		private static bool IsInRangeInclusive(Rune value, char lowerBound, char upperBound)
			=> IsInRangeInclusive((uint)(value.Value), lowerBound, upperBound);

		// Determines whether lowerBound <= value <= upperBound using a single comparison.
		private static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound)
		{
			Debug.Assert(lowerBound < upperBound);

			// When value is below lowerBound the unsigned subtraction wraps around to a
			// very large number, so one comparison covers both ends of the range.
			return unchecked((value - lowerBound) <= (upperBound - lowerBound));
		}

		// Integers start with an ASCII digit.
		private static bool IsIntegerRune(Rune rune) => IsASCII_Digit(rune);

		// Integers continue for as long as ASCII digits follow.
		private static bool IsIntegerContinuationRune(Rune rune) => IsIntegerRune(rune);

		// Matches the Unicode general categories from UppercaseLetter through OtherLetter.
		private static bool IsLetter(UnicodeCategory unicodeCategory)
			=> IsInRangeInclusive(unicodeCategory, UnicodeCategory.UppercaseLetter, UnicodeCategory.OtherLetter);

		// Matches the Unicode general category Nl.
		private static bool IsLetter_Number(UnicodeCategory unicodeCategory)
			=> unicodeCategory == UnicodeCategory.LetterNumber;

		// Matches U+000A..U+000D, U+0085 and U+2028..U+2029.
		private static bool IsNewlineRune(Rune rune)
			=> IsInRangeInclusive(rune, '\u000A', '\u000D')
			|| (rune.Value == '\u0085')
			|| IsInRangeInclusive(rune, '\u2028', '\u2029');

		// Consecutive newline runes are merged into a single newline token.
		private static bool IsNewlineContinuationRune(Rune rune) => IsNewlineRune(rune);

		// Matches the Unicode general category Mn.
		private static bool IsNonspacing_Mark(UnicodeCategory unicodeCategory)
			=> unicodeCategory == UnicodeCategory.NonSpacingMark;

		// Hard-coded code points treated as \p{Other_ID_Continue}.
		private static bool IsOther_ID_Continue(Rune rune)
			=> (rune.Value == '\u00B7')
			|| (rune.Value == '\u0387')
			|| IsInRangeInclusive(rune, '\u1369', '\u1371')
			|| (rune.Value == '\u19DA');

		// Hard-coded code points treated as \p{Other_ID_Start}.
		private static bool IsOther_ID_Start(Rune rune)
			=> IsInRangeInclusive(rune, '\u1885', '\u1886')
			|| (rune.Value == '\u2118')
			|| (rune.Value == '\u212E')
			|| IsInRangeInclusive(rune, '\u309B', '\u309C');

		// Hard-coded code point ranges treated as \p{Pattern_Syntax}.
		private static bool IsPattern_Syntax(Rune rune)
			=> IsInRangeInclusive(rune, '\u0021', '\u002F')
			|| IsInRangeInclusive(rune, '\u003A', '\u0040')
			|| IsInRangeInclusive(rune, '\u005B', '\u005E')
			|| (rune.Value == '\u0060')
			|| IsInRangeInclusive(rune, '\u007B', '\u007E')
			|| IsInRangeInclusive(rune, '\u00A1', '\u00A7')
			|| (rune.Value == '\u00A9')
			|| IsInRangeInclusive(rune, '\u00AB', '\u00AC')
			|| (rune.Value == '\u00AE')
			|| IsInRangeInclusive(rune, '\u00B0', '\u00B1')
			|| (rune.Value == '\u00B6')
			|| (rune.Value == '\u00BB')
			|| (rune.Value == '\u00BF')
			|| (rune.Value == '\u00D7')
			|| (rune.Value == '\u00F7')
			|| IsInRangeInclusive(rune, '\u2010', '\u2027')
			|| IsInRangeInclusive(rune, '\u2030', '\u203E')
			|| IsInRangeInclusive(rune, '\u2041', '\u2053')
			|| IsInRangeInclusive(rune, '\u2055', '\u205E')
			|| IsInRangeInclusive(rune, '\u2190', '\u245F')
			|| IsInRangeInclusive(rune, '\u2500', '\u2775')
			|| IsInRangeInclusive(rune, '\u2794', '\u2BFF')
			|| IsInRangeInclusive(rune, '\u2E00', '\u2E7F')
			|| IsInRangeInclusive(rune, '\u3001', '\u3003')
			|| IsInRangeInclusive(rune, '\u3008', '\u3020')
			|| (rune.Value == '\u3030')
			|| IsInRangeInclusive(rune, '\uFD3E', '\uFD3F')
			|| IsInRangeInclusive(rune, '\uFE45', '\uFE46');

		// Hard-coded code point ranges treated as \p{Pattern_White_Space}.
		private static bool IsPattern_White_Space(Rune rune)
			=> IsInRangeInclusive(rune, '\u0009', '\u000D')
			|| (rune.Value == '\u0020')
			|| (rune.Value == '\u0085')
			|| IsInRangeInclusive(rune, '\u200E', '\u200F')
			|| IsInRangeInclusive(rune, '\u2028', '\u2029');

		// Matches the Unicode general category Zs.
		private static bool IsSpace_Separator(UnicodeCategory unicodeCategory)
			=> unicodeCategory == UnicodeCategory.SpaceSeparator;

		// Matches the Unicode general category Mc.
		private static bool IsSpacing_Mark(UnicodeCategory unicodeCategory)
			=> unicodeCategory == UnicodeCategory.SpacingCombiningMark;

		// A syntax token starts with any pattern syntax rune.
		private static bool IsSyntaxRune(Rune rune) => IsPattern_Syntax(rune);

		// Syntax tokens never continue, so each syntax rune is its own token.
		private static bool IsSyntaxContinuationRune(Rune rune) => false;

		// Determines whether a rune is horizontal (non-newline) whitespace.
		private static bool IsWhitespaceRune(Rune rune)
		{
			var unicodeCategory = Rune.GetUnicodeCategory(rune);

			// This would normally be something like:
			//    \p{Zs}
			//    \p{Pattern_White_Space}
			//    \p{White_Space}
			//
			// However, Pattern_White_Space and White_Space have a lot
			// of overlap, additionally they include various newline
			// characters that we don't want included and Zs covers
			// basically everything else, so we simplify the logic here
			// instead.

			return IsSpace_Separator(unicodeCategory)
				|| (rune.Value == '\u0009')
				|| IsInRangeInclusive(rune, '\u200E', '\u200F');
		}

		// Consecutive whitespace runes are merged into a single whitespace token.
		private static bool IsWhitespaceContinuationRune(Rune rune) => IsWhitespaceRune(rune);

		// Unknown tokens never continue, so each unrecognized rune is its own token.
		private static bool IsUnknownContinuationRune(Rune rune) => false;

		// Gets the rune at the given index, or the default rune (U+0000) when the index is past the end.
		private Rune GetRune(nuint index) => (index < SourceText.Length) ? SourceText[index] : default;

		// The lexer holds no resources, so disposal is a no-op.
		void IDisposable.Dispose() { }

		object IEnumerator.Current => Current;
	}
	}
	// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

	using System;
	using System.Linq;
	using System.Text;

	namespace TerraFX.CodeAnalysis.Source
	{
	/// <summary>Defines the text for a source.</summary>
	/// <remarks>The text is stored both as the original string and as an array of runes so that it can be indexed by rune. A default instance behaves like an empty source text.</remarks>
	public readonly struct SourceText
	{
		// Both fields are null for default(SourceText); every member goes through the
		// null-tolerant accessors below instead of touching the fields directly.
		private readonly Rune[] _runes;
		private readonly string _value;

		/// <summary>Initializes a new instance of the <see cref="SourceText" /> struct.</summary>
		/// <param name="value">The string used to populate the source text; <c>null</c> is treated as an empty string.</param>
		public SourceText(string value)
		{
			value ??= string.Empty;

			_runes = value.EnumerateRunes().ToArray();
			_value = value;
		}

		/// <summary>Gets the length of the source text, in runes.</summary>
		public nuint Length => (nuint)(Runes.Length);

		/// <summary>The string used to populate the source text.</summary>
		internal string Value => _value ?? string.Empty;

		// The runes of the source text, or an empty array for a default instance.
		private Rune[] Runes => _runes ?? Array.Empty<Rune>();

		/// <summary>Gets the rune at the specified index.</summary>
		/// <param name="index">The index of the rune to get.</param>
		/// <returns>The rune at the specified index.</returns>
		/// <exception cref="IndexOutOfRangeException"><paramref name="index" /> is greater than or equal to <see cref="Length" />.</exception>
		public Rune this[nuint index] => Runes[index];

		/// <inheritdoc />
		public override string ToString() => Value;
	}
	}
	// Copyright © Tanner Gooding and Contributors. Licensed under the MIT License (MIT). See License.md in the repository root for more information.

	namespace TerraFX.CodeAnalysis.Tokens
	{
	/// <summary>Enumerates the classifications a token can have.</summary>
	public enum TokenKind : uint
	{
		/// <summary>The token could not be classified; this is also the default value.</summary>
		Unknown = 0,

		/// <summary>The token is an identifier.</summary>
		Identifier = 1,

		/// <summary>The token is an integer.</summary>
		Integer = 2,

		/// <summary>The token is a newline.</summary>
		Newline = 3,

		/// <summary>The token is syntax.</summary>
		Syntax = 4,

		/// <summary>The token is whitespace.</summary>
		Whitespace = 5,
	}
	}