Skip to content

Instantly share code, notes, and snippets.

@MihaZupan
Created September 3, 2019 18:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MihaZupan/2240e24ff571ec7bdbed1cd9e9ab2bcc to your computer and use it in GitHub Desktop.
Save MihaZupan/2240e24ff571ec7bdbed1cd9e9ab2bcc to your computer and use it in GitHub Desktop.
/// <summary>
/// Character classification helpers implemented as bitmask lookups.
/// </summary>
/// <remarks>
/// Most predicates test a single bit of a precomputed 64-bit mask instead of chaining
/// comparisons. Every mask is documented next to its declaration together with the
/// set of characters it encodes; the equivalent comparison form is given in each method.
/// Casts of possibly negative offsets to <see cref="uint"/> are wrapped in
/// <c>unchecked</c> so the range checks behave the same even if the assembly is compiled
/// with arithmetic overflow checking enabled.
/// </remarks>
public static class CharHelper
{
    // Relative to 'I' (73): bits 0, 13, 15 -> 'I', 'V', 'X'; bits 32, 45, 47 -> 'i', 'v', 'x'.
    private const long RomanLetterMask = 175926155452417L;

    // Relative to 'I' or 'i': bits 0, 13, 15 -> I/V/X in a single letter case.
    private const int RomanLetterSingleCaseMask = 40961;

    // Bits 9-13 ('\t', '\n', '\v', '\f', '\r') and bit 32 (' ').
    private const long WhitespaceMask = 4294983168L;

    // WhitespaceMask plus bit 0 ('\0').
    private const long WhitespaceOrZeroMask = 4294983169L;

    // Code points 0-63: bits 33-47 ('!'..'/') and 58-63 (':'..'?').
    private const long AsciiPunctuationLowMask = -287948909764935680L;

    // Code points 64-127, relative to 64: bit 0 ('@'), bits 27-32 ('['..'`'), bits 59-62 ('{'..'~').
    private const long AsciiPunctuationHighMask = 8646911293007069185L;

    // AsciiPunctuationLowMask without bit 38 ('&') and with bit 0 ('\0').
    private const long PunctuationOrZeroWithoutAmpersandLowMask = -287949184642842623L;

    // PunctuationOrZeroWithoutAmpersandLowMask combined with WhitespaceMask.
    private const long SpaceOrPunctuationLowMask = -287949180347859455L;

    // Relative to 'A' (65): bits 0-25 ('A'..'Z') and bits 32-57 ('a'..'z').
    private const long AlphaMask = 288230371923853311L;

    // Code points 0-63: bits 48-57 ('0'..'9').
    private const long AlphaNumericLowMask = 287948901175001088L;

    // Code points 64-127, relative to 64: bits 1-26 ('A'..'Z') and bits 33-58 ('a'..'z').
    private const long AlphaNumericHighMask = 576460743847706622L;

    // Code points 0-63: ! # $ % & ' * + - . / = ?
    private const long EmailUsernameSpecialLowMask = -6917268469155102720L;

    // Code points 64-127, relative to 64: ^ _ ` { | } ~
    private const long EmailUsernameSpecialHighMask = 8646911292067545088L;

    /// <summary>
    /// Determines whether <paramref name="c"/> is one of the Roman numeral letters
    /// <c>I</c>, <c>V</c>, <c>X</c>, <c>i</c>, <c>v</c> or <c>x</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for the six letters above; otherwise <c>false</c>.</returns>
    /// <remarks>The letters L, C, D and M are not supported.</remarks>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsRomanLetterPartial(char c)
    {
        // Equivalent to: IsRomanLetterLowerPartial(c) || IsRomanLetterUpperPartial(c)
        int offset = c - 'I';

        // Characters below 'I' give a negative offset; the unchecked cast wraps it to a
        // large value so it is rejected here instead of throwing under overflow checking.
        if (unchecked((uint)offset) > 47) // 47 == 'x' - 'I'
        {
            return false;
        }

        return ((RomanLetterMask >> offset) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is one of the lowercase Roman numeral
    /// letters <c>i</c>, <c>v</c> or <c>x</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for <c>i</c>, <c>v</c> and <c>x</c>; otherwise <c>false</c>.</returns>
    /// <remarks>The letters l, c, d and m are not supported.</remarks>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsRomanLetterLowerPartial(char c)
    {
        // Equivalent to: c == 'i' || c == 'v' || c == 'x'
        int offset = c - 'i';

        // unchecked: a negative offset must wrap and be rejected, never throw.
        if (unchecked((uint)offset) > 15) // 15 == 'x' - 'i'
        {
            return false;
        }

        return ((RomanLetterSingleCaseMask >> offset) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is one of the uppercase Roman numeral
    /// letters <c>I</c>, <c>V</c> or <c>X</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for <c>I</c>, <c>V</c> and <c>X</c>; otherwise <c>false</c>.</returns>
    /// <remarks>The letters L, C, D and M are not supported.</remarks>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsRomanLetterUpperPartial(char c)
    {
        // Equivalent to: c == 'I' || c == 'V' || c == 'X'
        int offset = c - 'I';

        // unchecked: a negative offset must wrap and be rejected, never throw.
        if (unchecked((uint)offset) > 15) // 15 == 'X' - 'I'
        {
            return false;
        }

        return ((RomanLetterSingleCaseMask >> offset) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is a whitespace character: space (U+0020),
    /// tab (U+0009), newline (U+000A), line tabulation (U+000B), form feed (U+000C)
    /// or carriage return (U+000D).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for the six characters above; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsWhitespace(this char c)
    {
        // 2.1 Characters and lines
        // Equivalent to: c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'
        if (c > 32)
        {
            return false;
        }

        return ((WhitespaceMask >> c) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is below the space character or is
    /// reported as a control character by <see cref="char.IsControl(char)"/>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> if either condition holds; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsControl(this char c)
    {
        return c < ' ' || char.IsControl(c);
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is a symbol that can be escaped: any ASCII
    /// punctuation character, or the bullet character <c>•</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> if the character is escapable; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsEscapableSymbol(this char c)
    {
        // char.IsSymbol is not used because it also accepts Unicode symbols that cannot be escaped
        // based on the specification.
        // Equivalent to: (c > ' ' && c < '0') || (c > '9' && c < 'A') || (c > 'Z' && c < 'a')
        //                || (c > 'z' && c < 127) || c == '•'
        int codePoint = c;
        if (codePoint > 126)
        {
            return c == '•';
        }

        return codePoint < 64
            ? ((AsciiPunctuationLowMask >> codePoint) & 1) != 0
            : ((AsciiPunctuationHighMask >> (codePoint - 64)) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is a whitespace character (see
    /// <see cref="IsWhitespace(char)"/>) or the null character <c>'\0'</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for whitespace or <c>'\0'</c>; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsWhiteSpaceOrZero(this char c)
    {
        // Equivalent to: IsWhitespace(c) || IsZero(c)
        if (c > 32)
        {
            return false;
        }

        return ((WhitespaceOrZeroMask >> c) & 1) != 0;
    }

    /// <summary>
    /// Classifies <paramref name="c"/> as a space and/or punctuation character.
    /// </summary>
    /// <param name="c">The character to classify.</param>
    /// <param name="space">
    /// Set to <c>true</c> for <c>'\0'</c>, <c>' '</c>, <c>'\t'</c>..<c>'\r'</c>, U+0085 and
    /// U+00A0, and for characters above U+00FF whose Unicode category is a space, line or
    /// paragraph separator.
    /// </param>
    /// <param name="punctuation">
    /// Set to <c>true</c> for <c>'\0'</c> and ASCII punctuation other than <c>&amp;</c>, and for
    /// characters above U+00FF whose Unicode category is one of the punctuation categories.
    /// </param>
    /// <remarks>
    /// The character <c>&amp;</c> is deliberately not considered punctuation: it introduces
    /// HTML entities, so a <c>&amp;</c> is assumed to be more likely followed by a valid HTML
    /// entity that represents a non-punctuation character.
    /// </remarks>
    public static void CheckUnicodeCategory(this char c, out bool space, out bool punctuation)
    {
        // Credits: code from CommonMark.NET
        // Copyright (c) 2014, Kārlis Gaņģis All rights reserved.
        // See license for details: https://github.com/Knagis/CommonMark.NET/blob/master/LICENSE.md
        if (c <= 'ÿ')
        {
            // Equivalent to:
            // space = c == '\0' || c == ' ' || (c >= '\t' && c <= '\r') || c == '\u00a0' || c == '\u0085';
            // punctuation = c == '\0' || (c >= 33 && c <= 47 && c != 38) || (c >= 58 && c <= 64)
            //               || (c >= 91 && c <= 96) || (c >= 123 && c <= 126);
            int codePoint = c;
            if (codePoint > 126)
            {
                space = c == '\u00a0' || c == '\u0085';
                punctuation = false;
            }
            else
            {
                space = codePoint < 33 && ((WhitespaceOrZeroMask >> codePoint) & 1) != 0;
                punctuation = codePoint < 64
                    ? ((PunctuationOrZeroWithoutAmpersandLowMask >> codePoint) & 1) != 0
                    : ((AsciiPunctuationHighMask >> (codePoint - 64)) & 1) != 0;
            }
        }
        else
        {
            var category = CharUnicodeInfo.GetUnicodeCategory(c);
            space = category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
            punctuation = !space &&
                (category == UnicodeCategory.ConnectorPunctuation
                || category == UnicodeCategory.DashPunctuation
                || category == UnicodeCategory.OpenPunctuation
                || category == UnicodeCategory.ClosePunctuation
                || category == UnicodeCategory.InitialQuotePunctuation
                || category == UnicodeCategory.FinalQuotePunctuation
                || category == UnicodeCategory.OtherPunctuation);
        }
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> would be classified as a space or as
    /// punctuation by <see cref="CheckUnicodeCategory(char, out bool, out bool)"/>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> if the character is a space or punctuation; otherwise <c>false</c>.</returns>
    internal static bool IsSpaceOrPunctuation(this char c)
    {
        if (c <= 'ÿ')
        {
            // Equivalent to:
            // c == '\0' || c == ' ' || (c >= '\t' && c <= '\r') || c == '\u00a0' || c == '\u0085' ||
            // (c >= 33 && c <= 47 && c != 38) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96)
            // || (c >= 123 && c <= 126)
            int codePoint = c;
            if (codePoint > 126)
            {
                return c == '\u00a0' || c == '\u0085';
            }

            return codePoint < 64
                ? ((SpaceOrPunctuationLowMask >> codePoint) & 1) != 0
                : ((AsciiPunctuationHighMask >> (codePoint - 64)) & 1) != 0;
        }
        else
        {
            var category = CharUnicodeInfo.GetUnicodeCategory(c);
            return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator
                || category == UnicodeCategory.ConnectorPunctuation
                || category == UnicodeCategory.DashPunctuation
                || category == UnicodeCategory.OpenPunctuation
                || category == UnicodeCategory.ClosePunctuation
                || category == UnicodeCategory.InitialQuotePunctuation
                || category == UnicodeCategory.FinalQuotePunctuation
                || category == UnicodeCategory.OtherPunctuation;
        }
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is an ASCII letter
    /// (<c>a</c>-<c>z</c> or <c>A</c>-<c>Z</c>).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for ASCII letters; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsAlpha(this char c)
    {
        // Equivalent to: (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
        int offset = c - 'A';

        // unchecked: characters below 'A' must wrap and be rejected, never throw.
        if (unchecked((uint)offset) > 57) // 57 == 'z' - 'A'
        {
            return false;
        }

        return ((AlphaMask >> offset) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is an ASCII letter or an ASCII digit
    /// (<c>a</c>-<c>z</c>, <c>A</c>-<c>Z</c> or <c>0</c>-<c>9</c>).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for ASCII letters and digits; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsAlphaNumeric(this char c)
    {
        // Equivalent to: (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
        int codePoint = c;
        if (codePoint > 122) // 'z'
        {
            return false;
        }

        return codePoint < 64
            ? ((AlphaNumericLowMask >> codePoint) & 1) != 0
            : ((AlphaNumericHighMask >> (codePoint - 64)) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is an ASCII punctuation character: one of
    /// <c>! " # $ % &amp; ' ( ) * + , - . / : ; &lt; = &gt; ? @ [ \ ] ^ _ ` { | } ~</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for ASCII punctuation; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsAsciiPunctuation(this char c)
    {
        // 2.1 Characters and lines
        // The two masks cover code points 33-47, 58-64, 91-96 and 123-126.
        int codePoint = c;
        if (codePoint > 126)
        {
            return false;
        }

        return codePoint < 64
            ? ((AsciiPunctuationLowMask >> codePoint) & 1) != 0
            : ((AsciiPunctuationHighMask >> (codePoint - 64)) & 1) != 0;
    }

    /// <summary>
    /// Determines whether <paramref name="c"/> is one of the special characters
    /// <c>. ! # $ % &amp; ' * + / = ? ^ _ ` { | } ~ -</c>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for the characters above; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsEmailUsernameSpecialChar(char c)
    {
        // Equivalent to: ".!#$%&'*+/=?^_`{|}~-+.~".IndexOf(c) >= 0
        int codePoint = c;
        if (codePoint > 126)
        {
            return false;
        }

        return codePoint < 64
            ? ((EmailUsernameSpecialLowMask >> codePoint) & 1) != 0
            : ((EmailUsernameSpecialHighMask >> (codePoint - 64)) & 1) != 0;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment