Skip to content

Instantly share code, notes, and snippets.

@FabienDehopre
Last active January 27, 2021 08:23
Show Gist options
  • Save FabienDehopre/5245476 to your computer and use it in GitHub Desktop.
Save FabienDehopre/5245476 to your computer and use it in GitHub Desktop.
Validate C# identifier name
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
/// <summary>
/// Extension methods for checking whether a string is a valid C# identifier.
/// </summary>
public static class IdentifierExtensions
{
    // definition of a valid C# identifier: http://msdn.microsoft.com/en-us/library/aa664670(v=vs.71).aspx
    //
    // The constants below hold the *contents* of a regex character class (no brackets).
    // They are combined into single character classes instead of nested alternations:
    // a pattern shaped like "start((a|b|c)+)*" backtracks exponentially on a long
    // input that almost matches, whereas "[start][abc]*" is matched in linear time.
    private const string FORMATTING_CHARACTER = @"\p{Cf}";
    private const string CONNECTING_CHARACTER = @"\p{Pc}";
    private const string DECIMAL_DIGIT_CHARACTER = @"\p{Nd}";
    private const string COMBINING_CHARACTER = @"\p{Mn}\p{Mc}";
    private const string LETTER_CHARACTER = @"\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}";
    private const string IDENTIFIER_PART_CHARACTER = "[" +
        LETTER_CHARACTER +
        DECIMAL_DIGIT_CHARACTER +
        CONNECTING_CHARACTER +
        COMBINING_CHARACTER +
        FORMATTING_CHARACTER + "]";
    private const string IDENTIFIER_START_CHARACTER = "[" + LETTER_CHARACTER + "_]";
    private const string IDENTIFIER_OR_KEYWORD = IDENTIFIER_START_CHARACTER + IDENTIFIER_PART_CHARACTER + "*";

    // C# keywords: http://msdn.microsoft.com/en-us/library/x53a06bb(v=vs.71).aspx
    private static readonly HashSet<string> _keywords = new HashSet<string>
    {
        "__arglist", "__makeref", "__reftype", "__refvalue",
        "abstract", "as", "base", "bool",
        "break", "byte", "case", "catch",
        "char", "checked", "class", "const",
        "continue", "decimal", "default", "delegate",
        "do", "double", "else", "enum",
        "event", "explicit", "extern", "false",
        "finally", "fixed", "float", "for",
        "foreach", "goto", "if", "implicit",
        "in", "int", "interface", "internal",
        "is", "lock", "long", "namespace",
        "new", "null", "object", "operator",
        "out", "override", "params", "private",
        "protected", "public", "readonly", "ref",
        "return", "sbyte", "sealed", "short",
        "sizeof", "stackalloc", "static", "string",
        "struct", "switch", "this", "throw",
        "true", "try", "typeof", "uint",
        "ulong", "unchecked", "unsafe", "ushort",
        "using", "virtual", "volatile", "void",
        "while"
    };

    // \A and \z anchor to the absolute start and end of the input. "$" would also match
    // just before a trailing '\n', which would let "foo\n" pass as an identifier.
    private static readonly Regex _validIdentifierRegex =
        new Regex(@"\A" + IDENTIFIER_OR_KEYWORD + @"\z", RegexOptions.Compiled);

    /// <summary>
    /// Determines whether <paramref name="identifier"/> is a valid C# identifier.
    /// </summary>
    /// <param name="identifier">The candidate identifier; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when the normalized text matches the identifier grammar and is not a
    /// keyword, or when it is an <c>@</c>-prefixed identifier or keyword
    /// (for example <c>@class</c>); otherwise <c>false</c>. Null, empty, whitespace-only
    /// and non-normalizable input yields <c>false</c>.
    /// </returns>
    public static bool IsValidIdentifier(this string identifier)
    {
        if (String.IsNullOrWhiteSpace(identifier))
        {
            return false;
        }

        string normalizedIdentifier;
        try
        {
            normalizedIdentifier = identifier.Normalize();
        }
        catch (ArgumentException)
        {
            // Normalize() rejects strings containing invalid Unicode (such as an unpaired
            // surrogate). Such text can never be an identifier, so report it as invalid
            // instead of letting a validation predicate throw.
            return false;
        }

        // Verbatim identifier: the '@' prefix allows keywords to be used as identifiers,
        // so only the grammar is checked for the remainder, not the keyword list.
        // The char comparison is ordinal; StartsWith(string) would be culture-sensitive.
        if (normalizedIdentifier.Length > 1 && normalizedIdentifier[0] == '@')
        {
            return _validIdentifierRegex.IsMatch(normalizedIdentifier.Substring(1));
        }

        // Plain identifier: must match the grammar and must not be a reserved keyword.
        return _validIdentifierRegex.IsMatch(normalizedIdentifier)
            && !_keywords.Contains(normalizedIdentifier);
    }
}
@FabienDehopre
Copy link
Author

License?

It's free to use.

@SolidAlloy
Copy link

Please replace "unchekeced" with "unchecked" in the keywords array.

@FabienDehopre
Copy link
Author

@SolidAlloy I fixed the typo you reported. Thanks

@bpierson
Copy link

You could probably make this faster by replacing the string[] _keywords with HashSet<string> _keywords. In line 60 where you are using Contains(...), you are "secretly" using LINQ, which is secretly downcasting your string[] to IEnumerable<string>. A good optimization for things that should run fast is to see if you can get rid of using System.Linq at the top of your code.

@bpierson
Copy link

Here is a test that highlights the difference. It can be quite dramatic with larger inputs:

/// <summary>
/// Micro-benchmark comparing keyword lookups through LINQ's <c>Contains</c> on a
/// <c>string[]</c> against <c>HashSet&lt;string&gt;.Contains</c>.
/// </summary>
public class LINQ_VS_HashSet
{
	// Keyword list searched through the array-based Contains call.
	private static readonly string[] _array =
	{
		"abstract",  "event",      "new",        "struct",
		"as",        "explicit",   "null",       "switch",
		"base",      "extern",     "object",     "this",
		"bool",      "false",      "operator",   "throw",
		"break",     "finally",    "out",        "true",
		"byte",      "fixed",      "override",   "try",
		"case",      "float",      "params",     "typeof",
		"catch",     "for",        "private",    "uint",
		"char",      "foreach",    "protected",  "ulong",
		"checked",   "goto",       "public",     "unchecked",
		"class",     "if",         "readonly",   "unsafe",
		"const",     "implicit",   "ref",        "ushort",
		"continue",  "in",         "return",     "using",
		"decimal",   "int",        "sbyte",      "virtual",
		"default",   "interface",  "sealed",     "volatile",
		"delegate",  "internal",   "short",      "void",
		"do",        "is",         "sizeof",     "while",
		"double",    "lock",       "stackalloc",
		"else",      "long",       "static",
		"enum",      "namespace",  "string"
	};

	// Same keywords as _array, held in a hash set. Declared after _array because
	// static field initializers run in textual order.
	private static readonly HashSet<string> _hashset = new HashSet<string>( _array );

	/// <summary>
	/// Runs both lookups over the words of a fixed sentence and prints, for each,
	/// the iteration count, the number of matches and the elapsed milliseconds.
	/// </summary>
	public static void Test()
	{
		const string tester = "This is a short test string that will show the typeof behavior expected";
		const int iterations = 500000;
		string[] words = tester.Split( ' ' );
		int arrayHits = 0;
		int hashSetHits = 0;

		// Warm-up: exercise both lookup paths before anything is timed.
		for( int round = 0; round < 100; round++ )
		{
			foreach( string word in words )
			{
				_ = _hashset.Contains( word );
			}
			foreach( string word in words )
			{
				_ = _array.Contains( word );
			}
		}

		// Timed run: array lookup.
		var arrayClock = new Stopwatch();
		arrayClock.Start();
		for( int round = 0; round < iterations; round++ )
		{
			foreach( string word in words )
			{
				if( _array.Contains( word ) )
				{
					arrayHits++;
				}
			}
		}
		arrayClock.Stop();

		// Timed run: hash set lookup.
		var hashSetClock = new Stopwatch();
		hashSetClock.Start();
		for( int round = 0; round < iterations; round++ )
		{
			foreach( string word in words )
			{
				if( _hashset.Contains( word ) )
				{
					hashSetHits++;
				}
			}
		}
		hashSetClock.Stop();

		Console.WriteLine( $"Array: {iterations} iterations, {arrayHits} matches, {arrayClock.ElapsedMilliseconds} ms" );
		Console.WriteLine( $"HashSet: {iterations} iterations, {hashSetHits} matches, {hashSetClock.ElapsedMilliseconds} ms" );
	}
}

This gives the following results on my machine:

Array: 500000 iterations, 2000000 matches, 1528 ms
HashSet: 500000 iterations, 2000000 matches, 104 ms
Press any key to continue . . .

@FabienDehopre
Copy link
Author

Thanks @bpierson. I've updated the gist.
Of course, the list of keywords should also be updated, but I don't currently have the free time to do it.

@piotrstenke
Copy link

Your hashset is missing four keywords: __arglist, __makeref, __reftype and __refvalue. Though they are not mentioned in the documentation, they exist and are not valid identifiers.

@FabienDehopre
Copy link
Author

Thanks @piotrstenke. I've updated the list of keywords.
I also took the opportunity to rearrange the list of keywords and to format the code using the dotnet-format tool.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment