Created
June 19, 2011 22:41
-
-
Save praeclarum/1034858 to your computer and use it in GitHub Desktop.
Separates an identifier into its constituent parts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using NUnit.Framework; | |
namespace DevSearchTest | |
{ | |
/// <summary>
/// Splits source-code identifiers into their lower-cased component words.
/// </summary>
class Identifier
{
    /// <summary>
    /// Separates an identifier into its constituent parts.
    /// </summary>
    /// <remarks>
    /// A new word starts at:
    /// <list type="bullet">
    /// <item>a separator character ('_', '-' or any whitespace), which is dropped;</item>
    /// <item>a lower-case letter followed by an upper-case letter ("thisIs" -> this, is);</item>
    /// <item>the last capital of an upper-case run that is followed by a lower-case
    /// letter ("HTMLElement" -> html, element).</item>
    /// </list>
    /// Digits and punctuation such as '?' or '!' never split a word. Every returned
    /// part is lower-cased with the invariant culture and interned.
    /// </remarks>
    /// <param name="rawId">The identifier to split. May be null.</param>
    /// <returns>
    /// The parts in order of appearance; an empty array when <paramref name="rawId"/>
    /// is null, empty or contains only whitespace.
    /// </returns>
    public static string[] Separate (string rawId)
    {
        var id = (rawId ?? "").Trim ();
        if (id.Length == 0) {
            return new string[0];
        }

        // Boundaries are detected on the original casing, but the parts are cut from
        // the lower-cased copy; ToLowerInvariant maps char-by-char, so indices line up.
        var idLower = id.ToLowerInvariant ();
        var parts = new List<string> ();
        var wordStart = 0;

        for (var i = 0; i < id.Length; i++) {
            var isSep = IsSeparator (id [i]);
            if (!isSep && !IsCaseBoundary (id, i)) {
                continue;
            }

            // Consecutive separators produce zero-length spans; skip those.
            if (i > wordStart) {
                parts.Add (string.Intern (idLower.Substring (wordStart, i - wordStart)));
            }

            // A separator is discarded; a case boundary character begins the next word.
            wordStart = isSep ? i + 1 : i;
        }

        // Flush the trailing word (absent when the identifier ends with a separator).
        if (wordStart < id.Length) {
            parts.Add (string.Intern (idLower.Substring (wordStart)));
        }

        return parts.ToArray ();
    }

    /// <summary>
    /// Tells whether <paramref name="ch"/> separates words and is itself discarded.
    /// </summary>
    /// <remarks>
    /// All whitespace counts, not only space and tab, so that interior whitespace is
    /// treated the same way as the leading/trailing whitespace removed by Trim.
    /// </remarks>
    static bool IsSeparator (char ch)
    {
        return ch == '_' || ch == '-' || char.IsWhiteSpace (ch);
    }

    /// <summary>
    /// Tells whether the character at <paramref name="i"/> starts a new word purely
    /// because of a change in letter case.
    /// </summary>
    static bool IsCaseBoundary (string id, int i)
    {
        if (i == 0 || !char.IsUpper (id [i])) {
            return false;
        }

        var prev = id [i - 1];

        // "fooBar": lower -> upper transition.
        if (char.IsLower (prev)) {
            return true;
        }

        // "HTMLElement": inside an upper-case run, the capital that is followed by a
        // lower-case letter belongs to the next word.
        return char.IsUpper (prev) && (i + 1) < id.Length && char.IsLower (id [i + 1]);
    }
}
[TestFixture]
public class Test
{
    // Asserts that splitting `raw` yields exactly `expected`, in order.
    static void Splits (string raw, params string[] expected)
    {
        Assert.AreEqual (expected, Identifier.Separate (raw));
    }

    // Null, empty and whitespace-only input all produce no parts.
    [Test]
    public void Empty ()
    {
        Splits ("");
        Splits (null);
        Splits (" ");
        Splits (" \r\n \t ");
    }

    // Digits inside a word never cause a split, whatever the letter case.
    [Test]
    public void SingleWord ()
    {
        Splits ("j35t3r", "j35t3r");
        Splits ("J35T3R", "j35t3r");
        Splits ("J35t3r", "j35t3r");
    }

    // snake_case, including leading, trailing and repeated underscores.
    [Test]
    public void Underscores ()
    {
        Splits ("this_is_long", "this", "is", "long");
        Splits ("this_is_long_m", "this", "is", "long", "m");
        Splits ("___this_is_long___", "this", "is", "long");
        Splits ("_this_is_long_", "this", "is", "long");
        Splits ("THIS_IS_Long", "this", "is", "long");
        Splits ("_THIS_IS_Long", "this", "is", "long");
        Splits ("m_this_is_long", "m", "this", "is", "long");
        Splits ("_this_is_long", "this", "is", "long");
        Splits ("m_this_is_long_32", "m", "this", "is", "long", "32");
        Splits ("m_this_is_long32", "m", "this", "is", "long32");
    }

    // Plain CamelCase / PascalCase, optionally mixed with an underscore prefix.
    [Test]
    public void Camel ()
    {
        Splits ("ThisIsLong", "this", "is", "long");
        Splits ("m_ThisIsLong", "m", "this", "is", "long");
        Splits ("_ThisIsLong", "this", "is", "long");
        Splits ("ThisIsLongExt", "this", "is", "long", "ext");
        Splits ("ThisIsLongExt32", "this", "is", "long", "ext32");
    }

    // Acronym runs: the last capital of a run starts the following word.
    [Test]
    public void HardCamel ()
    {
        Splits ("HTMLElement", "html", "element");
        Splits ("HtmlElement", "html", "element");
        Splits ("CGContextEOClip", "cg", "context", "eo", "clip");
        Splits ("CGContextSetCMYKFillColor", "cg", "context", "set", "cmyk", "fill", "color");
        Splits ("CGContextTranslateCTM", "cg", "context", "translate", "ctm");
        Splits ("CGContextTranslateCTM_", "cg", "context", "translate", "ctm");
    }

    // Trailing '?' and '!' stay attached to the last word.
    [Test]
    public void RubyIdentifiers ()
    {
        Splits ("thisIsLong?", "this", "is", "long?");
        Splits ("THIS_IS_Long?", "this", "is", "long?");
        Splits ("_THIS_IS_Long!", "this", "is", "long!");
    }

    // Spaces and tabs act as separators, alone or mixed with underscores.
    [Test]
    public void Spaces ()
    {
        Splits ("this Is Long?", "this", "is", "long?");
        Splits ("THIS_IS Long?", "this", "is", "long?");
        Splits ("\t_TH \t IS_IS_Long!", "th", "is", "is", "long!");
    }

    // Hyphen-separated names; punctuation inside a part is preserved.
    [Test]
    public void LispIdentifiers ()
    {
        Splits ("this-is-long?", "this", "is", "long?");
        Splits ("THIS_IS_Long?", "this", "is", "long?");
        Splits ("_THIS-IS?-Long!", "this", "is?", "long!");
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment