Skip to content

Instantly share code, notes, and snippets.

@praeclarum
Created June 19, 2011 22:41
Show Gist options
  • Save praeclarum/1034858 to your computer and use it in GitHub Desktop.
Save praeclarum/1034858 to your computer and use it in GitHub Desktop.
Separates an identifier into its constituent parts
using System;
using System.Collections.Generic;
using NUnit.Framework;
namespace DevSearchTest
{
/// <summary>
/// Splits programming-language identifiers (snake_case, kebab-case,
/// camelCase, PascalCase, or whitespace separated) into lower-cased words.
/// </summary>
class Identifier
{
    /// <summary>
    /// Separates an identifier into its constituent words.
    /// </summary>
    /// <param name="rawId">
    /// The identifier to split. May be null; surrounding whitespace is ignored.
    /// </param>
    /// <returns>
    /// The words in order of appearance, lower-cased with the invariant culture
    /// and interned. An empty array is returned when the input is null, empty,
    /// or consists only of whitespace and/or separators.
    /// </returns>
    public static string[] Separate(string rawId)
    {
        var id = (rawId ?? "").Trim ();
        if (id.Length == 0) {
            return new string[0];
        }

        // Boundaries are detected on the original casing, but the words are
        // cut out of the lower-cased copy using the same offsets.
        var idLower = id.ToLowerInvariant ();
        var parts = new List<string> ();
        var wordStart = 0;

        for (var i = 0; i < id.Length; i++) {
            var isSep = IsSeparator (id [i]);
            if (!isSep && !IsCaseBoundary (id, i)) {
                continue;
            }

            // Consecutive separators produce zero-length spans; skip those.
            if (i > wordStart) {
                parts.Add (string.Intern (idLower.Substring (wordStart, i - wordStart)));
            }

            // A separator is dropped; a case boundary character starts the next word.
            wordStart = isSep ? i + 1 : i;
        }

        // Flush the trailing word, if the identifier did not end on a separator.
        if (wordStart < id.Length) {
            parts.Add (string.Intern (idLower.Substring (wordStart)));
        }

        return parts.ToArray ();
    }

    // True for characters that split words and are themselves discarded:
    // underscore, hyphen, and any whitespace (not only space and tab, so that
    // interior newlines are handled the same way Trim handles outer ones).
    static bool IsSeparator (char ch)
    {
        return ch == '_' || ch == '-' || char.IsWhiteSpace (ch);
    }

    // True when the upper-case character at index i begins a new word:
    // either it follows a lower-case character ("fooBar" -> foo|Bar), or it is
    // the last capital of an upper-case run followed by a lower-case character
    // ("HTMLElement" -> HTML|Element).
    static bool IsCaseBoundary (string id, int i)
    {
        if (i == 0 || !char.IsUpper (id [i])) {
            return false;
        }

        var prev = id [i - 1];
        if (char.IsLower (prev)) {
            return true;
        }

        return char.IsUpper (prev) && (i + 1) < id.Length && char.IsLower (id [i + 1]);
    }
}
/// <summary>
/// NUnit fixture exercising Identifier.Separate across the supported
/// identifier conventions.
/// </summary>
[TestFixture]
public class Test
{
    // Asserts that splitting `input` yields exactly the `expected` words, in order.
    // Calling it with no expected words asserts an empty result.
    static void AssertParts (string input, params string[] expected)
    {
        Assert.AreEqual (expected, Identifier.Separate (input));
    }

    // Null, empty and whitespace-only inputs produce no words.
    [Test]
    public void Empty ()
    {
        AssertParts ("");
        AssertParts (null);
        AssertParts (" ");
        AssertParts (" \r\n \t ");
    }

    // Digits never introduce a word boundary, whatever the letter casing.
    [Test]
    public void SingleWord ()
    {
        AssertParts ("j35t3r", "j35t3r");
        AssertParts ("J35T3R", "j35t3r");
        AssertParts ("J35t3r", "j35t3r");
    }

    // snake_case, including leading, trailing and repeated underscores.
    [Test]
    public void Underscores ()
    {
        AssertParts ("this_is_long", "this", "is", "long");
        AssertParts ("this_is_long_m", "this", "is", "long", "m");
        AssertParts ("___this_is_long___", "this", "is", "long");
        AssertParts ("_this_is_long_", "this", "is", "long");
        AssertParts ("THIS_IS_Long", "this", "is", "long");
        AssertParts ("_THIS_IS_Long", "this", "is", "long");
        AssertParts ("m_this_is_long", "m", "this", "is", "long");
        AssertParts ("_this_is_long", "this", "is", "long");
        AssertParts ("m_this_is_long_32", "m", "this", "is", "long", "32");
        AssertParts ("m_this_is_long32", "m", "this", "is", "long32");
    }

    // Plain PascalCase, optionally mixed with an underscore prefix.
    [Test]
    public void Camel ()
    {
        AssertParts ("ThisIsLong", "this", "is", "long");
        AssertParts ("m_ThisIsLong", "m", "this", "is", "long");
        AssertParts ("_ThisIsLong", "this", "is", "long");
        AssertParts ("ThisIsLongExt", "this", "is", "long", "ext");
        AssertParts ("ThisIsLongExt32", "this", "is", "long", "ext32");
    }

    // Upper-case acronym runs embedded in PascalCase names.
    [Test]
    public void HardCamel ()
    {
        AssertParts ("HTMLElement", "html", "element");
        AssertParts ("HtmlElement", "html", "element");
        AssertParts ("CGContextEOClip", "cg", "context", "eo", "clip");
        AssertParts ("CGContextSetCMYKFillColor", "cg", "context", "set", "cmyk", "fill", "color");
        AssertParts ("CGContextTranslateCTM", "cg", "context", "translate", "ctm");
        AssertParts ("CGContextTranslateCTM_", "cg", "context", "translate", "ctm");
    }

    // Trailing '?' and '!' stay attached to the final word.
    [Test]
    public void RubyIdentifiers ()
    {
        AssertParts ("thisIsLong?", "this", "is", "long?");
        AssertParts ("THIS_IS_Long?", "this", "is", "long?");
        AssertParts ("_THIS_IS_Long!", "this", "is", "long!");
    }

    // Spaces and tabs act as separators, alone or mixed with underscores.
    [Test]
    public void Spaces ()
    {
        AssertParts ("this Is Long?", "this", "is", "long?");
        AssertParts ("THIS_IS Long?", "this", "is", "long?");
        AssertParts ("\t_TH \t IS_IS_Long!", "th", "is", "is", "long!");
    }

    // Hyphen-separated names; punctuation remains part of the preceding word.
    [Test]
    public void LispIdentifiers ()
    {
        AssertParts ("this-is-long?", "this", "is", "long?");
        AssertParts ("THIS_IS_Long?", "this", "is", "long?");
        AssertParts ("_THIS-IS?-Long!", "this", "is?", "long!");
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment