Skip to content

Instantly share code, notes, and snippets.

@randyburden
Created March 12, 2013 23:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save randyburden/5148112 to your computer and use it in GitHub Desktop.
Save randyburden/5148112 to your computer and use it in GitHub Desktop.
CapitalizationHelper intelligently capitalizes English names, taking into account some common names that do not conform to the normal English capitalization rules.
using System;
using System.Globalization;
using System.Text.RegularExpressions;
namespace Utilities
{
/// <summary>
/// Provides intelligent capitalization methods.
/// </summary>
public static class CapitalizationHelper
{
    /// <summary>
    /// Matches one lowercase ASCII letter followed by an apostrophe at the very start of a name
    /// (the "o'" in "o'brien"). The pattern is anchored because the caller always treats the
    /// first two characters as the prefix; an unanchored match such as the "e'" in "rosie's"
    /// would otherwise be mistaken for a leading prefix.
    /// </summary>
    private static readonly Regex FindALetterAndApostrophe = new Regex( "^[a-z]'", RegexOptions.Compiled );

    /// <summary>
    /// Culture-invariant casing rules. The name heuristics below compare against ASCII literals
    /// ("mc", "mac", [a-z]), so casing must not vary with the current thread culture.
    /// </summary>
    private static readonly TextInfo InvariantTextInfo = CultureInfo.InvariantCulture.TextInfo;

    /// <summary>
    /// Intelligently capitalizes English names taking into account some common names that do not conform
    /// to the normal English capitalization rules.
    /// </summary>
    /// <remarks>
    /// The input is split on the space character, each part is capitalized independently and the parts
    /// are re-joined with a single space, so runs of spaces and leading/trailing spaces are collapsed.
    /// </remarks>
    /// <param name="nameToCapitalize">Name to capitalize</param>
    /// <returns>
    /// Capitalized name. A null, empty or whitespace-only input is returned unchanged.
    /// </returns>
    public static string CapitalizeName( this string nameToCapitalize )
    {
        if ( string.IsNullOrWhiteSpace( nameToCapitalize ) )
            return nameToCapitalize;

        string[] names = nameToCapitalize.Split( new[] { " " }, StringSplitOptions.RemoveEmptyEntries );

        // Rewrite each part in place, then join once instead of concatenating inside the loop.
        for ( int i = 0; i < names.Length; i++ )
        {
            names[ i ] = CapitalizeSingleName( names[ i ] );
        }

        return string.Join( " ", names );
    }

    /// <summary>
    /// Capitalizes one space-free part of a name.
    /// </summary>
    /// <param name="name">A single, non-empty name part</param>
    /// <returns>The capitalized part</returns>
    private static string CapitalizeSingleName( string name )
    {
        // Handle already properly formatted names
        // If the entire name is not uppercased and the first letter is already uppercased and another
        // letter is also uppercased, then this is most likely already properly formatted.
        if ( name.Length > 1
             && !name.IsAllUpperCase()
             && char.IsUpper( name, 0 )
             && name.Substring( 1 ).ContainsAnUpperCase() )
        {
            return name;
        }

        // A double quote is treated as a mistyped apostrophe: o"meara -> o'meara
        string lowercaseName = name.ToLowerInvariant().Replace( "\"", "'" );

        // Handle: O'Brien or D'Angelos or any other name that starts with a letter and an apostrophe
        if ( lowercaseName.Length > 2 && FindALetterAndApostrophe.IsMatch( lowercaseName ) )
        {
            string prefix = lowercaseName.Substring( 0, 2 );
            string remainder = lowercaseName.Substring( 2 );
            return prefix.ToUpperInvariant() + InvariantTextInfo.ToTitleCase( remainder );
        }

        // Handle: cn&b
        if ( lowercaseName.Length > 1 && lowercaseName.Contains( "&" ) )
        {
            return lowercaseName.ToUpperInvariant();
        }

        // Handle: McDonald
        if ( lowercaseName.Length > 2 && lowercaseName.StartsWith( "mc", StringComparison.Ordinal ) )
        {
            return "Mc" + InvariantTextInfo.ToTitleCase( lowercaseName.Substring( 2 ) );
        }

        // Handle: MacDonald
        // Known limitation of this heuristic: any name longer than three letters that begins with
        // "mac" is treated as a Mac-prefix name, so "mack" becomes "MacK".
        if ( lowercaseName.Length > 3 && lowercaseName.StartsWith( "mac", StringComparison.Ordinal ) )
        {
            return "Mac" + InvariantTextInfo.ToTitleCase( lowercaseName.Substring( 3 ) );
        }

        return InvariantTextInfo.ToTitleCase( lowercaseName );
    }

    /// <summary>
    /// Converts the specified string to titlecase using the current culture's casing rules.
    /// </summary>
    /// <remarks>
    /// Despite the method name, this delegates to <see cref="TextInfo.ToTitleCase(string)"/>, which
    /// title-cases every word in the string, not only the first letter.
    /// </remarks>
    /// <param name="input">String to manipulate</param>
    /// <returns>Titlecased string</returns>
    public static string CapitalizeFirstLetter( this string input )
    {
        return CultureInfo.CurrentCulture.TextInfo.ToTitleCase( input );
    }

    /// <summary>
    /// Determines if the string is all upper case.
    /// </summary>
    /// <param name="input">String to search.</param>
    /// <returns>
    /// False if any letter in the string is not upper case; otherwise true. Non-letter characters are
    /// ignored, so a string containing no letters at all (including the empty string) yields true.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static bool IsAllUpperCase( this string input )
    {
        if ( input == null )
            throw new ArgumentNullException( "input" );

        foreach ( char character in input )
        {
            if ( Char.IsLetter( character ) && !Char.IsUpper( character ) )
                return false;
        }

        return true;
    }

    /// <summary>
    /// Determines if the string contains an upper case character.
    /// </summary>
    /// <param name="input">String to search.</param>
    /// <returns>Indicates whether the string contains an upper case character</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static bool ContainsAnUpperCase( this string input )
    {
        if ( input == null )
            throw new ArgumentNullException( "input" );

        foreach ( char character in input )
        {
            if ( Char.IsLetter( character ) && Char.IsUpper( character ) )
                return true;
        }

        return false;
    }

    /// <summary>
    /// Tries to find a match. Returns false if not found.
    /// </summary>
    /// <param name="regex">Regex</param>
    /// <param name="input">The string to search for a match.</param>
    /// <param name="match">An object that contains information about the match.</param>
    /// <returns>Was a match found</returns>
    /// <exception cref="ArgumentNullException"><paramref name="regex"/> is null.</exception>
    public static bool TryMatch( this Regex regex, string input, out Match match )
    {
        if ( regex == null )
            throw new ArgumentNullException( "regex" );

        match = regex.Match( input );
        return match.Success;
    }
}
}
using System;
using System.Collections.Generic;
using System.Diagnostics;
using NUnit.Framework;
namespace Utilities.Tests
{
[TestFixture]
public class CapitalizationHelperTests
{
    /// <summary>
    /// Runs CapitalizeName over a table of sample names and checks each result against the
    /// expected capitalization.
    /// </summary>
    [Test]
    public void Test()
    {
        // <OriginalCapitalizedName,ProperlyCapitalizedName>
        // < input , expectedValue >
        var names = new Dictionary<string, string>
        {
            { "WILFRID & ROSEMONDE JEAN-PAUL", "Wilfrid & Rosemonde Jean-Paul" },
            { "Shirley Jean Bapiste C004", "Shirley Jean Bapiste C004" },
            { "Leah Ben-Zev", "Leah Ben-Zev" },
            { "cb&b partners corp.", "CB&B Partners Corp." },
            { "Dr. Eliyahu Ladell", "Dr. Eliyahu Ladell" },
            { "Alice A. Yorks", "Alice A. Yorks" },
            { "Sheryl A o\"meara", "Sheryl A O'Meara" },
            { "ROSE MARIE JEAN-BAPTISTE", "Rose Marie Jean-Baptiste" },
            { "86 FRANCIS CORP", "86 Francis Corp" },
            { "dAVID ROSENBERG", "David Rosenberg" },
            { "Jean robert Bonard", "Jean Robert Bonard" },
            { "goseph goldstein c/o simonowitz", "Goseph Goldstein C/O Simonowitz" },
            { "Cong Kahal Torath Chaim Goldie Greenberger", "Cong Kahal Torath Chaim Goldie Greenberger" },
            { "Wilfrido Vega R009", "Wilfrido Vega R009" },
            { "bob d'Angelos", "Bob D'Angelos" }
        };

        Debug.WriteLine( string.Format( "{0,-45} | {1,-45} | {2,-45}", "Originally Capitalized Name", "Newly Capitalized Name", "Properly Capitalized Name" ) );
        Debug.WriteLine( string.Format( "{0,-45} | {1,-45} | {2,-45}", "---------------------------", "----------------------", "-------------------------" ) );

        foreach ( var name in names )
        {
            var originallyCapitalizedName = name.Key;
            var properlyCapitalizedName = name.Value;

            var newlyCapitalizedName = originallyCapitalizedName.CapitalizeName();

            Debug.WriteLine( string.Format( "{0,-45} | {1,-45} | {2,-45}", originallyCapitalizedName, newlyCapitalizedName, properlyCapitalizedName ) );

            // An equality constraint (rather than asserting a bare boolean) makes a failure report
            // both the expected and the actual string; the message identifies the offending input.
            Assert.That( newlyCapitalizedName, Is.EqualTo( properlyCapitalizedName ), "Input: " + originallyCapitalizedName );
        }
    }

    /// <summary>
    /// Calls CapitalizeName one million times on the same input and writes the elapsed time to the
    /// debug output.
    /// </summary>
    [Test]
    public void StressTest()
    {
        var stopwatch = Stopwatch.StartNew();
        string result = string.Empty;
        const int numberOfIterations = 1000000;

        for ( int i = 0; i < numberOfIterations; i++ )
        {
            result = "bob d'Angelos".CapitalizeName();
        }

        Debug.WriteLine( result );
        Debug.WriteLine( string.Format( "Elapsed time to run {0:n0} iterations: {1}", numberOfIterations, GetElapsedTime( stopwatch ) ) );

        // Guard against the timing loop silently measuring a broken implementation.
        Assert.That( result, Is.EqualTo( "Bob D'Angelos" ) );
    }

    /// <summary>
    /// Returns the elapsed time of the stopwatch in a formatted string.
    /// </summary>
    /// <param name="stopWatch">Stopwatch whose elapsed time is formatted</param>
    /// <returns>
    /// "MM min. SS.cc sec." when at least one minute has elapsed, "SS.cc sec." when at least one
    /// second has elapsed, otherwise "N ms.".
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stopWatch"/> is null.</exception>
    public static string GetElapsedTime( Stopwatch stopWatch )
    {
        if ( stopWatch == null )
            throw new ArgumentNullException( "stopWatch" );

        TimeSpan ts = stopWatch.Elapsed;

        // TimeSpan.Minutes is only the 0-59 minute component; use the total so that elapsed
        // times of an hour or more are not reported with the hours silently dropped.
        int totalMinutes = (int) ts.TotalMinutes;

        if ( totalMinutes > 0 )
            return String.Format( "{0:00} min. {1:00}.{2:00} sec.", totalMinutes, ts.Seconds, ts.Milliseconds / 10 );

        if ( ts.Seconds > 0 )
            return String.Format( "{0:00}.{1:00} sec.", ts.Seconds, ts.Milliseconds / 10 );

        return string.Format( "{0} ms.", ts.Milliseconds );
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment