Skip to content

Instantly share code, notes, and snippets.

@derekantrican
Last active March 15, 2020 00:51
Show Gist options
  • Save derekantrican/9604f1f09383c75f67305c88749d71f0 to your computer and use it in GitHub Desktop.
Save derekantrican/9604f1f09383c75f67305c88749d71f0 to your computer and use it in GitHub Desktop.
A class for converting number words (eg "I am ninety-seven years old") in a string into their numerical representation (eg "I am 97 years old")
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace NumberWordsToNumbers
{
/* ==================================================================================================
* A robust class for converting a number words (eg "one-hundred and sixty-nine") in a string
* to numbers (eg "169")
*
* Usage: string convertedString = NumberWordsToNumbers.ConvertString("one-hundred and sixty-nine");
*
* NOTE: Commas will prevent numbers from being combined. This is helpful for lists (eg "My favorite
* numbers are six hundred, seventy, and four" will be "600, 70, and 4"). But also keep in mind that
* this prevents strings like "six hundred, and five" from being interpreted as "605". Unfortunately
* there is no way to determine whether "six hundred and five" means "605" or "600 and 5" so I have
* opted for maintaining string formatting (keeping commas) rather than combining words "through" them.
* You can simply add commas to the Regex on line 140 if you want to change this.
*
* Example cases:
*
* "Sixty-Nine" becomes "69"
* "Nine to Five" becomes "9 to 5"
* "Dude's Five Nine" becomes "Dude's 5 9"
* "Zero to Sixty-Nine" becomes "0 to 69"
* "Goose Bubbs and 500 Million Years" becomes "Goose Bubbs and 500000000 Years"
* "I am thirty five, you are 15" becomes "I am 35, you are 15"
* "Five Hundred" becomes "500"
* "Sixty Seven" becomes "67"
* "Four hundred six" becomes "406"
* "Four hundred, six" becomes "400, 6"
* "Four hundred and six" becomes "406"
* "Six hundred, Seven hundred" becomes "600, 700"
* "One thousand eighteen" becomes "1018"
* ==================================================================================================
*/
public static class NumberWordsToNumbers
{
#region Number Dictionaries
private static Dictionary<string, int> singleNumbers = new Dictionary<string, int>
{
{"zero", 0 },
{"one", 1 },
{"two", 2 },
{"three", 3 },
{"four", 4 },
{"five", 5 },
{"six", 6 },
{"seven", 7 },
{"eight", 8 },
{"nine", 9 }
};
private static Dictionary<string, int> teenNumbers = new Dictionary<string, int>
{
{"eleven", 11 },
{"twelve", 12 },
{"thirteen", 13 },
{"fourteen", 14 },
{"fifteen", 15 },
{"sixteen", 16 },
{"seventeen", 17 },
{"eighteen", 18 },
{"nineteen", 19 }
};
private static Dictionary<string, int> doubleNumbers = new Dictionary<string, int>
{
{"ten", 10 },
{"twenty", 20 },
{"thirty", 30 },
{"fourty", 40 },
{"fifty", 50 },
{"sixty", 60 },
{"seventy", 70 },
{"eighty", 80 },
{"ninety", 90 }
};
private static Dictionary<string, long> multiplierWords = new Dictionary<string, long>
{
{"hundred", 100 },
{"thousand", 1000 },
{"million", 1000000 },
{"billion", 1000000000 },
{"trillion", 1000000000000 }
};
#endregion Number Dictionaries
public static string ConvertString(string input)
{
List<Match> numberWords = GetNumberWords(input);
List<Tuple<string, int, int>> numberWordGroups = CombineNumberWordGroups(numberWords, input);
for (int i = 0; i < numberWordGroups.Count; i++)
{
string filteredNumberWords = numberWordGroups[i].Item1.Replace("-", " ").Replace(",", " ").Replace(" and ", " ");
numberWordGroups[i] = new Tuple<string, int, int>(ConvertNumberWordsToNumber(filteredNumberWords).ToString(),
numberWordGroups[i].Item2,
numberWordGroups[i].Item3);
}
input = ReplaceNumbersInString(input, numberWordGroups);
return input;
}
private static List<Match> GetNumberWords(string input)
{
List<Match> result = new List<Match>();
string lookBehindChars = @"^|\s|-";
string lookAheadChars = @",|-|\.|\?|!|\s|$";
string allNumbersRegex = $"{string.Join("|", multiplierWords.Keys.Select(p => $"(?<={lookBehindChars}){p}(?={lookAheadChars})"))}|" +
$"{string.Join("|", doubleNumbers.Keys.Select(p => $"(?<={lookBehindChars}){p}(?={lookAheadChars})"))}|" +
$"{string.Join("|", teenNumbers.Keys.Select(p => $"(?<={lookBehindChars}){p}(?={lookAheadChars})"))}|" +
$"{string.Join("|", singleNumbers.Keys.Select(p => $"(?<={lookBehindChars}){p}(?={lookAheadChars})"))}|" +
$@"(?<={lookBehindChars})\d+(?={lookAheadChars})";
foreach (Match match in Regex.Matches(input, allNumbersRegex, RegexOptions.IgnoreCase))
result.Add(match);
return result;
}
private static List<Tuple<string, int, int>> CombineNumberWordGroups(List<Match> numberWords, string input)
{
List<Tuple<string, int, int>> result = new List<Tuple<string, int, int>>();
for (int i = 0; i < numberWords.Count; i++)
{
Match word = numberWords[i];
if (i > 0)
{
Match lastWord = numberWords[i - 1];
string separatingString = input.Substring(lastWord.Index + lastWord.Length, word.Index - lastWord.Index - lastWord.Length);
//If words are only separated by "and", " ", or "-" then combine them together into one group
if (Regex.Replace(separatingString, " |-|and", "", RegexOptions.IgnoreCase) == "")
{
result[result.Count - 1] = new Tuple<string, int, int>(result[result.Count - 1].Item1 + separatingString + word.Value,
result[result.Count - 1].Item2,
result[result.Count - 1].Item3 + separatingString.Length + word.Value.Length);
continue;
}
}
result.Add(new Tuple<string, int, int>(word.Value, word.Index, word.Length));
}
return result;
}
private static string ConvertNumberWordsToNumber(string numberWords)
{
//Need to do number matching in reverse order so we match things like "sixty" before "six"
foreach (string key in multiplierWords.Keys)
numberWords = Regex.Replace(numberWords, key, multiplierWords[key].ToString(), RegexOptions.IgnoreCase);
foreach (string key in doubleNumbers.Keys)
numberWords = Regex.Replace(numberWords, key, doubleNumbers[key].ToString(), RegexOptions.IgnoreCase);
foreach (string key in teenNumbers.Keys)
numberWords = Regex.Replace(numberWords, key, teenNumbers[key].ToString(), RegexOptions.IgnoreCase);
foreach (string key in singleNumbers.Keys)
numberWords = Regex.Replace(numberWords, key, singleNumbers[key].ToString(), RegexOptions.IgnoreCase);
numberWords = CombineMultipliers(numberWords);
numberWords = CombineNearbyNumbers(numberWords);
return numberWords;
}
private static string CombineMultipliers(string input)
{
string result = "";
int currentNum = int.MinValue;
foreach (string word in input.Split(' '))
{
if (word == "")
continue;
int number;
if (int.TryParse(word, out number))
{
string numStr = currentNum.ToString();
if (currentNum == int.MinValue)
currentNum = number;
else if (multiplierWords.Values.Contains(number) && currentNum != int.MinValue)
currentNum *= number;
else
{
if (currentNum != int.MinValue)
{
result += currentNum + " ";
currentNum = int.MinValue;
}
currentNum = number;
}
}
else
{
if (currentNum != double.MinValue)
{
result += currentNum + " ";
currentNum = int.MinValue;
}
result += word + " ";
}
}
if (currentNum != int.MinValue)
result += currentNum;
return result.Trim();
}
private static string CombineNearbyNumbers(string input)
{
string result = "";
int currentNum = int.MinValue;
foreach (string word in input.Split(' '))
{
if (word == "")
continue;
int number;
if (int.TryParse(word, out number))
{
string numStr = currentNum.ToString();
if (currentNum == int.MinValue)
currentNum = number;
else if (numStr.Length > word.Length &&
numStr.Substring(numStr.Length - word.Length).Distinct().All(c => c == '0')) //Check if space for new number in currentNum is only zeros
{
currentNum = currentNum + number;
}
else
{
if (currentNum != int.MinValue)
{
result += currentNum + " ";
currentNum = int.MinValue;
}
currentNum = number;
}
}
else
{
if (currentNum != int.MinValue)
{
result += currentNum + " ";
currentNum = int.MinValue;
}
result += word + " ";
}
}
if (currentNum != int.MinValue)
result += currentNum;
return result.Trim();
}
private static string ReplaceNumbersInString(string originalString, List<Tuple<string, int, int>> replacementNumbersAndPositions)
{
int indexDiff = 0; //Account for difference in length values as we replace substrings
foreach (Tuple<string, int, int> replacementNumber in replacementNumbersAndPositions)
{
string temp = originalString.Replace(replacementNumber.Item2 - indexDiff, replacementNumber.Item3, replacementNumber.Item1);
indexDiff += originalString.Length - temp.Length;
originalString = temp;
}
return originalString;
}
private static string Replace(this string inputString, int startIndex, int length, string newSubString)
{
StringBuilder aStringBuilder = new StringBuilder(inputString);
aStringBuilder.Remove(startIndex, length);
aStringBuilder.Insert(startIndex, newSubString);
return aStringBuilder.ToString();
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment