Skip to content

Instantly share code, notes, and snippets.

@kenfdev
Last active October 26, 2021 00:10
Show Gist options
  • Save kenfdev/aed2a55031b346dab1c2bfed9722b7c4 to your computer and use it in GitHub Desktop.
Save kenfdev/aed2a55031b346dab1c2bfed9722b7c4 to your computer and use it in GitHub Desktop.
CSharp
// Words that IsValidWord rejects, so they never reach the word-count tally.
// A few entries appear in both lower-case and capitalized form.
static string[] INVALID_WORDS =
{
    "the", "and", "a", "to", "of", "he", "was", "in", "his", "that",
    "it", "with", "you", "I", "but", "they", "for", "had", "The", "as",
    "at", "she", "on", "He", "this", "be", "all", "not", "him", "so",
    "were", "then", "by", "if", "her", "There", "would", "or", "when", "their",
    "from", "don't", "an", "could", "have", "But", "there", "what", "been", "is",
};
// Script entry point: computes the top words of a fixed text file and dumps them.
// NOTE(review): Dump() is presumably LINQPad's output extension - confirm.
void Main()
{
    const string bookPath = @"C:\Users\kenfdev\Desktop\tomsawyer.txt";

    IDictionary<string, uint> topWords = GetTopWords(bookPath);
    topWords.Dump();
}
/// <summary>
/// Reads the text file at <paramref name="path"/> line by line, counts every word that
/// passes <see cref="IsValidWord"/>, and returns the ten most frequent words with their counts.
/// </summary>
/// <param name="path">Path of the text file to analyse.</param>
/// <returns>
/// At most ten word/count pairs. Words are tallied case-insensitively
/// (invariant culture), so "Tom" and "tom" share one entry.
/// </returns>
private static IDictionary<string, uint> GetTopWords(string path)
{
    return File.ReadLines(path)
        .AsParallel()
        .Aggregate(
            // Seed factory: a fresh dictionary per accumulator.
            () => new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase),
            // Fold one line into the local tally.
            (localDic, line) =>
            {
                // A null separator array makes Split break on every white-space
                // character, so tab-separated words are not fused into one token.
                foreach (var word in line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries))
                {
                    if (!IsValidWord(word))
                    {
                        continue;
                    }

                    TrackWordsOccurence(localDic, word);
                }

                return localDic;
            },
            // Merge a local tally into the running total.
            (finalResult, localDic) =>
            {
                foreach (var pair in localDic)
                {
                    // TryGetValue leaves 'total' at 0 for unseen keys, so a single
                    // lookup covers both the "new word" and "existing word" cases.
                    finalResult.TryGetValue(pair.Key, out uint total);
                    finalResult[pair.Key] = total + pair.Value;
                }

                return finalResult;
            },
            // Keep only the ten highest counts.
            finalResult => finalResult
                .OrderByDescending(kv => kv.Value)
                .Take(10)
                .ToDictionary(kv => kv.Key, kv => kv.Value)
        );
}
/// <summary>
/// Records one more occurrence of <paramref name="word"/> in <paramref name="wordCounts"/>.
/// </summary>
/// <param name="wordCounts">Tally to update in place.</param>
/// <param name="word">Word whose count is incremented (or set to 1 if absent).</param>
private static void TrackWordsOccurence(IDictionary<string, uint> wordCounts, string word)
{
    // An unseen word starts at 1; a known word is bumped by one.
    uint updated = wordCounts.TryGetValue(word, out uint seen) ? seen + 1u : 1u;
    wordCounts[word] = updated;
}
/// <summary>
/// Tells whether <paramref name="word"/> should be counted, i.e. it is not listed in
/// <c>INVALID_WORDS</c>.
/// </summary>
/// <param name="word">Candidate word taken from the input text.</param>
/// <returns><c>true</c> when the word is not an excluded word; otherwise <c>false</c>.</returns>
// The comparison ignores case with the same comparer the word-count dictionaries use,
// so any capitalization of an excluded word (e.g. "And", "IT") is rejected rather than
// being counted under a separate casing.
private static bool IsValidWord(string word) =>
    !INVALID_WORDS.Contains(word, StringComparer.InvariantCultureIgnoreCase);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment