Created
January 22, 2018 19:34
-
-
Save jianminchen/c218d7050c77083a3b3e74d46bc44dff to your computer and use it in GitHub Desktop.
Word count practice - after mock interview, I completed the code, try not to use LINQ method, and use Bucket sort instead. Spent over 30 minutes to work on regular.Split API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
namespace WordCountEngine | |
{ | |
class Program | |
{ | |
/// <summary> | |
/// Follow up after mock interview January 20, 2018 | |
/// </summary> | |
/// <param name="args"></param> | |
static void Main(string[] args) | |
{ | |
var sorted = WordCountEngine("A b a a b c"); | |
} | |
/// <summary> | |
/// please go over C# string.Split article: | |
/// https://msdn.microsoft.com/en-us/library/b873y76a(v=vs.110).aspx | |
/// Understand how to specify regular expression in 5 minutes | |
/// </summary> | |
/// <param name="document"></param> | |
/// <returns></returns> | |
public static string[,] WordCountEngine(string document) // "A b a a b c" | |
{ | |
if(document == null || document.Length == 0) // false | |
{ | |
return new string[0,0]; | |
} | |
var processed = document.ToLower(); // "a b a' a b c" | |
// replace | |
processed = processed.Replace('\\','\0'); | |
// split the words | |
var words = Regex.Split(processed, "[ +,?!.]+"); // | |
// sorted the dictionary and then send to buckets | |
var buckets = applyBucketSort(storeToDict(words)); | |
// send to List<string, int> | |
var sortedbyDescending = sortByDescending(buckets); | |
// send to dimensional array | |
var sortedLength = sortedbyDescending.Count; | |
var sortedwords = new string[sortedLength, 2]; | |
int index = 0; | |
foreach (var item in sortedbyDescending) | |
{ | |
sortedwords[index, 0] = item[0]; | |
sortedwords[index, 1] = item[1]; | |
index++; | |
} | |
return sortedwords; | |
} | |
private static List<string[]> sortByDescending(List<string>[] buckets) | |
{ | |
int length = buckets.Length; | |
var descendingOrder = new List<string[]>(); | |
for (int i = length - 1; i >= 0; i--) // -- not ++ | |
{ | |
foreach (var item in buckets[i]) | |
{ | |
descendingOrder.Add(new string[]{item, i.ToString()}); | |
} | |
} | |
return descendingOrder; | |
} | |
/// comparison sort -> nlogn - | |
/// bucket sort -> detmine max value -> 1 to max buckets -> go over each word | |
/// const size of bucket -> hint at most 200 | |
private static List<string>[] applyBucketSort(Dictionary<string, int> dict) | |
{ | |
// get maximum value count | |
var max = 0; | |
foreach(var pair in dict) | |
{ | |
var number = pair.Value; | |
max = number > max ? number : max; | |
} | |
// var list = new List<string> | |
var buckets = new List<string>[max + 1]; // not max, should be max + 1 | |
for(int i = 0 ; i < max + 1; i++) | |
{ | |
buckets[i] = new List<string>(); | |
} | |
foreach (var pair in dict) | |
{ | |
var key = pair.Key; | |
var value = pair.Value; | |
buckets[value].Add(key); | |
} | |
return buckets; | |
} | |
/// <summary> | |
/// need to exclude the empty string | |
/// </summary> | |
/// <param name="words"></param> | |
/// <returns></returns> | |
private static Dictionary<string, int> storeToDict(string[] words) | |
{ | |
var dict = new Dictionary<string, int>(); | |
foreach(var word in words) | |
{ | |
if (word.Length == 0) | |
{ | |
continue; | |
} | |
if(!dict.ContainsKey(word)) | |
{ | |
dict.Add(word, 1); | |
} | |
else | |
{ | |
dict[word]++; | |
} | |
} | |
return dict; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment