Skip to content

Instantly share code, notes, and snippets.

@jianminchen
Created January 22, 2018 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jianminchen/c218d7050c77083a3b3e74d46bc44dff to your computer and use it in GitHub Desktop.
Save jianminchen/c218d7050c77083a3b3e74d46bc44dff to your computer and use it in GitHub Desktop.
Word count practice - after mock interview, I completed the code, try not to use LINQ method, and use Bucket sort instead. Spent over 30 minutes to work on regular.Split API
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace WordCountEngine
{
class Program
{
/// <summary>
/// Follow up after mock interview January 20, 2018
/// </summary>
/// <param name="args"></param>
static void Main(string[] args)
{
var sorted = WordCountEngine("A b a a b c");
}
/// <summary>
/// please go over C# string.Split article:
/// https://msdn.microsoft.com/en-us/library/b873y76a(v=vs.110).aspx
/// Understand how to specify regular expression in 5 minutes
/// </summary>
/// <param name="document"></param>
/// <returns></returns>
public static string[,] WordCountEngine(string document) // "A b a a b c"
{
if(document == null || document.Length == 0) // false
{
return new string[0,0];
}
var processed = document.ToLower(); // "a b a' a b c"
// replace
processed = processed.Replace('\\','\0');
// split the words
var words = Regex.Split(processed, "[ +,?!.]+"); //
// sorted the dictionary and then send to buckets
var buckets = applyBucketSort(storeToDict(words));
// send to List<string, int>
var sortedbyDescending = sortByDescending(buckets);
// send to dimensional array
var sortedLength = sortedbyDescending.Count;
var sortedwords = new string[sortedLength, 2];
int index = 0;
foreach (var item in sortedbyDescending)
{
sortedwords[index, 0] = item[0];
sortedwords[index, 1] = item[1];
index++;
}
return sortedwords;
}
private static List<string[]> sortByDescending(List<string>[] buckets)
{
int length = buckets.Length;
var descendingOrder = new List<string[]>();
for (int i = length - 1; i >= 0; i--) // -- not ++
{
foreach (var item in buckets[i])
{
descendingOrder.Add(new string[]{item, i.ToString()});
}
}
return descendingOrder;
}
/// comparison sort -> nlogn -
/// bucket sort -> detmine max value -> 1 to max buckets -> go over each word
/// const size of bucket -> hint at most 200
private static List<string>[] applyBucketSort(Dictionary<string, int> dict)
{
// get maximum value count
var max = 0;
foreach(var pair in dict)
{
var number = pair.Value;
max = number > max ? number : max;
}
// var list = new List<string>
var buckets = new List<string>[max + 1]; // not max, should be max + 1
for(int i = 0 ; i < max + 1; i++)
{
buckets[i] = new List<string>();
}
foreach (var pair in dict)
{
var key = pair.Key;
var value = pair.Value;
buckets[value].Add(key);
}
return buckets;
}
/// <summary>
/// need to exclude the empty string
/// </summary>
/// <param name="words"></param>
/// <returns></returns>
private static Dictionary<string, int> storeToDict(string[] words)
{
var dict = new Dictionary<string, int>();
foreach(var word in words)
{
if (word.Length == 0)
{
continue;
}
if(!dict.ContainsKey(word))
{
dict.Add(word, 1);
}
else
{
dict[word]++;
}
}
return dict;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment