Skip to content

Instantly share code, notes, and snippets.

@jianminchen
Created April 3, 2017 04:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jianminchen/fc3dcd492eb3ede868ed69c9f35fb120 to your computer and use it in GitHub Desktop.
Save jianminchen/fc3dcd492eb3ede868ed69c9f35fb120 to your computer and use it in GitHub Desktop.
Scan document - practice code - need to review more
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
class ScanDocument {
static void Main(string[] args) {
//string s = WordCountEngine("practice makes perfect, ", " !."); // customize the delimiters here if necessaray
//Console.WriteLine(s);
//RunTestcaseOnSplit();
RunTestcase();
}
public static void RunTestcase()
{
var result = WordCountEngine(" a,b,c,a a", " ,");
}
public static void RunTestcaseOnSplit()
{
var result = splitMethod(" a,b,c", " ,");
Debug.Assert(result[0].CompareTo("a") == 0);
}
/// <summary>
/// minimum run time O(N) + O(NM), N is word's length, and M is specialDelimeters' length
/// </summary>
/// <param name="words"></param>
/// <param name="specialDelimeters"></param>
/// <returns></returns>
public static string WordCountEngine(string words, string specialDelimeters)
{
IList<string> data = new List<string>();
if(words == null || words.Length ==0)
{
return string.Empty;
}
//var specailDelimters = " -;";
var split = splitMethod(words, specialDelimeters); // O(N)
var raw = new Dictionary<string, int>();
foreach(var s in split)
{
if(raw.ContainsKey(s))
{
raw[s]++;
}
else
{
raw.Add(s,1);
}
}
// depending on dictionary
// time O(NlogN)
var sorted = from entry in raw orderby entry.Value descending select entry;
data.Add("{ ");
int length = sorted.Count();
int index = 0;
foreach(var item in sorted)
{
StringBuilder temp = new StringBuilder();
temp.Append(item.Key + ": " + item.Value);
if(index < length - 1)
{
temp.Append(",");
}
data.Add(temp.ToString());
index ++;
}
data.Add(" }");
return string.Join("", data.ToArray());
}
/// <summary>
/// delimter =" ;-/"
/// delimiter string length is m
/// n * m , n is string s's length
/// algorithm time complexity is O(nm)
///
/// actually C# string.Split(Char[])
/// </summary>
/// <param name="s"></param>
/// <param name="delimiters"></param>
/// <returns></returns>
private static string[] splitMethod(string s, string delimiters)
{
IList<string> output = new List<string>();
if(s == null || s.Length == 0)
{
return new string[0];
}
int length = s.Length;
int start = 0;
int end = 0;
for(int i = 0; i < length ; i++)
{
var current = s[i];
var startChar = s[start];
// continue to skip delimiter char if need
if(delimiters.IndexOf(startChar) >= 0)
{
start = i;
if(end < start)
{
end = start;
}
}
if(delimiters.IndexOf(current) >= 0)
{
end = i;
// output the word, s[end] is delimiter
if(end - start > 0)
{
output.Add(s.Substring(start, end - start)); // minus 1, checking
start = end + 1;
}
}
}
// edge case
if (delimiters.IndexOf(s[start]) < 0 )
{
output.Add(s.Substring(start, length - start));
}
return output.ToArray();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment