Created
April 3, 2017 04:39
-
-
Save jianminchen/fc3dcd492eb3ede868ed69c9f35fb120 to your computer and use it in GitHub Desktop.
Scan document - practice code - need to review more
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Linq; | |
using System.Text; | |
class ScanDocument { | |
static void Main(string[] args) { | |
//string s = WordCountEngine("practice makes perfect, ", " !."); // customize the delimiters here if necessaray | |
//Console.WriteLine(s); | |
//RunTestcaseOnSplit(); | |
RunTestcase(); | |
} | |
public static void RunTestcase() | |
{ | |
var result = WordCountEngine(" a,b,c,a a", " ,"); | |
} | |
public static void RunTestcaseOnSplit() | |
{ | |
var result = splitMethod(" a,b,c", " ,"); | |
Debug.Assert(result[0].CompareTo("a") == 0); | |
} | |
/// <summary> | |
/// minimum run time O(N) + O(NM), N is word's length, and M is specialDelimeters' length | |
/// </summary> | |
/// <param name="words"></param> | |
/// <param name="specialDelimeters"></param> | |
/// <returns></returns> | |
public static string WordCountEngine(string words, string specialDelimeters) | |
{ | |
IList<string> data = new List<string>(); | |
if(words == null || words.Length ==0) | |
{ | |
return string.Empty; | |
} | |
//var specailDelimters = " -;"; | |
var split = splitMethod(words, specialDelimeters); // O(N) | |
var raw = new Dictionary<string, int>(); | |
foreach(var s in split) | |
{ | |
if(raw.ContainsKey(s)) | |
{ | |
raw[s]++; | |
} | |
else | |
{ | |
raw.Add(s,1); | |
} | |
} | |
// depending on dictionary | |
// time O(NlogN) | |
var sorted = from entry in raw orderby entry.Value descending select entry; | |
data.Add("{ "); | |
int length = sorted.Count(); | |
int index = 0; | |
foreach(var item in sorted) | |
{ | |
StringBuilder temp = new StringBuilder(); | |
temp.Append(item.Key + ": " + item.Value); | |
if(index < length - 1) | |
{ | |
temp.Append(","); | |
} | |
data.Add(temp.ToString()); | |
index ++; | |
} | |
data.Add(" }"); | |
return string.Join("", data.ToArray()); | |
} | |
/// <summary> | |
/// delimter =" ;-/" | |
/// delimiter string length is m | |
/// n * m , n is string s's length | |
/// algorithm time complexity is O(nm) | |
/// | |
/// actually C# string.Split(Char[]) | |
/// </summary> | |
/// <param name="s"></param> | |
/// <param name="delimiters"></param> | |
/// <returns></returns> | |
private static string[] splitMethod(string s, string delimiters) | |
{ | |
IList<string> output = new List<string>(); | |
if(s == null || s.Length == 0) | |
{ | |
return new string[0]; | |
} | |
int length = s.Length; | |
int start = 0; | |
int end = 0; | |
for(int i = 0; i < length ; i++) | |
{ | |
var current = s[i]; | |
var startChar = s[start]; | |
// continue to skip delimiter char if need | |
if(delimiters.IndexOf(startChar) >= 0) | |
{ | |
start = i; | |
if(end < start) | |
{ | |
end = start; | |
} | |
} | |
if(delimiters.IndexOf(current) >= 0) | |
{ | |
end = i; | |
// output the word, s[end] is delimiter | |
if(end - start > 0) | |
{ | |
output.Add(s.Substring(start, end - start)); // minus 1, checking | |
start = end + 1; | |
} | |
} | |
} | |
// edge case | |
if (delimiters.IndexOf(s[start]) < 0 ) | |
{ | |
output.Add(s.Substring(start, length - start)); | |
} | |
return output.ToArray(); | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment