Skip to content

Instantly share code, notes, and snippets.

@jianminchen
Created January 25, 2017 05:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jianminchen/435812697e6e838d980c5bd9b8edf39e to your computer and use it in GitHub Desktop.
Save jianminchen/435812697e6e838d980c5bd9b8edf39e to your computer and use it in GitHub Desktop.
Hackerrank - sherlock and anagram - optimal solution - O(26 * N * N) where N is the string's length
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
class Solution
{
public class HashedAnagramString
{
/*
* Make sure that two anagram strings will have some hashed code
*
* @frequencyTable - Dictionary<char, int>
* The frequency table has to be sorted first and then construct
* a string with each char in alphabetic numbers concatenated by
* its occurrences.
*
*/
public static string GetHashedAnagram(int[] frequencyTable)
{
StringBuilder key = new StringBuilder();
for (int i = 0; i < 26; i++)
{
int value = frequencyTable[i];
if (value > 0)
{
char c = (char)(i + 'a');
key.Append(c.ToString() + value);
}
}
return key.ToString();
}
}
static void Main(String[] args)
{
ProcessInput();
//RunSampleTestcase();
}
public static void RunSampleTestcase()
{
var hashedAnagramsDictionary = ConstructHashedAnagramsDictionary("abba");
var pairs = CalculatePairs(hashedAnagramsDictionary);
Debug.Assert(pairs == 4);
}
public static void ProcessInput()
{
var queries = int.Parse(Console.ReadLine());
while (queries-- > 0)
{
var input = Console.ReadLine();
var hashedAnagramsDictionary = ConstructHashedAnagramsDictionary(input);
Console.WriteLine(CalculatePairs(hashedAnagramsDictionary));
}
}
/*
* Prepare fequence table for N substring which starts from index 0
* Do not need include all other substrings as we know there are total O(N*N) substrings.
*
* Work on one small test case:
* "abcd"
* so the frequency table for substrings, all starts from index = 0:
* a
* ab
* abc
* abcd
* Only for those 4 substrings, not all of them.
*
* Time complexity: O(N * N)
* Space complexity: O(26*N), N is the string's length
*/
public static int[][] PrepareFequencyTableForOnlyNSubstring(string input)
{
if (input == null || input.Length == 0)
{
return null;
}
int length = input.Length;
int[][] frequencyTables = new int[length][];
for (int i = 0; i < length; i++)
{
frequencyTables[i] = new int[26];
}
for (int start = 0; start < length; start++) // go over the string once from the beginning
{
char current = input[start];
int charIndex = current - 'a';
for (int index = start; index < length; index++)
{
frequencyTables[index][charIndex]++;
}
}
return frequencyTables;
}
/*
* What should be taken cared of in the design?
* Time complexity: O(26 * N * N), N is the string length.
*
*
* I think that it is same idea as Leetcode 238 Product of array except itself. We
* take advantage of alphabetic size is limited and constant, 26 chars, and then work
* with substring (denote Si) starting from 0 to i, 0 < i < n ( n is string's length)
* to calculate frequency table, and any substring starting from i and ending j can be
* Sj's frequency table - Si's frequency table for those 26 alphabetic numbers.
* So, the preprocessed frequency table size is O(N) ( N is the length of string)
* instead of O(N^2) based on each of substrings. For any of substrings, there are only 26
* calculation to compute for each alphabet number, so the time complexity goes
* to O(26N^2) = O(n^2)
*
* Update hashed anagram counting dictionary - a statistics, basically
* tell the fact like this:
* For example, test case string abba,
* substring ab -> hashed key a1b1, value is 2, because there are
* two substrings: "ab","ba" having hashed key: "a1b1"
* Here are all possible hashed keys:
* a1 - a, a,
* b1 - b, b
* a1b1 - ab, ba
* b2 - bb
* a1b2 - abb, bba
* a2b2 - abba
*
* Time complexity is O(N^2), not O(N^3).
*/
public static Dictionary<string, int> ConstructHashedAnagramsDictionary(string input)
{
var hashedAnagramsDictionary = new Dictionary<string, int>();
var length = input.Length;
// frequency table memo is using time O(26 * N)
var fequencyTableMemo = PrepareFequencyTableForOnlyNSubstring(input);
for (int start = 0; start < length; start++)
{
for (int substringLength = 1; start + substringLength <= length; substringLength++)
{
var frequencyData = CalculateSubstringFequencyDiff(fequencyTableMemo, start, substringLength);
var key = HashedAnagramString.GetHashedAnagram(frequencyData);
// At most there are O(N*N) entry in the dictionary, go over once
if (hashedAnagramsDictionary.ContainsKey(key))
{
hashedAnagramsDictionary[key]++;
}
else
{
hashedAnagramsDictionary.Add(key, 1);
}
}
}
return hashedAnagramsDictionary;
}
/*
*
* @start - substring' start position in the original string
* @length - substring's length
* Just a simple minus calculation for each alphabet number.
*/
public static int[] CalculateSubstringFequencyDiff(int[][] fequencyTableMemo, int start, int length)
{
const int size = 26;
int[] difference = new int[size];
for (int i = 0; i < size; i++)
{
difference[i] = fequencyTableMemo[start + length - 1][i];
if (start > 0)
{
difference[i] -= fequencyTableMemo[start-1][i];
}
}
return difference;
}
/*
* The formula to calculate pairs
* For example, test case string abba,
* substring ab -> hashed key a1b1, value is 2, because there are two substrings: "ab","ba" having hashed key: "a1b1"
* Here are all possible hashed keys:
* a1 - a, a,
* b1 - b, b
* a1b1 - ab, ba
* b2 - bb
* a1b2 - abb, bba
* a2b2 - abba
* So, how many pairs?
* should be 4, from 4 hashed keys: a1, b1, a1b1 and a2b2
*/
public static int CalculatePairs(Dictionary<string, int> hashedAnagrams)
{
// get pairs
int anagrammaticPairs = 0;
foreach (var count in hashedAnagrams)
{
anagrammaticPairs += count.Value * (count.Value - 1) / 2;
}
return anagrammaticPairs;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment