Skip to content

Instantly share code, notes, and snippets.

@microaeris
Last active November 5, 2015 02:19
Show Gist options
  • Save microaeris/6c3eb8133a28b30c8599 to your computer and use it in GitHub Desktop.
Save microaeris/6c3eb8133a28b30c8599 to your computer and use it in GitHub Desktop.
import java.util.*;
/**
* CryptoEquivalence is defined by two words having a
* character mapping where the characters in one word
* can singularly map to the character of the same index
* of the second word without a conflict.
*
* book -> room GOOD
* book -> roam BAD (o maps to both o and a)
*
* @author Alice
*
*/
public class CryptoEquivalence2 {
final static int TOTAL_DIGITS = 3;
/**
* Create a string hash for a word.
* A pattern is defined by the character count at each index
* For every character, write to the string how many
* times we have seen that character already. This method
* assumes that each word has to be under 999 characters,
* which should accommodate all but two words in the
* English language.
*
* Also needs to save the previous index we've seen a letter before
* A character's hash is: 3 digits for lastIndex seen and then 3 digits for a counter
* This forces the count to match up to what character it's counting.
* With this, "book" won't match to "crct".
*
* Examples:
* "book" would hash to "0010010000100001001".
* "room" would also has to "0010010000100001001".
* "alice" would hash to "001001001001001".
* "car" and "bat" would hash to "001001001".
*
* @param s
*/
public static String cryptoHash(String s) {
HashMap<Character, String> charCount = new HashMap<Character, String>();
HashMap<Character, Integer> prevIndex = new HashMap<Character, Integer>();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < s.length(); ++i) {
Character c = s.charAt(i);
String count = charCount.get(c);
if (count == null || count.equals("")) {
// Found a new character
charCount.put(c, "001");
sb.append("001");
prevIndex.put(c, i);
} else {
// Update last index c was seen
int prev = prevIndex.get(c);
int prevLength = (int)(Math.log10(prev)+1);
int fillerBits = TOTAL_DIGITS - prevLength;
prevIndex.put(c, i);
// Write the last index seen
for (int j = 0; j < fillerBits; ++j) {
sb.append("0");
}
sb.append(count.toString());
// Found a character we've seen before
int countValue = Integer.parseInt(count) + 1;
int countLength = (int)(Math.log10(countValue)+1);
fillerBits = TOTAL_DIGITS - countLength;
// Write the count
for (int j = 0; j < fillerBits; ++j) {
sb.append("0");
}
sb.append(count.toString());
}
}
return sb.toString();
}
/**
* Assigns each word to the set of its crypto equivalence.
* Equivalence is found by checking each word's hash value.
* ie. Words with the same hash have crypto equivalence.
* @param list
* @return
*/
public static List<LinkedList<String>> findCESets(List<String> list) {
HashMap<String, LinkedList<String>> mapHashToList =
new HashMap<String, LinkedList<String>>();
// Map all strings to a set of words with the same hash.
for (String s : list) {
String hash = cryptoHash(s);
LinkedList<String> hashList = mapHashToList.get(hash);
if (hashList == null) {
// Create a new list for this hash
LinkedList<String> newList = new LinkedList<String>();
newList.add(s);
mapHashToList.put(hash, newList);
} else {
// Add current word to the list
hashList.add(s);
}
}
// Convert hashmap to a list of lists
LinkedList<LinkedList<String>> result = new LinkedList<LinkedList<String>>();
for (String key : mapHashToList.keySet()) {
LinkedList<String> ll = mapHashToList.get(key);
result.add(ll);
}
return result;
}
public static void main(String[] args) {
List<String> list = Arrays.asList("car", "mom", "dad", "ant", "book", "crct",
"room", "abbc", "alice", "asdasdasdas", "aluce", "bat", "roam");
System.out.println(findCESets(list));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment