Skip to content

Instantly share code, notes, and snippets.

@dizzi
Created March 29, 2009 14:09
Show Gist options
  • Save dizzi/87408 to your computer and use it in GitHub Desktop.
Save dizzi/87408 to your computer and use it in GitHub Desktop.
package cz.dizzi;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
public class UniqueFiles {
private int counter = 0;
private class Node {
public HashMap<Long, Node> ChildNodes = new HashMap<Long, Node>();
}
private static HashMap<Long, Node> root = new HashMap<Long, Node>();
static ArrayList<Long> tokenize(String line) {
String[] sarray = line.split(" ");
ArrayList<Long> al = new ArrayList<Long>();
for (String t : sarray) {
if (t.startsWith("\""))
continue;
try {
al.add(Long.parseLong(t, 16));
} catch (Exception e) {
System.out.println("Expected exception string: " + t);
}
}
return al;
}
void processToken(HashMap<Long, Node> cRoot, ArrayList<Long> queue, int index) {
// boundaries check
if (queue.size() <= index) {
return;
}
Long token = queue.get(index);
if (!cRoot.containsKey(token)){
cRoot.put(token, new Node());
}else if((queue.size()-1)==index && cRoot.get(token).ChildNodes.size()==0){ //precount identical rows
counter++;
}
processToken(cRoot.get(token).ChildNodes, queue, ++index);
}
int countLeafs(HashMap<Long, Node> cRoot){
for(Long key : cRoot.keySet()){
if(cRoot.get(key).ChildNodes.size()==0)
counter++;
countLeafs(cRoot.get(key).ChildNodes);
}
return counter;
}
public static void main(String[] args) throws Exception {
BufferedReader in = new BufferedReader(new FileReader("d:/set_small.dat3"));
UniqueFiles uf = new UniqueFiles();
int cnt = 0;
System.out.println("Building tree...");
long time = System.currentTimeMillis();
// Thread.sleep(10000);
while (true) {
String line = in.readLine();
if (line == null)
break;
// System.out.println(line);
uf.processToken(root, UniqueFiles.tokenize(line), 0);
if (cnt % 1000 == 0)
System.out.print(cnt);
else if (cnt % 100 == 0)
System.out.print(".");
cnt++;
}
System.out.println("."+cnt);
System.out.println("Counting leafs...");
int total = uf.countLeafs(root);
System.out.println("Running time: " + (System.currentTimeMillis() - time) + "ms");
System.out.println("Diff lines: " + total);
}
}
Building tree...
0.........1000.........2000.........3000.........4000.........5000.........6000.........7000.........8000.........9000.........10000.........11000.........12000.........13000.........14000.........15000.........16000.........17000.........18000.........19000.........20000.........21000.........22000.........23000.........24000.........25000.........26000.........27000.........28000.........29000.........30000.........31000.........32000.........33000.........34000.........35000.........36000.........37000.........38000.........39000.........40000.........41000.........42000.........43000.........44000.........45000.........46000.........47000.........48000.........49000.........50000.........51000.........52000.........53000.........54000.........55000.........56000.........57000.........58000.........59000.........60000.........61000.........62000.........63000.........64000.........65000.........66000.........67000.........68000.........69000.........70000.........71000.........72000.........73000.........74000.........75000.........76000.........77000.........78000.........79000.........80000.........81000.........82000.........83000.........84000.........85000.........86000.........87000.........88000.........89000.........90000.........91000.........92000.........93000.........94000.........95000.........96000.........97000.........98000.........99000.........100000.........101000.........102000.........103000.........104000.........105000.........106000.106020
Counting leafs...
Running time: 19657ms
Diff lines: 96312
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment