Skip to content

Instantly share code, notes, and snippets.

@dbasch
Created December 3, 2012 16:22
Show Gist options
  • Save dbasch/4196053 to your computer and use it in GitHub Desktop.
Save dbasch/4196053 to your computer and use it in GitHub Desktop.
import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;
/** create in-memory mappings from words to the files that contain them */
public class Indexer2 {
public static HashMap<String, HashSet<String>> buildIndex(String dirName) throws java.io.IOException {
HashMap<String,HashSet<String>> map = new HashMap<String,HashSet<String>>();
for (File f : new File(dirName).listFiles()) {
String fname = f.getName();
Scanner s = new Scanner(f).useDelimiter("[^\\p{L}&&[^\\p{M}]]");
while (s.hasNext()) {
String key = s.next().toLowerCase();
HashSet<String> set = map.get(key);
if (set == null) {
set = new HashSet<String>();
map.put(key, set);
}
set.add(fname);
}
}
return map;
}
//for testing
public static void main(String args[]) throws java.io.IOException {
long start = new java.util.Date().getTime();
HashMap<String,HashSet<String>> index = Indexer2.buildIndex(args[0]);
System.out.println(new java.util.Date().getTime() - start);
System.out.println(index.get(args[1]));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment