Skip to content

Instantly share code, notes, and snippets.

@dbasch
Created December 3, 2012 16:27
Show Gist options
  • Save dbasch/4196088 to your computer and use it in GitHub Desktop.
Save dbasch/4196088 to your computer and use it in GitHub Desktop.
import java.io.File;
import java.util.Scanner;
import com.google.common.collect.HashMultimap;
/**
 * Creates in-memory mappings from words to the names of the files that contain them.
 *
 * <p>A word is a maximal run of Unicode letters ({@code \p{L}}) and combining marks
 * ({@code \p{M}}). Words are lower-cased before they are stored, so lookups against the
 * returned index must use lower-case keys.
 */
public class Indexer {

    /**
     * Delimiter between words: one or more characters that are neither letters nor marks.
     *
     * <p>Written as a single negated union so its meaning does not depend on how a given
     * JDK parses negation combined with {@code &&}. The trailing {@code +} collapses runs
     * of delimiters so the scanner never hands back empty tokens (which would otherwise be
     * indexed under the key {@code ""}).
     */
    private static final java.util.regex.Pattern NON_WORD =
            java.util.regex.Pattern.compile("[^\\p{L}\\p{M}]+");

    /**
     * Indexes every regular file directly inside {@code dirName}.
     *
     * <p>Subdirectories and other non-regular entries are skipped; the directory is not
     * walked recursively. Files are decoded with the platform default charset.
     *
     * @param dirName path of the directory whose files should be indexed
     * @return a multimap from lower-cased word to the simple names (not full paths) of the
     *         files containing that word
     * @throws java.io.IOException if {@code dirName} cannot be listed (missing, not a
     *         directory, or unreadable), or if a file cannot be opened or read
     */
    public static HashMultimap<String,String> buildIndex(String dirName) throws java.io.IOException {
        // listFiles() returns null rather than throwing when the path is not a listable directory.
        File[] entries = new File(dirName).listFiles();
        if (entries == null) {
            throw new java.io.IOException("Cannot list directory: " + dirName);
        }

        HashMultimap<String,String> map = HashMultimap.create();
        for (File f : entries) {
            if (!f.isFile()) {
                continue; // a Scanner cannot be opened on a directory
            }
            String fname = f.getName();
            // try-with-resources releases the file handle even if indexing fails midway.
            try (Scanner s = new Scanner(f)) {
                s.useDelimiter(NON_WORD);
                while (s.hasNext()) {
                    // Locale.ROOT keeps the keys stable regardless of the default locale.
                    map.put(s.next().toLowerCase(java.util.Locale.ROOT), fname);
                }
                // Scanner swallows read errors and just stops; surface them to the caller.
                java.io.IOException readFailure = s.ioException();
                if (readFailure != null) {
                    throw readFailure;
                }
            }
        }
        return map;
    }

    /**
     * Manual test driver: indexes a directory, prints the elapsed time in milliseconds,
     * then prints the set of file names containing the given word.
     *
     * @param args {@code args[0]} is the directory to index, {@code args[1]} the word to look up
     * @throws java.io.IOException if the directory cannot be indexed
     */
    public static void main(String args[]) throws java.io.IOException {
        if (args.length < 2) {
            System.err.println("usage: java Indexer <directory> <word>");
            System.exit(1);
        }
        // nanoTime is monotonic, so the measurement is immune to wall-clock adjustments.
        long start = System.nanoTime();
        HashMultimap<String,String> index = Indexer.buildIndex(args[0]);
        System.out.println((System.nanoTime() - start) / 1000000L);
        // Keys are stored lower-cased, so normalise the query the same way.
        System.out.println(index.get(args[1].toLowerCase(java.util.Locale.ROOT)));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment