Skip to content

Instantly share code, notes, and snippets.

@dbasch
Created November 28, 2012 07:19
Show Gist options
  • Save dbasch/4159593 to your computer and use it in GitHub Desktop.
Save dbasch/4159593 to your computer and use it in GitHub Desktop.
Creating an in-memory index in Java using a Multimap
import com.google.common.collect.ArrayListMultimap;
import java.io.File;
import java.io.IOException;
import java.util.Locale;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Creates in-memory mappings from words to the names of the files that contain them.
 *
 * <p>Words are lower-cased (using {@link Locale#ROOT}) before they are stored, so lookups
 * against the returned index must use lower-cased keys as well.
 */
public class Indexer {

    /**
     * Word separator: one or more whitespace characters or any of
     * {@code # & ! : , ; . \ + -}. Compiled once because {@link Pattern} is immutable.
     */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("[\\s#&!:,;\\.\\\\+-]+");

    /**
     * Indexes every regular file located directly inside {@code dirName}.
     *
     * <p>Each occurrence of a word adds one entry, so a file name appears in the list for a
     * word as many times as that word occurs in the file. Sub-directories are skipped, not
     * descended into. Files are decoded with the platform default charset.
     *
     * @param dirName path of the directory whose files should be indexed
     * @return a multimap from lower-cased word to the names of the files containing it
     * @throws IOException if {@code dirName} is not a readable directory or a file cannot be read
     */
    public static ArrayListMultimap<String, String> buildIndex(String dirName) throws IOException {
        File dir = new File(dirName);
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Not a readable directory: " + dirName);
        }

        ArrayListMultimap<String, String> map = ArrayListMultimap.create();
        for (File f : files) {
            if (!f.isFile()) {
                // Opening a directory with Scanner would fail and abort the whole index.
                continue;
            }
            String contents = readContents(f).toLowerCase(Locale.ROOT);
            for (String word : WORD_SEPARATOR.split(contents)) {
                // split() yields a leading "" when the text starts with a separator
                // (and a single "" for empty text); those are not words.
                if (!word.isEmpty()) {
                    map.put(word, f.getName());
                }
            }
        }
        return map;
    }

    /** Reads the whole file into a string, returning "" for an empty file; always closes the file. */
    private static String readContents(File file) throws IOException {
        Scanner scanner = new Scanner(file);
        try {
            // "\\Z" only matches at the end of input, so the first token is the entire file.
            scanner.useDelimiter("\\Z");
            String contents = scanner.hasNext() ? scanner.next() : "";
            // Scanner swallows read errors; surface them instead of indexing truncated text.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
            return contents;
        } finally {
            scanner.close();
        }
    }

    // for testing: prints the files that contain the word args[1] within the directory args[0]
    public static void main(String args[]) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: java Indexer <directory> <word>");
            System.exit(1);
        }
        ArrayListMultimap<String, String> index = Indexer.buildIndex(args[0]);
        // Index keys are lower-cased, so normalise the query the same way.
        System.out.println(index.get(args[1].toLowerCase(Locale.ROOT)));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment