Skip to content

Instantly share code, notes, and snippets.

@thesurlydev
Created April 9, 2014 15:43
Show Gist options
  • Save thesurlydev/10284561 to your computer and use it in GitHub Desktop.
Save thesurlydev/10284561 to your computer and use it in GitHub Desktop.
Reads in a word list index from http://wordnet.princeton.edu/wordnet/download/current-version/ and filters/transforms to something more usable
import java.io.*;
/**
 * Reduces a word-index file to a plain word list.
 *
 * <p>For every input line past the first {@link #START_LINE} lines, the text before the
 * first space is taken as the candidate word. Candidates are kept when they are non-empty,
 * at most {@link #MAX_LEN} characters long, and contain none of {@link #EXCLUDE_CHARS}.
 * Kept words are written to the output file, one per line.
 */
public class WordListProcessor {

    /** A candidate word containing any of these characters is dropped. */
    public static final char[] EXCLUDE_CHARS = {'_', '-', '.', '\''};

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "/Users/shane.witbeck/Downloads/dict/index.adv";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "/Users/shane.witbeck/Downloads/dict/adverbs.txt";

    /**
     * Number of leading input lines that are ignored.
     * NOTE(review): presumably meant to skip the index file's header block - confirm that 50
     * matches the actual header length of the input.
     */
    private static final int START_LINE = 50;

    /** Longest word (in chars) that is kept. */
    private static final int MAX_LEN = 6;

    /**
     * Reads the input file, filters it, and writes the resulting word list.
     *
     * @param args optional: {@code args[0]} overrides the input path and {@code args[1]}
     *             overrides the output path; with no arguments the built-in defaults are used
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File in = new File(args != null && args.length > 0 ? args[0] : DEFAULT_INPUT);
        File out = new File(args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT);

        // The whole result is buffered first so the output file is only touched
        // once the input has been read successfully.
        StringBuilder content = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(in))) {
            String line;
            int curLine = 0;
            while ((line = br.readLine()) != null) {
                if (curLine++ < START_LINE) {
                    continue;
                }
                String word = firstToken(line);
                if (isUsable(word)) {
                    content.append(word).append("\n");
                }
            }
        }

        try (BufferedWriter bw = new BufferedWriter(new FileWriter(out.getAbsoluteFile()))) {
            bw.write(content.toString());
        }
    }

    /** Returns the text before the first space, or the whole line when it has no space. */
    private static String firstToken(String line) {
        int space = line.indexOf(' ');
        // indexOf yields -1 for a space-less line; substring(0, -1) would throw.
        return space < 0 ? line : line.substring(0, space);
    }

    /** Tells whether a word is non-empty, short enough, and free of excluded characters. */
    private static boolean isUsable(String word) {
        if (word.isEmpty() || word.length() > MAX_LEN) {
            return false;
        }
        for (char excluded : EXCLUDE_CHARS) {
            if (word.indexOf(excluded) >= 0) {
                return false;
            }
        }
        return true;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment