Skip to content

Instantly share code, notes, and snippets.

@rubykv
Created August 8, 2022 00:22
Show Gist options
  • Save rubykv/722da1c3b24fab7fa5ff6b3aa7b5c59e to your computer and use it in GitHub Desktop.
Save rubykv/722da1c3b24fab7fa5ff6b3aa7b5c59e to your computer and use it in GitHub Desktop.
// Part-of-speech markers used by fetchVerbAndAdjective to decide which words to keep.
// Each marker is the "_" that separates a word from its tag in the "word_tag" rendering,
// followed by a tag prefix, so one marker covers every tag that begins with that prefix.
// NOTE(review): presumably Penn Treebank prefixes (RB* adverbs, VB* verbs, JJ* adjectives)
// — confirm against the tag set of the model that is loaded.
private static final String ADVERB = "_RB";
private static final String VERB = "_VB";
private static final String ADJECTIVE = "_JJ";
/**
 * Extracts the adjectives, verbs and adverbs from a piece of text.
 *
 * <p>The input is split on whitespace, every token is tagged with the OpenNLP
 * part-of-speech model, and each token whose tag begins with one of the
 * {@code ADJECTIVE}, {@code VERB} or {@code ADVERB} markers is returned, in input order.
 * Tokens are returned exactly as they appear in the input (including any attached
 * punctuation or embedded underscores).
 *
 * @param input the text to analyse; {@code null} or blank input yields an empty list
 * @return a new mutable list of the matching tokens, never {@code null}
 * @throws IOException if the model file cannot be found or read
 */
public List<String> fetchVerbAndAdjective(String input) throws IOException {
    List<String> verbsAndAdj = new ArrayList<>();
    // Nothing to tag: skip the (expensive) model load altogether.
    if (input == null || input.trim().isEmpty()) {
        return verbsAndAdj;
    }

    // NOTE(review): the path is resolved relative to the working directory and the model
    // is re-read on every call; consider loading it once from the classpath and caching it.
    File file = ResourceUtils.getFile("src/main/resources/nlp-model/en-pos-maxent.bin");
    POSModel model;
    // try-with-resources guarantees the stream is closed even if model loading fails.
    try (InputStream in = new FileInputStream(file)) {
        model = new POSModel(in);
    }
    POSTaggerME tagger = new POSTaggerME(model);

    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(input);
    // tags[i] is the tag assigned to tokens[i].
    String[] tags = tagger.tag(tokens);

    // Work on the parallel arrays directly instead of re-parsing a "word_tag" string:
    // that way a token that itself contains '_' is neither truncated nor mis-selected.
    for (int i = 0; i < tokens.length; i++) {
        // The markers carry the "_" separator, so prefix the tag the same way.
        String marker = "_" + tags[i];
        if (marker.startsWith(ADJECTIVE) || marker.startsWith(VERB) || marker.startsWith(ADVERB)) {
            verbsAndAdj.add(tokens[i]);
        }
    }
    return verbsAndAdj;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment