Created
August 8, 2022 00:22
-
-
Save rubykv/722da1c3b24fab7fa5ff6b3aa7b5c59e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private static final String ADVERB = "_RB"; | |
private static final String VERB = "_VB"; | |
private static final String ADJECTIVE = "_JJ"; | |
public List<String> fetchVerbAndAdjective(String input) throws IOException { | |
File file = ResourceUtils.getFile("src/main/resources/nlp-model/en-pos-maxent.bin"); | |
InputStream in = new FileInputStream(file); | |
POSModel model = new POSModel(in); | |
POSTaggerME tagger = new POSTaggerME(model); | |
WhitespaceTokenizer whitespaceTokenizer = WhitespaceTokenizer.INSTANCE; | |
String[] tokens = whitespaceTokenizer.tokenize(input); | |
String[] tags = tagger.tag(tokens); | |
POSSample posSample = new POSSample(tokens, tags); | |
List<String> verbsAndAdj = new ArrayList<>(); | |
StringTokenizer tokenizer = new StringTokenizer(posSample.toString()); | |
while (tokenizer.hasMoreTokens()) { | |
String temp = tokenizer.nextToken(); | |
if (temp.contains(ADJECTIVE) || temp.contains(VERB) || temp.contains(ADVERB)) { | |
verbsAndAdj.add(temp.split("_")[0]); | |
} | |
} | |
return verbsAndAdj; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment