Created
October 23, 2019 22:04
-
-
Save AlexTitovWork/29b0f8b9b32c8c53f376408e829d93d4 to your computer and use it in GitHub Desktop.
This code performs analysis of the input data and gets its label from the product categories.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public ArrayList textAnalisis(String inputdata){ | |
System.out.println("-> Nimbler: RestController: current message:\n " + inputdata); | |
/** | |
* Detector of sentence and divide text on independent sentence. | |
*/ | |
String[] sentences = sentenceDetect(inputdata); | |
System.out.println(Arrays.toString(sentences)); | |
/** | |
* Split sentence on words and numbers(tokens) | |
*/ | |
String[] tokens = tokenize(inputdata); | |
System.out.println(Arrays.toString(tokens)); | |
/** | |
* Classyfy all tokens by category (POS - tagger). | |
* Set tag to any token. | |
*/ | |
String[] tags = tag(tokens); | |
System.out.println( "Detected tag:\n"); | |
System.out.println(Arrays.toString(tags)); | |
double[] probsCurr = viewProb(); | |
System.out.println(Arrays.toString(probs)); | |
System.out.println( "ProbThresholdFilter start..."); | |
/** | |
* Deleted all duplicate category with small probability. | |
* This method get only one maxProb category. | |
* Method delete all objects smaller than TH. | |
*/ | |
double TH = 0.2; //bots ->boots | |
System.out.println(Arrays.toString(tags)) | |
/** | |
* If the probability of detecting a category is low, | |
* choose the most likely one from the list | |
*/ | |
ArrayList containerResult = new ArrayList(); | |
for(int i =0; i < tokens.length; i++){ | |
containerResult.add(tokens[i]); | |
containerResult.add(tags[i]); | |
} | |
System.out.println("Most probability phrase, get first phrase if" + "\nprob > " + TH + " ..."); | |
Sequence tagTree[] = topKSequences(tokens); | |
return containerResult; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment