Skip to content

Instantly share code, notes, and snippets.

@yogin16
Created April 10, 2018 17:26
Show Gist options
  • Save yogin16/805a92736d97ba5e8dc9070931becd35 to your computer and use it in GitHub Desktop.
Save yogin16/805a92736d97ba5e8dc9070931becd35 to your computer and use it in GitHub Desktop.
TfidfVectorizer.java
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.deeplearning4j.bagofwords.vectorizer.TfidfVectorizer;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache;
import org.deeplearning4j.text.documentiterator.LabelAwareIterator;
import org.deeplearning4j.text.documentiterator.LabelledDocument;
import org.deeplearning4j.text.documentiterator.LabelsSource;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.nd4j.linalg.api.ndarray.INDArray;
/**
 * Minimal example of fitting a DL4J {@link TfidfVectorizer} on a small in-memory,
 * labelled corpus and then vectorizing one sentence with it.
 *
 * <p>Date 10/04/18, Time 2:50 PM
 *
 * @author yogin
 */
public class TfIdfExample {

    /**
     * Builds a two-document corpus (document text mapped to its label), fits a TF-IDF
     * vectorizer on it and prints the vector computed for one of the documents.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Key: document text, value: the label attached to that document.
        Map<String, String> docs = new HashMap<>();
        docs.put("This was in world war", "History");
        docs.put("Trump wants to nuke NK", "Current");

        TfidfVectorizer vectorizer = new TfidfVectorizer.Builder()
                .setTokenizerFactory(new DefaultTokenizerFactory())
                .setIterator(new InMemoryLabelAwareIterator(docs))
                .build();
        vectorizer.fit();

        INDArray transform = vectorizer.transform("This was in world war");
        // Print the result so the example has an observable outcome.
        System.out.println(transform);
    }

    /**
     * {@link LabelAwareIterator} backed by an in-memory map of document text to label.
     *
     * <p>The iterator owns its cursor and recreates it in {@link #reset()}, so the
     * corpus can be traversed more than once instead of being exhausted after a
     * single pass.
     */
    private static final class InMemoryLabelAwareIterator implements LabelAwareIterator {

        /** Backing corpus: document text mapped to its label. */
        private final Map<String, String> documents;

        /** Current position in {@link #documents}; replaced on every {@link #reset()}. */
        private Iterator<Map.Entry<String, String>> cursor;

        /**
         * @param documents corpus to iterate over, keyed by document text with the
         *                  label as value; must not be {@code null}
         */
        InMemoryLabelAwareIterator(Map<String, String> documents) {
            this.documents = documents;
            this.cursor = documents.entrySet().iterator();
        }

        /** @return {@code true} while there are documents left in the current pass */
        @Override
        public boolean hasNextDocument() {
            return cursor.hasNext();
        }

        /**
         * @return the next document, with the map key as content and the map value as label
         * @throws java.util.NoSuchElementException if the current pass is exhausted
         */
        @Override
        public LabelledDocument nextDocument() {
            Map.Entry<String, String> entry = cursor.next();
            LabelledDocument document = new LabelledDocument();
            document.setContent(entry.getKey());
            document.addLabel(entry.getValue());
            return document;
        }

        /** Rewinds the iterator to the first document of the corpus. */
        @Override
        public void reset() {
            cursor = documents.entrySet().iterator();
        }

        /** @return a labels source holding the labels of all documents in the corpus */
        @Override
        public LabelsSource getLabelsSource() {
            List<String> labels = new ArrayList<>(documents.values());
            return new LabelsSource(labels);
        }

        /** Nothing to release: the corpus lives entirely in memory. */
        @Override
        public void shutdown() {
        }

        /** @return same as {@link #hasNextDocument()} */
        @Override
        public boolean hasNext() {
            return hasNextDocument();
        }

        /** @return same as {@link #nextDocument()} */
        @Override
        public LabelledDocument next() {
            return nextDocument();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment