Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from gensim.models import TfidfModel
from gensim.corpora import Dictionary
class MyCorpus:
    """Bag-of-words view over a collection of tokenised documents.

    Iterating an instance yields one ``doc2bow`` result per document, built
    on demand rather than stored, so the object can be handed to code that
    consumes a corpus by iterating it (here: ``TfidfModel``).
    """

    def __init__(self, documents):
        """Store the documents and build the token dictionary from them.

        ``documents`` is an iterable of token lists. It is traversed once
        here to build the dictionary and again on every ``__iter__`` call,
        so it must be re-iterable (e.g. a list); a one-shot generator would
        be exhausted after construction.
        """
        self.documents = documents
        self.dictionary = Dictionary(documents)

    def __iter__(self):
        """Yield ``self.dictionary.doc2bow(document)`` for each document, in order."""
        for document in self.documents:
            yield self.dictionary.doc2bow(document)
# Three small pre-tokenised Swedish documents. p1 and p2 share the tokens
# 'göran' and 'persson'; p3 has no token in common with either of them.
p1 = ['hans', 'göran', 'persson', 'född', 'januari', 'vingåker']
p2 = ['göran', 'persson', 'benämnd', 'göran', 'persson', 'simrishamn']
p3 = ['en', 'person', 'som', 'inte', 'har', 'någon', 'koppling', 'till', 'ovan']
documents = [p1, p2, p3]

# MyCorpus builds its own Dictionary from the documents; reuse that instance
# instead of constructing a second, identical one from the same input.
corpus = MyCorpus(documents)
dictionary = corpus.dictionary

# Fit the TF-IDF model on the bag-of-words corpus, then apply it to the same
# corpus to obtain the TF-IDF-weighted representation of each document.
tfidf = TfidfModel(corpus)
corpus_tfidf = tfidf[corpus]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.