Skip to content

Instantly share code, notes, and snippets.

@EmilStenstrom
Created July 14, 2017 06:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EmilStenstrom/b89d342370604ec82d3f0736d330caee to your computer and use it in GitHub Desktop.
Save EmilStenstrom/b89d342370604ec82d3f0736d330caee to your computer and use it in GitHub Desktop.
from gensim.models import TfidfModel
from gensim.corpora import Dictionary
class MyCorpus:
    """Re-iterable bag-of-words view over a collection of tokenised documents.

    Each iteration walks ``documents`` again and converts every document with
    ``dictionary.doc2bow``, so the object can be consumed several times (for
    example once to fit a model and once more to transform the corpus).
    """

    def __init__(self, documents, dictionary=None):
        """Store the documents and the token-to-id mapping used to encode them.

        Args:
            documents: Iterable of documents, each an iterable of string
                tokens. It must be re-iterable (e.g. a list): it is walked
                again on every ``__iter__`` call, and also once here when the
                dictionary has to be built, so a one-shot generator would be
                exhausted before the corpus is ever streamed.
            dictionary: Optional existing mapping object exposing ``doc2bow``.
                When omitted, a new ``Dictionary`` is built from ``documents``
                (the original behaviour). Passing one lets several corpora
                share a single vocabulary and avoids a second pass over the
                documents.
        """
        self.documents = documents
        # Compare against None explicitly: an empty dictionary may be falsy
        # and must still be honoured when the caller supplies it.
        if dictionary is None:
            dictionary = Dictionary(documents)
        self.dictionary = dictionary

    def __iter__(self):
        """Yield the ``doc2bow`` encoding of each document, in input order."""
        for document in self.documents:
            yield self.dictionary.doc2bow(document)
# Three tokenised sample documents (Swedish). p1 and p2 share the tokens
# 'göran' and 'persson'; p3 has no token in common with either of them.
p1 = ['hans', 'göran', 'persson', 'född', 'januari', 'vingåker']
p2 = ['göran', 'persson', 'benämnd', 'göran', 'persson', 'simrishamn']
p3 = ['en', 'person', 'som', 'inte', 'har', 'någon', 'koppling', 'till', 'ovan']
documents = [p1, p2, p3]

# The corpus builds its own token-to-id dictionary from the documents.
corpus = MyCorpus(documents)
# Reuse the corpus' dictionary rather than constructing a second, independent
# one over the same documents: this saves a vocabulary pass and guarantees
# that `dictionary` and the ids produced by `corpus` come from one mapping.
dictionary = corpus.dictionary

# Fit TF-IDF statistics on the bag-of-words stream.
tfidf = TfidfModel(corpus)
# Indexing the model with a corpus wraps it in a transformed corpus; per the
# gensim documentation the weights are computed lazily when it is iterated.
corpus_tfidf = tfidf[corpus]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment