Lev Konstantinovskiy tmylk

## cats_metadata.tsv
пушистый
и
пудель
котенок
громкий
мяукал
лаял
большой
бегал
мурлыкал

## pydataberlin.ipynb

      
              1 file
            
          
              5 forks
            
          
              0 comments
            
          
              8 stars
            
          
                tmylk
                / pydataberlin.ipynb
            
            
              Last active
              June 18, 2019 11:46
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## get_similiarity_you_need.py

# Run this at the end of 01_pride_and_predjudice.ipynb from
# https://github.com/cytora/pycon-nlp-in-10-lines
# NOTE(review): assumes `processed_text`, `Word2Vec` and `multiprocessing`
# are already in scope in that notebook -- confirm before running elsewhere.

# One list of whitespace-separated lemma tokens per sentence.
processed_sentences = [sent.lemma_.split() for sent in processed_text.sents]

interchangeable_words_model = Word2Vec(
    sentences=processed_sentences,
    # Use every core but one, and never fewer than one worker: on a
    # single-core machine `cpu_count() - 1` would evaluate to 0.
    workers=max(1, multiprocessing.cpu_count() - 1),
    window=2,
    sg=1,
)

attributes_of_model = Word2Vec(
    sentences=processed_sentences,

## ldamodel.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011 Radim Rehurek <radimrehurek@seznam.cz>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
#
# Parts of the LDA inference code come from Dr. Hoffman's `onlineldavb.py` script,
# (C) 2010  Matthew D. Hoffman, GNU GPL 3.0


## comparison.py
from time import time
import logging
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

from gensim.matutils import Sparse2Corpus
from gensim.models.ldamodel import LdaModel
	пушистый
	и
	пудель
	котенок
	громкий
	мяукал
	лаял
	большой
	бегал
	мурлыкал

	# Run this at the end of 01_pride_and_predjudice.ipynb from https://github.com/cytora/pycon-nlp-in-10-lines
	processed_sentences = [sent.lemma_.split() for sent in processed_text.sents]
	interchangeable_words_model = Word2Vec(
	sentences=processed_sentences,
	workers=multiprocessing.cpu_count() - 1, # use your cores
	window=2, sg=1)

	attributes_of_model = Word2Vec(
	sentences=processed_sentences,
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	#
	# Copyright (C) 2011 Radim Rehurek <radimrehurek@seznam.cz>
	# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
	#
	# Parts of the LDA inference code come from Dr. Hoffman's `onlineldavb.py` script,
	# (C) 2010 Matthew D. Hoffman, GNU GPL 3.0
	from time import time
	import logging
	import numpy as np
	import pandas as pd
	from sklearn.datasets import fetch_20newsgroups
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.decomposition import LatentDirichletAllocation

	from gensim.matutils import Sparse2Corpus
	from gensim.models.ldamodel import LdaModel