@AadityaJ
Created October 9, 2016 11:28
Simple non-distributed LDA wrapper for scikit-learn
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011 Radim Rehurek <radimrehurek@seznam.cz>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
#
"""
Scikit-learn style interface for gensim, for easy use of gensim LDA with scikit-learn.
"""
import numpy as np
import gensim.models
class BaseClass(object):
    def __init__(self):
        """
        Base class meant to always be inherited;
        kept as a placeholder for future use.
        """
        pass

    def run(self):  # placeholder, used for testing
        return np.array([0, 0, 0])
class LdaModel(object):
    """
    Base LDA module.
    """

    def __init__(self, n_topics=5, n_iter=2000, alpha=0.1, eta=0.01, random_state=None,
                 refresh=10, lda_model=None, ex=None):
        """
        Base LDA wrapper; maps scikit-learn style arguments onto gensim's LdaModel:

            n_topics     -> num_topics
            n_iter       -> passes (and iterations)
            alpha        -> alpha
            eta          -> eta
            random_state -> random_state
            refresh      -> update_every

        id2word is passed as None, and the corpus is not given here but to .fit().
        The ex argument is accepted but currently unused.
        """
        self.n_topics = n_topics
        self.n_iter = n_iter
        self.alpha = alpha
        self.eta = eta
        self.random_state = random_state
        self.refresh = refresh
        # the (fitted) gensim model is kept in the lda_model attribute
        self.lda_model = lda_model
        # sanity checks on the Dirichlet priors
        if alpha <= 0:
            raise ValueError("alpha value must be larger than zero")
        if eta <= 0:
            raise ValueError("eta value must be larger than zero")
    def get_params(self, deep=True):
        """Return the estimator's parameters as a dict (scikit-learn convention)."""
        return {"n_topics": self.n_topics, "n_iter": self.n_iter, "alpha": self.alpha,
                "eta": self.eta, "random_state": self.random_state,
                "refresh": self.refresh, "lda_model": self.lda_model}

    def set_params(self, **parameters):
        """Set parameters from keyword arguments and return self (scikit-learn convention)."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    def fit(self, X, y=None):
        """
        Fit the model by calling gensim.models.LdaModel on X.

        X must already be a gensim-style bag-of-words corpus (a list of lists of
        (token_id, count) tuples); gensim preprocessing is still required before fit.
        Returns the fitted gensim LdaModel, which is also stored in self.lda_model.
        """
        if X is None:
            raise AttributeError("Corpus defined as None")
        self.lda_model = gensim.models.LdaModel(
            corpus=X, num_topics=self.n_topics, id2word=None,
            passes=self.n_iter, update_every=self.refresh,
            alpha=self.alpha, iterations=self.n_iter,
            eta=self.eta, random_state=self.random_state)
        return self.lda_model
    def print_topics(self, n_topics):
        """
        Print (and return) the top words of n_topics topics
        using the fitted lda_model object.
        """
        return self.lda_model.print_topics(n_topics)

    # TODO: also expose get_document_topics
    def show_topic(self, topicid=0, topn=5):
        """
        Return a list of (word, probability) tuples for a single topic.
        """
        return self.lda_model.show_topic(topicid=topicid, topn=topn)
# from sklearn.utils.estimator_checks import check_estimator
# check_estimator(LdaModel)
# corpus = [[(0, 2), (1, 2), (2, 1), (3, 1), (4, 1), (5, 2)],
#           [(3, 1), (4, 1), (6, 1), (7, 1), (8, 1), (9, 1), (10, 1), (11, 1), (12, 1)],
#           [(8, 1), (13, 1), (14, 1), (15, 1), (16, 1), (17, 1), (18, 1), (19, 1), (20, 1), (21, 1)],
#           [(3, 1), (4, 1), (8, 1), (18, 1), (22, 1), (23, 1), (24, 1), (25, 1), (26, 1), (27, 1), (28, 1), (29, 1)],
#           [(0, 1), (1, 1), (19, 2), (30, 1), (31, 1)]]
# model = LdaModel(n_topics=20, n_iter=500, random_state=1)
# model.fit(corpus)
# print(model.print_topics(2))
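
# A minimal sketch of the gensim preprocessing that fit() expects, since the
# wrapper does not tokenize or vectorize raw text itself. The tokenized
# documents below are made-up illustration data; gensim.corpora.Dictionary
# and doc2bow build the (token_id, count) bag-of-words corpus passed to fit().
#
# from gensim.corpora import Dictionary
# texts = [["human", "interface", "computer"],
#          ["survey", "user", "computer", "system", "response", "time"],
#          ["graph", "minors", "trees", "survey"]]
# dictionary = Dictionary(texts)                          # token -> integer id mapping
# corpus = [dictionary.doc2bow(text) for text in texts]   # list of (token_id, count) lists
# model = LdaModel(n_topics=2, n_iter=50, random_state=1)
# model.fit(corpus)
# print(model.show_topic(topicid=0, topn=5))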