Hiroyuki Tanaka nkt1546789

## puclassifier.py
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV

class PUClassifier(object):
    """Holder for a wrapped classifier and a fold count.

    Only the constructor is present in this listing; it stores both
    arguments on the instance without validation or copying.

    Parameters
    ----------
    trad_clf : object, optional
        Stored as ``self.trad_clf`` (default ``None``).
        NOTE(review): presumably a conventional scikit-learn classifier -- confirm.
    n_folds : int, optional
        Stored as ``self.n_folds`` (default ``2``).
        NOTE(review): presumably the number of cross-validation folds -- confirm.
    """

    def __init__(self, trad_clf=None, n_folds=2):
        # Plain attribute storage, nothing else happens at construction time.
        self.n_folds, self.trad_clf = n_folds, trad_clf

## kde_regression.py
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Module-level settings for kde_regression.py.
# NOTE(review): presumably the sample count and the RNG seed used further
# down in the script -- confirm at the use sites.
n_samples = 200
random_state = 1

## lrridge.py
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg,random
from sklearn.base import BaseEstimator
from sklearn import datasets,metrics

class LRRidge(BaseEstimator):
    """Estimator skeleton parameterised by ``alpha``, ``beta``, ``gamma`` and ``k``.

    Every constructor argument is stored under an attribute of the same
    name, which is what ``BaseEstimator.get_params`` / ``set_params`` and
    ``sklearn.base.clone`` rely on.

    Parameters
    ----------
    alpha : float, default 1.0
    beta : float, default 1.0
    gamma : float, default 10.0
    k : int, default 10

    NOTE(review): the meaning of each hyper-parameter is not visible in this
    listing -- document them once the fitting code is available.
    """

    def __init__(self,alpha=1.,beta=1.,gamma=10.,k=10):
        self.alpha=alpha
        self.beta=beta
        # These two were accepted but never stored, so get_params()/clone()
        # had nothing to read back for ``gamma`` and ``k``.
        self.gamma=gamma
        self.k=k

## coo_mat.py
def create_cooccurrence_matrix(filename,tokenizer,window_size):
    """Read a UTF-8 text file line by line and index its tokens.

    Each line is stripped, handed to ``tokenizer``, and every non-empty
    token receives an integer id in ``vocabulary`` in order of first
    appearance.

    Parameters
    ----------
    filename : str
        Path of the text file; it is decoded as UTF-8.
    tokenizer : callable
        Called with one stripped line; must return an iterable of tokens.
    window_size : int
        Not used by the visible part of the function.
        NOTE(review): presumably the co-occurrence window -- confirm.

    NOTE(review): the listing ends after the vocabulary lookup, so ``data``,
    ``row``, ``col``, ``pos`` and ``i`` are never consumed here and nothing
    is returned; they are kept under their original names for the rest of
    the body. ``codecs`` is also not imported in the visible part of the file.
    """
    vocabulary={}
    data=[]
    row=[]
    col=[]
    # The context manager closes the handle even when the tokenizer raises;
    # previously the file object was never closed explicitly.
    with codecs.open(filename,"r","utf-8") as lines:
        for sentence in lines:
            sentence=sentence.strip()
            tokens=[token for token in tokenizer(sentence) if token!=u""]
            for pos,token in enumerate(tokens):
                # setdefault assigns the next free id on first sight only.
                i=vocabulary.setdefault(token,len(vocabulary))

## pu_demo.py
import numpy as np
import matplotlib.pyplot as plt
from numpy import random
import seaborn as sns
from sklearn import metrics
from puwrapper import PUWrapper
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV
# Switch seaborn to its "white" plot style.
sns.set_style("white")
# Seed NumPy's global random generator (``random`` is ``numpy.random`` here).
random.seed(0)

## rbfmodel_wrapper.py
import numpy as np

class RbfModelWrapper(object):
    """Pair a model object with a Gaussian (RBF) kernel coefficient.

    Parameters
    ----------
    model : object
        Stored as ``self._model``.
    gamma : float, optional
        Factor applied to the squared distances inside the exponential
        (default ``1.``).
    **kwds
        Accepted and ignored by the visible constructor.
    """

    def __init__(self,model,gamma=1.,**kwds):
        self.gamma=gamma
        self._model=model

    def fit(self,X,y):
        """Compute ``exp(-gamma * ||x_i - x_j||^2)`` for all row pairs of ``X``.

        NOTE(review): the listing stops once ``Phi`` is computed; ``y`` and
        ``self._model`` are not used in the visible part and nothing is
        returned.
        """
        # Squared row norms arranged as a single column (for a 2-D array X).
        X2=np.c_[np.sum(X**2,1)]
        # ||a-b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated for every pair.
        sq_dists=X2+X2.T-2*X.dot(X.T)
        Phi=np.exp(-self.gamma*sq_dists)

## puwrapper.py
import numpy as np
from numpy import random
from sklearn import base

class PUWrapper(object):
    """Wrap a classifier together with a fold count.

    Parameters
    ----------
    trad_clf : object
        Stored as ``self._trad_clf``.
        NOTE(review): presumably a conventional probabilistic classifier -- confirm.
    n_fold : int, optional
        Stored as ``self._n_fold`` (default ``5``).
        NOTE(review): presumably the number of cross-validation folds -- confirm.
    """

    def __init__(self,trad_clf,n_fold=5):
        # Both arguments are kept as private attributes, unmodified.
        self._n_fold=n_fold
        self._trad_clf=trad_clf

    def fit(self,X,s):

## farthest_first_traversal.py
import numpy as np

def fft(X,D,k):
    """
    Farthest-first traversal (only the opening of the body is present here).

    X: input vectors (n_samples by dimensionality)
    D: distance matrix (n_samples by n_samples)
    k: number of centroids
    out: indices of centroids

    NOTE(review): the listing ends right after the sample count is read, so
    the selection loop and the return value described above are not visible.
    """
    # Number of input vectors (rows of X).
    n=X.shape[0]

## bsm.py
# -*- coding: utf-8 -*-

import MeCab
import numpy as np

# Module-wide MeCab tagger, built once at import time with the "-Ochasen"
# option (NOTE(review): this should select ChaSen-compatible output -- confirm
# against the installed MeCab).
m = MeCab.Tagger("-Ochasen")

def sent_tokenize(text):
    """Normalise ``text`` before tokenization (the rest of the body is not visible here).

    A ``unicode`` object is encoded to a UTF-8 byte string; any other input
    is left as it is.

    NOTE(review): ``unicode`` exists only on Python 2; presumably the MeCab
    binding expects byte strings there -- confirm before porting.
    """
    # Exact type check: subclasses of ``unicode`` are not converted.
    if type(text) is unicode:
        text = text.encode("utf8")

## collage_template_generation.py
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

def generate_template(n, width, height, random_state=1, max_random_state=10000, offset=0):
    """Start building a list of areas from a ``width`` x ``height`` canvas.

    Parameters
    ----------
    n : int
        ``n - 1`` rows of random integers are drawn, one per loop iteration.
    width, height : number
        Canvas extent; combined with ``offset`` to form the first area.
    random_state : int, optional
        Passed to ``stats.randint.rvs`` so the draw is reproducible.
    max_random_state : int, optional
        Exclusive upper bound of the drawn integers.
    offset : number, optional
        Margin applied to all four entries of the first area.

    NOTE(review): the listing is cut off inside the loop, so how the areas
    are split and what is returned cannot be documented from here.
    """
    # First area: [offset, offset, width-offset, height-offset].
    # NOTE(review): looks like (x0, y0, x1, y1) of the inset canvas -- confirm.
    L = [np.array([offset, offset, width-offset, height-offset])]
    # Integer array of shape (n-1, 4) with values in [0, max_random_state).
    # NOTE(review): the name suggests each value later serves as a seed -- confirm.
    random_state_lists = stats.randint.rvs(0, max_random_state, size=(n-1, 4), random_state=random_state)

    # One iteration per row of drawn integers.
    for random_state_list in random_state_lists:
        n_areas = len(L)
	import numpy as np
	from sklearn.linear_model import SGDClassifier
	from sklearn.cross_validation import StratifiedKFold
	from sklearn.grid_search import GridSearchCV

	class PUClassifier(object):
		"""Holder for a wrapped classifier and a fold count.

		The constructor stores both arguments on the instance without
		validation or copying.

		Parameters
		----------
		trad_clf : object, optional
			Stored as ``self.trad_clf`` (default ``None``).
			NOTE(review): presumably a conventional scikit-learn classifier -- confirm.
		n_folds : int, optional
			Stored as ``self.n_folds`` (default ``2``).
			NOTE(review): presumably the number of cross-validation folds -- confirm.
		"""

		# Nesting restored: the method and its statements had been flattened
		# to the same level as the ``class`` line, which does not parse.
		def __init__(self, trad_clf=None, n_folds=2):
			self.trad_clf = trad_clf
			self.n_folds = n_folds
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy import stats
	from sklearn.model_selection import GridSearchCV
	from sklearn.neighbors import KernelDensity
	from sklearn import datasets
	from sklearn.preprocessing import StandardScaler

	# Module-level settings.
	# NOTE(review): presumably the RNG seed and the sample count used further
	# down in the script -- confirm at the use sites.
	random_state = 1
	n_samples = 200
	import numpy as np
	import matplotlib.pyplot as plt
	from numpy import linalg,random
	from sklearn.base import BaseEstimator
	from sklearn import datasets,metrics

	class LRRidge(BaseEstimator):
		"""Estimator skeleton parameterised by ``alpha``, ``beta``, ``gamma`` and ``k``.

		Every constructor argument is stored under an attribute of the same
		name, which is what ``BaseEstimator.get_params`` / ``set_params`` and
		``sklearn.base.clone`` rely on.

		Parameters
		----------
		alpha : float, default 1.0
		beta : float, default 1.0
		gamma : float, default 10.0
		k : int, default 10

		NOTE(review): the meaning of each hyper-parameter is not visible in
		this listing -- document them once the fitting code is available.
		"""

		# Nesting restored: the flattened layout did not parse.
		def __init__(self,alpha=1.,beta=1.,gamma=10.,k=10):
			self.alpha=alpha
			self.beta=beta
			# These two were accepted but never stored, so
			# get_params()/clone() had nothing to read back for them.
			self.gamma=gamma
			self.k=k
	def create_cooccurrence_matrix(filename,tokenizer,window_size):
		"""Read a UTF-8 text file line by line and index its tokens.

		Each line is stripped, handed to ``tokenizer``, and every non-empty
		token receives an integer id in ``vocabulary`` in order of first
		appearance.

		Parameters
		----------
		filename : str
			Path of the text file; it is decoded as UTF-8.
		tokenizer : callable
			Called with one stripped line; must return an iterable of tokens.
		window_size : int
			Not used by the visible part of the function.
			NOTE(review): presumably the co-occurrence window -- confirm.

		NOTE(review): the listing ends after the vocabulary lookup, so
		``data``, ``row``, ``col``, ``pos`` and ``i`` are never consumed here
		and nothing is returned; they are kept under their original names for
		the rest of the body. ``codecs`` is also not imported in the visible
		part of the file.
		"""
		# Nesting restored: body and loops had been flattened to one level.
		vocabulary={}
		data=[]
		row=[]
		col=[]
		# The context manager closes the handle even when the tokenizer raises.
		with codecs.open(filename,"r","utf-8") as lines:
			for sentence in lines:
				sentence=sentence.strip()
				tokens=[token for token in tokenizer(sentence) if token!=u""]
				for pos,token in enumerate(tokens):
					# setdefault assigns the next free id on first sight only.
					i=vocabulary.setdefault(token,len(vocabulary))
	import numpy as np
	import matplotlib.pyplot as plt
	from numpy import random
	import seaborn as sns
	from sklearn import metrics
	from puwrapper import PUWrapper
	from sklearn.linear_model import LogisticRegression,LogisticRegressionCV
	# Switch seaborn to its "white" plot style.
	sns.set_style("white")
	# Seed NumPy's global random generator (``random`` is ``numpy.random`` here).
	random.seed(0)
	import numpy as np

	class RbfModelWrapper(object):
		"""Pair a model object with a Gaussian (RBF) kernel coefficient.

		Parameters
		----------
		model : object
			Stored as ``self._model``.
		gamma : float, optional
			Factor applied to the squared distances inside the exponential
			(default ``1.``).
		**kwds
			Accepted and ignored by the visible constructor.
		"""

		# Nesting restored: the flattened layout did not parse.
		def __init__(self,model,gamma=1.,**kwds):
			self._model=model
			self.gamma=gamma

		def fit(self,X,y):
			"""Compute ``exp(-gamma * ||x_i - x_j||^2)`` for all row pairs of ``X``.

			NOTE(review): the listing stops once ``Phi`` is computed; ``y``
			and ``self._model`` are not used in the visible part and nothing
			is returned.
			"""
			# Squared row norms arranged as a single column (for a 2-D array X).
			X2=np.c_[np.sum(X**2,1)]
			# The two ``*`` operators had been dropped, leaving a call on the
			# float ``gamma`` and the invalid token ``2X``.
			Phi=np.exp(-self.gamma*(X2+X2.T-2*X.dot(X.T)))
	import numpy as np
	from numpy import random
	from sklearn import base

	class PUWrapper(object):
		"""Wrap a classifier together with a fold count.

		Parameters
		----------
		trad_clf : object
			Stored as ``self._trad_clf``.
			NOTE(review): presumably a conventional probabilistic classifier -- confirm.
		n_fold : int, optional
			Stored as ``self._n_fold`` (default ``5``).
			NOTE(review): presumably the number of cross-validation folds -- confirm.
		"""

		# Nesting restored: the flattened layout did not parse.
		def __init__(self,trad_clf,n_fold=5):
			self._trad_clf=trad_clf
			self._n_fold=n_fold

	def fit(self,X,s):
	import numpy as np

	def fft(X,D,k):
		"""
		Farthest-first traversal (only the opening of the body is present here).

		X: input vectors (n_samples by dimensionality)
		D: distance matrix (n_samples by n_samples)
		k: number of centroids
		out: indices of centroids

		NOTE(review): the listing ends right after the sample count is read,
		so the selection loop and the return value described above are not
		visible.
		"""
		# Nesting restored: docstring and body had been flattened to the
		# level of the ``def`` line, which does not parse.
		# Number of input vectors (rows of X).
		n=X.shape[0]
	# -*- coding: utf-8 -*-

	import MeCab
	import numpy as np

	# Module-wide MeCab tagger, built once with the "-Ochasen" option
	# (NOTE(review): this should select ChaSen-compatible output -- confirm
	# against the installed MeCab).
	m = MeCab.Tagger("-Ochasen")

	def sent_tokenize(text):
		"""Normalise ``text`` before tokenization (the rest of the body is not visible here).

		A ``unicode`` object is encoded to a UTF-8 byte string; any other
		input is left as it is.

		NOTE(review): ``unicode`` exists only on Python 2; presumably the
		MeCab binding expects byte strings there -- confirm before porting.
		"""
		# Nesting restored: the ``if`` and its body had been flattened to the
		# level of the ``def`` line, which does not parse.
		# Exact type check: subclasses of ``unicode`` are not converted.
		if type(text) is unicode:
			text = text.encode("utf8")