This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.linear_model import SGDClassifier | |
from sklearn.cross_validation import StratifiedKFold | |
from sklearn.grid_search import GridSearchCV | |
class PUClassifier(object): | |
def __init__(self, trad_clf=None, n_folds=2): | |
self.trad_clf = trad_clf | |
self.n_folds = n_folds |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy import stats | |
from sklearn.model_selection import GridSearchCV | |
from sklearn.neighbors import KernelDensity | |
from sklearn import datasets | |
from sklearn.preprocessing import StandardScaler | |
random_state = 1 | |
n_samples = 200 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from numpy import linalg,random | |
from sklearn.base import BaseEstimator | |
from sklearn import datasets,metrics | |
class LRRidge(BaseEstimator): | |
def __init__(self,alpha=1.,beta=1.,gamma=10.,k=10): | |
self.alpha=alpha | |
self.beta=beta |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_cooccurrence_matrix(filename,tokenizer,window_size): | |
vocabulary={} | |
data=[] | |
row=[] | |
col=[] | |
for sentence in codecs.open(filename,"r","utf-8"): | |
sentence=sentence.strip() | |
tokens=[token for token in tokenizer(sentence) if token!=u""] | |
for pos,token in enumerate(tokens): | |
i=vocabulary.setdefault(token,len(vocabulary)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from numpy import random | |
import seaborn as sns | |
from sklearn import metrics | |
from puwrapper import PUWrapper | |
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV | |
sns.set_style("white") | |
random.seed(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class RbfModelWrapper(object): | |
def __init__(self,model,gamma=1.,**kwds): | |
self._model=model | |
self.gamma=gamma | |
def fit(self,X,y): | |
X2=np.c_[np.sum(X**2,1)] | |
Phi=np.exp(-self.gamma*(X2+X2.T-2*X.dot(X.T))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from numpy import random | |
from sklearn import base | |
class PUWrapper(object): | |
def __init__(self,trad_clf,n_fold=5): | |
self._trad_clf=trad_clf | |
self._n_fold=n_fold | |
def fit(self,X,s): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def fft(X,D,k): | |
""" | |
X: input vectors (n_samples by dimensionality) | |
D: distance matrix (n_samples by n_samples) | |
k: number of centroids | |
out: indices of centroids | |
""" | |
n=X.shape[0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 -*- | |
import MeCab | |
import numpy as np | |
m = MeCab.Tagger("-Ochasen") | |
def sent_tokenize(text): | |
if type(text) is unicode: | |
text = text.encode("utf8") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import numpy as np | |
from scipy import stats | |
def generate_template(n, width, height, random_state=1, max_random_state=10000, offset=0): | |
L = [np.array([offset, offset, width-offset, height-offset])] | |
random_state_lists = stats.randint.rvs(0, max_random_state, size=(n-1, 4), random_state=random_state) | |
for random_state_list in random_state_lists: | |
n_areas = len(L) |
NewerOlder