This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.sparse.linalg as la | |
import bs4 | |
from scipy import sparse | |
def get_text_elements(elem): | |
if isinstance(elem, bs4.NavigableString): | |
if type(elem) not in (bs4.Comment, bs4.Declaration) and elem.strip(): | |
yield elem | |
elif elem.name not in ('script', 'style'): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy import sparse | |
from sklearn import preprocessing | |
from graphranker import GraphRanker | |
class TextRank(GraphRanker): | |
def fit(self, texts): | |
self.texts = texts | |
dictionary = {} | |
data = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from copy import deepcopy | |
class GraphRanker(object): | |
def __init__(self, d=0.85, tol=1e-6, max_iters=200): | |
self.d = d | |
self.tol = tol | |
self.max_iters = max_iters | |
def fit(self, A): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy import sparse | |
from sklearn import preprocessing | |
from graphranker import GraphRanker | |
class TokenRank(GraphRanker): | |
def __init__(self, window=10, **kwds): | |
self.window = window | |
super(TokenRank, self).__init__(**kwds) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A demo code for ranking SVM | |
The data used in this code comes from http://download.joachims.org/svm_light/examples/example3.tar.gz | |
""" | |
import numpy as np | |
import itertools | |
from sklearn.linear_model import SGDClassifier | |
from sklearn.grid_search import GridSearchCV | |
np.random.seed(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def fft(X,D,k): | |
""" | |
X: input vectors (n_samples by dimensionality) | |
D: distance matrix (n_samples by n_samples) | |
k: number of centroids | |
out: indices of centroids | |
""" | |
n=X.shape[0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from numpy import random | |
import seaborn as sns | |
from sklearn import metrics | |
from puwrapper import PUWrapper | |
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV | |
sns.set_style("white") | |
random.seed(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ブルーベリー is-a 果物 | |
動物 has-a モルモット | |
動物 has-a ワタボウシタマリン | |
スポーツ is-a スポーツ | |
登山 is-a スポーツ | |
ロデオ is-a スポーツ | |
動物 has-a ユーラシアカワウソ | |
スポーツ has-a フリーダイビング | |
競馬 is-a スポーツ | |
スポーツ has-a ゴルフ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
data=[[u"スポーツ",u"競走"], | |
[u"スポーツ",u"跳躍"], | |
[u"スポーツ",u"投てき"], | |
[u"スポーツ",u"混成"], | |
[u"スポーツ",u"トライアスロン"], | |
[u"スポーツ",u"バイアスロン"], | |
[u"スポーツ",u"近代五種"], | |
[u"スポーツ",u"水泳"], | |
[u"スポーツ",u"競泳"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class RbfModelWrapper(object): | |
def __init__(self,model,gamma=1.,**kwds): | |
self._model=model | |
self.gamma=gamma | |
def fit(self,X,y): | |
X2=np.c_[np.sum(X**2,1)] | |
Phi=np.exp(-self.gamma*(X2+X2.T-2*X.dot(X.T))) |