This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Implementation of pairwise ranking using scikit-learn LinearSVC | |
Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich, | |
T. Graepel, K. Obermayer. | |
Authors: Fabian Pedregosa <fabian@fseoane.net> | |
Alexandre Gramfort <alexandre.gramfort@inria.fr> | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pylab import * | |
def string_kernel(s1,s2,n=inf): | |
k1=[] | |
k2=[] | |
A=set() | |
for k in xrange(1,min(n,len(s1)+1)): | |
for i in xrange(0,len(s1)-(k-1)): | |
A.add(s1[i:i+k]) | |
for k in xrange(1,min(n,len(s2)+1)): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def intersected(a,b,c,d): | |
x1=b[0]-a[0] | |
x2=c[0]-d[0] | |
x3=b[1]-a[1] | |
x4=c[1]-d[1] | |
d=float(x1*x4-x2*x3) | |
if d==0: return False | |
c1=c[0]-a[0] | |
c2=c[1]-a[1] | |
s=(c1*x4-c2*x2)/d |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dijkstra(M,s,SELF=0,NOTCONNECTED=-1): | |
""" | |
M: adjacency matrix | |
s: starting point | |
""" | |
n=len(M) | |
table=[float("inf") for i in xrange(n)] | |
done=set() | |
heap=[] | |
heappush(heap,[0,s]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multiprocessing import Pool | |
import requests | |
def urlopen(url, timeout=None):
    """Fetch ``url`` with ``requests.get`` and return the response body.

    Args:
        url: Address passed straight to ``requests.get``.
        timeout: Optional timeout forwarded to ``requests.get``. The default
            ``None`` keeps the original behaviour (no timeout).

    Returns:
        The ``content`` attribute of the response, or ``None`` when the
        request raised an exception.
    """
    try:
        return requests.get(url, timeout=timeout).content
    except Exception:
        # Best-effort by design: an ordinary failure maps to None instead of
        # propagating. Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit through so the program can still be interrupted.
        return None
def urlsopen(urls):
    """Fetch every URL in ``urls`` using a pool of 8 worker processes.

    Args:
        urls: Iterable of addresses; each one is handed to ``urlopen``.

    Returns:
        A list with one entry per input URL, in input order, holding
        whatever ``urlopen`` returned for that URL.
    """
    urls = list(urls)
    if not urls:
        # Nothing to fetch: avoid spawning worker processes for no work.
        return []
    pool = Pool(processes=8)
    try:
        return pool.map(urlopen, urls)
    finally:
        # Always release the worker processes, even if map() raised;
        # otherwise each call leaks a pool of 8 processes.
        pool.close()
        pool.join()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
from numpy import random | |
from numpy import linalg | |
def WMF(R,k=10,alpha=0.1,beta=10.,max_iteration=100000,stop_criterion=1e-6): | |
""" | |
R: feedback matrix like R_ij represents the number that user_i clicks item_j | |
k: dimensionality of latent factor vector | |
alpha: regularization parameter | |
beta: confidence parameter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
from numpy import random | |
from matplotlib import pyplot | |
from sklearn import datasets | |
from sklearn import base | |
from sklearn import metrics | |
from sklearn import grid_search | |
class NonParametricKNN(base.BaseEstimator): | |
def __init__(self,sigma=0.1): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def KernelITML(K,constraints,dm=None,dc=None,gamma=1.0,max_iter=1000,stop_threshold=1e-3): | |
""" | |
K: initial kernel matrix. | |
constraints: array or list whose element is in the form of (delta,i,j), where delta=1 if (i,j) is must-link and delta=-1 if (i,j) is cannot-link. | |
dm: target distance for must-link. if not provided, dm is automatically selected. | |
dc: target distance for cannot-link. | |
gamma: trade-off parameter. gamma=1 gives stable solution. | |
max_iter: maximum number of iteration. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from sklearn.svm import SVC | |
from sklearn.grid_search import GridSearchCV | |
from sklearn.metrics import accuracy_score | |
from gensim.models import word2vec | |
import numpy as np | |
np.random.seed(0) | |
model=word2vec.Word2Vec.load("/path/to/your/model") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os,re | |
from os import path | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import pandas as pd | |
sns.set(style="white", context="talk") | |
HOMEPATH="/path/to/your/home_directory" | |
MAX_DEPTH=5 |
OlderNewer