Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
nkt1546789 / ranking.py
Last active August 29, 2015 14:10 — forked from agramfort/ranking.py
"""
Implementation of pairwise ranking using scikit-learn LinearSVC
Reference: "Large Margin Rank Boundaries for Ordinal Regression", R. Herbrich,
T. Graepel, K. Obermayer.
Authors: Fabian Pedregosa <fabian@fseoane.net>
Alexandre Gramfort <alexandre.gramfort@inria.fr>
"""
@nkt1546789
nkt1546789 / string_kernel.py
Created December 11, 2014 18:23
This is an implementation of the string kernel called "limited range correlation" in Python.
from pylab import *
def string_kernel(s1,s2,n=inf):
k1=[]
k2=[]
A=set()
for k in xrange(1,min(n,len(s1)+1)):
for i in xrange(0,len(s1)-(k-1)):
A.add(s1[i:i+k])
for k in xrange(1,min(n,len(s2)+1)):
@nkt1546789
nkt1546789 / intersected.py
Last active August 29, 2015 14:12
線分の交差判定 (Test whether two line segments intersect) in Python
def intersected(a,b,c,d):
x1=b[0]-a[0]
x2=c[0]-d[0]
x3=b[1]-a[1]
x4=c[1]-d[1]
d=float(x1*x4-x2*x3)
if d==0: return False
c1=c[0]-a[0]
c2=c[1]-a[1]
s=(c1*x4-c2*x2)/d
@nkt1546789
nkt1546789 / dijkstra.py
Last active August 29, 2015 14:12
Implementation of Dijkstra's algorithm in Python
def dijkstra(M,s,SELF=0,NOTCONNECTED=-1):
"""
M: adjacency matrix
s: starting point
"""
n=len(M)
table=[float("inf") for i in xrange(n)]
done=set()
heap=[]
heappush(heap,[0,s])
@nkt1546789
nkt1546789 / urlmulti.py
Created March 18, 2015 05:15
urlmulti.py speeds up loading many URLs by fetching them in parallel with multiprocessing.
from multiprocessing import Pool
import requests
def urlopen(url, timeout=None):
    """Fetch ``url`` and return the raw response body, or ``None`` on failure.

    This is a best-effort helper meant to be mapped over many URLs: a URL
    that cannot be fetched yields ``None`` instead of aborting the batch.

    url: address to download with ``requests.get``.
    timeout: optional timeout in seconds forwarded to ``requests.get``.
        The default ``None`` keeps the previous behaviour (wait forever).

    Returns the ``content`` attribute of the response, or ``None`` if the
    request raised an exception.
    """
    try:
        return requests.get(url, timeout=timeout).content
    except Exception:
        # Deliberately broad so one bad URL never kills the whole batch,
        # but unlike a bare ``except:`` this lets KeyboardInterrupt and
        # SystemExit propagate so the process can still be stopped.
        return None
def urlsopen(urls, processes=8):
    """Fetch every URL in ``urls`` in parallel and return the results in order.

    urls: iterable of addresses; each one is passed to ``urlopen``.
    processes: number of worker processes in the pool (default 8, the
        value that used to be hard-coded).

    Returns a list with one entry per input URL, in input order, holding
    whatever ``urlopen`` returned for that URL. Empty input returns ``[]``
    without starting any worker process.
    """
    # Materialise once so generators can be tested for emptiness.
    urls = list(urls)
    if not urls:
        return []

    pool = Pool(processes=processes)
    try:
        return pool.map(urlopen, urls)
    finally:
        # Always release the worker processes; previously the pool was
        # never shut down, so each call left its workers behind. ``map``
        # has already collected every result by the time we get here on
        # the success path, so terminating loses no work.
        pool.terminate()
        pool.join()
@nkt1546789
nkt1546789 / wmf.py
Last active August 29, 2015 14:17
Implementation of Weighted Matrix Factorization for Implicit Feedback
import numpy
from numpy import random
from numpy import linalg
def WMF(R,k=10,alpha=0.1,beta=10.,max_iteration=100000,stop_criterion=1e-6):
"""
R: feedback matrix like R_ij represents the number that user_i clicks item_j
k: dimensionality of latent factor vector
alpha: regularization parameter
beta: confidence parameter
@nkt1546789
nkt1546789 / weighted_knn.py
Created March 28, 2015 08:38
Implementation of weighted kNN.
import numpy
from numpy import random
from matplotlib import pyplot
from sklearn import datasets
from sklearn import base
from sklearn import metrics
from sklearn import grid_search
class NonParametricKNN(base.BaseEstimator):
def __init__(self,sigma=0.1):
@nkt1546789
nkt1546789 / kitml.py
Last active August 29, 2015 14:22
Implementation of Kernel Information-Theoretic Metric Learning proposed by J. V. Davis et al.
import numpy as np
def KernelITML(K,constraints,dm=None,dc=None,gamma=1.0,max_iter=1000,stop_threshold=1e-3):
"""
K: initial kernel matrix.
constraints: array or list whose element is in the form of (delta,i,j), where delta=1 if (i,j) is must-link and delta=-1 if (i,j) is cannot-link.
dm: target distance for must-link. if not provided, dm is automatically selected.
dc: target distance for cannot-link.
gamma: trade-off parameter. gamma=1 gives stable solution.
max_iter: maximum number of iteration.
# coding: utf-8
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import accuracy_score
from gensim.models import word2vec
import numpy as np
np.random.seed(0)
model=word2vec.Word2Vec.load("/path/to/your/model")
@nkt1546789
nkt1546789 / visualize_filetype.py
Created July 9, 2015 17:07
Visualization of the file types on your computer.
import os,re
from os import path
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sns.set(style="white", context="talk")
HOMEPATH="/path/to/your/home_directory"
MAX_DEPTH=5