Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
Created December 11, 2014 18:23
Show Gist options
  • Save nkt1546789/187fa009691a27440498 to your computer and use it in GitHub Desktop.
Save nkt1546789/187fa009691a27440498 to your computer and use it in GitHub Desktop.
This is a Python implementation of a string kernel called "limited range correlation".
from pylab import *
def string_kernel(s1, s2, n=inf):
    """Return the "limited range correlation" string kernel of s1 and s2.

    Every distinct substring of either input whose length is strictly less
    than ``n`` becomes one feature.  The feature value for a string is
    ``(1 + log2(len(sub))) * count``, where ``count`` is ``str.count(sub)``
    (i.e. the number of non-overlapping occurrences).  The result is the
    cosine similarity of the two feature vectors.

    Parameters
    ----------
    s1, s2 : str
        The strings to compare.
    n : int or inf, optional
        Exclusive upper bound on the substring length.  The default ``inf``
        uses every substring length available in each input.

    Returns
    -------
    float
        Cosine similarity of the two feature vectors, or ``0.`` when either
        vector has zero norm (for example when an input is empty or ``n``
        is too small for any substring to be collected).
    """
    # Gather the shared feature set: all substrings of length < n from both
    # inputs.  min() collapses the default n=inf to an integer bound that
    # range() accepts.
    substrings = set()
    for text in (s1, s2):
        stop = min(n, len(text) + 1)
        for k in range(1, stop):
            for i in range(len(text) - k + 1):
                substrings.add(text[i:i + k])

    k1 = []
    k2 = []
    for sub in substrings:
        # Longer substrings get a logarithmically larger weight.
        weight = 1. + log2(len(sub))
        k1.append(weight * s1.count(sub))
        k2.append(weight * s2.count(sub))
    k1 = array(k1)
    k2 = array(k2)

    # NumPy scalar division by zero produces nan (with a warning) instead of
    # raising ZeroDivisionError, so the zero-norm case is checked explicitly.
    denom = norm(k1) * norm(k2)
    if denom == 0:
        return 0.
    return k1.dot(k2) / denom
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment