Skip to content

Instantly share code, notes, and snippets.

@MajorGressingham
Created December 9, 2013 02:11
Show Gist options
  • Save MajorGressingham/7866469 to your computer and use it in GitHub Desktop.
Save MajorGressingham/7866469 to your computer and use it in GitHub Desktop.
def SLINK(SList, threshold=0.75, similarity=None, shuffle=True):
    """Group items into clusters with a single greedy pass.

    Each item is compared against the current representative ("centroid")
    of every existing cluster.  It joins the cluster whose centroid scores
    highest, provided that score is strictly greater than ``threshold``;
    otherwise it starts a new cluster and becomes that cluster's centroid.

    Every cluster remembers the best joining score it has seen so far.
    When a later item joins with a strictly higher score, that item
    replaces the centroid.  The first item to join a cluster only records
    its score and never displaces the founding centroid.

    Args:
        SList: Iterable of items to cluster (strings when the default
            similarity is used).  It is copied, never modified.
        threshold: Minimum score (exclusive) needed to join a cluster.
        similarity: Callable ``similarity(a, b)`` returning a number where
            larger means more alike.  Defaults to ``jf.jaro_distance``,
            which must then be available at module level.
        shuffle: When true (the default) the items are processed in random
            order, so the clustering can differ between runs.  Pass
            ``False`` to process them in the given order.

    Returns:
        A list of clusters, each a list of items in the order they joined.
    """
    if similarity is None:
        # Looked up lazily so a caller-supplied metric never touches ``jf``.
        # NOTE(review): ``jf`` is presumably the jellyfish package -- confirm.
        similarity = jf.jaro_distance

    # Work on a copy: the caller's sequence keeps its order, and immutable
    # or one-shot iterables are accepted as well.
    items = list(SList)
    if shuffle:
        random.shuffle(items)

    clusters = []     # clusters[i] -> members of cluster i
    centroids = []    # centroids[i] -> current representative of cluster i
    best_scores = []  # best_scores[i] -> best joining score, None until set

    for item in items:
        if clusters:
            scores = [similarity(item, centroid) for centroid in centroids]
            best = max(scores)
            # list.index returns the first maximum, so ties go to the
            # oldest cluster.
            winner = scores.index(best)

            if best > threshold:
                clusters[winner].append(item)
                previous = best_scores[winner]
                if previous is None:
                    best_scores[winner] = best
                elif best > previous:
                    # A closer match than any seen before becomes the new
                    # representative of the cluster.
                    best_scores[winner] = best
                    centroids[winner] = item
                continue

        # Either no cluster exists yet or none is similar enough.
        clusters.append([item])
        centroids.append(item)
        best_scores.append(None)

    return clusters
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment