Skip to content

Instantly share code, notes, and snippets.

@boblannon
Created May 8, 2013 20:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save boblannon/5543531 to your computer and use it in GitHub Desktop.
Save boblannon/5543531 to your computer and use it in GitHub Desktop.
Script demonstrating a basic pairwise comparison of all documents in a collection using python-superfastmatch.
from superfastmatch import client
import uuid
# Shared client used by the rest of this script; it is configured with the
# URL of a server on the local machine (127.0.0.1, port 9000).
sfm_client = client.Client(url='http://127.0.0.1:9000/')
class Document(object):
    """A titled piece of text that can be indexed and compared.

    Attributes:
        doc_id: a random UUID produced by ``uuid.uuid4()`` at construction.
        title: the title string given to the constructor.
        content: the content string given to the constructor.
        near_neighbors: list that starts empty; the script in this file
            appends the ids of the most similar documents to it.
    """

    def __init__(self, title_string, content_string):
        """Create a document with a fresh random id and no neighbors.

        Args:
            title_string: title to store on the document.
            content_string: body text to store on the document.
        """
        self.doc_id = uuid.uuid4()
        self.title = title_string
        self.content = content_string
        # Created per instance so documents never share one neighbor list.
        self.near_neighbors = []

    def __repr__(self):
        return '%s(doc_id=%r, title=%r)' % (
            type(self).__name__, self.doc_id, self.title)
def _top_neighbor_ids(sfm_doc, limit=10):
    """Return the ids of the documents sharing the most text with sfm_doc.

    Args:
        sfm_doc: mapping read as ``sfm_doc['documents']['rows']``, where each
            row has a ``'docid'`` and a ``'fragments'`` sequence.
        limit: maximum number of ids to return.

    Returns:
        Up to ``limit`` doc ids ordered by descending shared total (ties keep
        their original order). An empty list if any expected key is missing.
    """
    scored = []
    try:
        for row in sfm_doc['documents']['rows']:
            # frag[2] is summed per row; presumably the length of the shared
            # fragment -- confirm against the server's response format.
            shared = sum(frag[2] for frag in row['fragments'])
            scored.append((row['docid'], shared))
    except KeyError:
        # Best effort: a response without the expected keys yields no
        # neighbors at all rather than a partial list.
        return []
    # this takes the top ten, but you could also do something more clever
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [docid for docid, _ in scored[:limit]]


# NOTE: doc_1, doc_2 and doc_3 are not defined in this script; they stand for
# Document instances that must be created before this point.
documents = [doc_1, doc_2, doc_3]

# First pass: register every document with the server (doctype 1).
for doc in documents:
    sfm_client.add(1, doc.doc_id, doc.content, title=doc.title)

# Second pass: fetch each document back and record its closest matches.
for doc in documents:
    # NOTE(review): eval() executes whatever the server sends back. If the
    # client returns a JSON string, json.loads() is the safe replacement;
    # kept as-is because the return type of get() is not visible here.
    sfm_doc = eval(sfm_client.get(1, doc.doc_id))
    doc.near_neighbors.extend(_top_neighbor_ids(sfm_doc))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment