Skip to content

Instantly share code, notes, and snippets.

@dchaplinsky
Forked from z4y4ts/exception
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dchaplinsky/8bd153ac610ea1319014 to your computer and use it in GitHub Desktop.
Save dchaplinsky/8bd153ac610ea1319014 to your computer and use it in GitHub Desktop.
Traceback (most recent call last):
  File "metrics.py", line 32, in <module>
    test()
  File "metrics.py", line 27, in test
    distance_type='jaccard')
  File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/queryset/base.py", line 201, in create
    return self._document(**kwargs).save()
  File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/document.py", line 241, in save
    object_id = collection.save(doc, **write_concern)
  File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/pymongo/collection.py", line 266, in save
    return self.insert(to_save, manipulate, safe, check_keys, **kwargs)
  File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/pymongo/collection.py", line 362, in insert
    self.database.connection)
bson.errors.InvalidDocument: Cannot encode object: <Shreds: 100>
from __future__ import division
from itertools import chain, combinations
# from string import lower
from app import db
from flask.ext.mongoengine import Document
from mongoengine import ListField, ReferenceField, StringField, FloatField
from models import Shreds
class ShredsDistances(Document):
    """Persisted distance between a pair of shreds under a named metric."""

    # References to the shreds being compared. A callable default is used so
    # that every new document gets its own fresh list instead of all of them
    # sharing a single mutable list object.
    shreds_pair = ListField(ReferenceField(Shreds), default=list)
    # Short name of the metric that produced ``distance`` (at most 10
    # characters), e.g. 'jaccard'.
    distance_type = StringField(max_length=10, default='')
    # The computed distance value.
    distance = FloatField()
def _normalized_tags(shred):
    """Return the set of lower-cased tag strings attached to ``shred``.

    ``shred.tags`` is iterated as a sequence of mappings, each of which holds
    an iterable of strings under the ``'tags'`` key.
    """
    try:
        text_type = unicode  # Python 2: keep the original unicode coercion
    except NameError:
        text_type = str  # Python 3: ``str`` is already the unicode type
    return {
        text_type(tag.lower())
        for entry in shred.tags
        for tag in entry['tags']
    }


def jaccard_distance(shred_a, shred_b):
    """Return the Jaccard distance between the tag sets of two shreds.

    See http://en.wikipedia.org/wiki/Jaccard_index

    Tags are compared case-insensitively. The result is a float in
    ``[0.0, 1.0]``: ``0.0`` when both shreds carry exactly the same tags and
    ``1.0`` when they share none.

    When neither shred has any tags the union is empty and the ratio is
    undefined; the Jaccard index of two empty sets is conventionally 1, so
    the distance returned is ``0.0`` rather than raising ZeroDivisionError.
    """
    tags_a = _normalized_tags(shred_a)
    tags_b = _normalized_tags(shred_b)
    union = tags_a | tags_b
    if not union:
        return 0.0
    # Explicit float() keeps this a true division even where the module-level
    # ``from __future__ import division`` is not in effect.
    return 1 - float(len(tags_a & tags_b)) / len(union)
def calculate_jaccard_distance_matrix(shreds):
    """Lazily yield ``(shred, other, distance)`` for every unordered pair.

    Pairs are produced by ``itertools.combinations(shreds, 2)``, so each pair
    appears once and no shred is paired with itself. The distance is whatever
    ``jaccard_distance`` returns for the pair. Nothing is read from ``shreds``
    until the generator is first advanced.
    """
    pairs = combinations(shreds, 2)
    for pair in pairs:
        first, second = pair
        yield first, second, jaccard_distance(first, second)
def test():
    """Compute and store the Jaccard distance for every pair of shreds.

    Loads all ``Shreds`` documents without their ``features`` and ``contour``
    fields, then creates one ``ShredsDistances`` document per unordered pair
    with ``distance_type='jaccard'``.
    """
    shreds = Shreds.objects.exclude("features", "contour")
    for a, b, d in calculate_jaccard_distance_matrix(shreds):
        # NOTE(review): passing the Shreds instances themselves made this call
        # fail with "bson.errors.InvalidDocument: Cannot encode object:
        # <Shreds: ...>", i.e. a raw document object reached the BSON encoder.
        # Handing the reference field explicit DBRefs avoids relying on the
        # document-to-id conversion. The root cause of that conversion being
        # skipped is not visible from this file -- confirm against the
        # Shreds model definition and the installed mongoengine version.
        ShredsDistances.objects.create(
            shreds_pair=[a.to_dbref(), b.to_dbref()],
            distance=d,
            distance_type='jaccard')
if __name__ == "__main__":
    # Run as a script: compute and store the pairwise shred distances.
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment