Skip to content

Instantly share code, notes, and snippets.

@z4y4ts
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save z4y4ts/775659a33454d29ab654 to your computer and use it in GitHub Desktop.
Save z4y4ts/775659a33454d29ab654 to your computer and use it in GitHub Desktop.
mongoshmongo
Traceback (most recent call last):
File "metrics.py", line 32, in <module>
test()
File "metrics.py", line 27, in test
distance_type='jaccard')
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/queryset/base.py", line 201, in create
return self._document(**kwargs).save()
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/document.py", line 229, in save
doc = self.to_mongo()
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/base/document.py", line 255, in to_mongo
value = field.to_mongo(value)
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/base/fields.py", line 305, in to_mongo
for key, item in value.iteritems()])
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/fields.py", line 924, in to_mongo
id_ = id_field.to_mongo(id_)
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/base/fields.py", line 401, in to_mongo
self.error(unicode(e))
File "/Users/ai/.virtualenvs/unshred/lib/python2.7/site-packages/mongoengine/base/fields.py", line 124, in error
raise ValidationError(message, errors=errors, field_name=field_name)
mongoengine.errors.ValidationError: doc_1:page1_100 is not a valid ObjectId
from __future__ import division
from itertools import chain, combinations
# from string import lower
from app import db
from models import Shreds
class ShredsDistances(Document):
    """Stored distance between a pair of shreds under a named metric."""

    # The pair of Shreds documents the distance was measured between.
    # `default=list` is a callable, so each new document gets its own empty
    # list; a literal `[]` would be one list object shared by every instance.
    shreds_pair = ListField(ReferenceField(Shreds), default=list)
    # Name of the metric that produced `distance` (at most 10 characters);
    # test() in this file stores 'jaccard'.
    distance_type = StringField(max_length=10, default='')
    # The computed distance value.
    distance = FloatField()
def _shred_tag_set(shred):
    """Return the set of lower-cased text tags found on *shred*.

    ``shred.tags`` is iterated, and each item's ``'tags'`` entry is read as
    an iterable of strings; all of them are flattened into a single set.
    """
    try:
        to_text = unicode  # Python 2: coerce byte strings to unicode
    except NameError:
        to_text = str  # Python 3: str is already a text type
    flattened = chain.from_iterable(entry['tags'] for entry in shred.tags)
    return set(to_text(tag.lower()) for tag in flattened)


def jaccard_distance(shred_a, shred_b):
    """Return the Jaccard distance between the tag sets of two shreds.

    See http://en.wikipedia.org/wiki/Jaccard_index

    Tags are compared case-insensitively.  The result is a float in
    [0.0, 1.0]: 0.0 when both shreds carry exactly the same tags, 1.0 when
    they share none.

    When neither shred has any tags the union is empty and the ratio is
    undefined; following the usual convention (Jaccard index of two empty
    sets is 1) the distance is reported as 0.0 instead of raising
    ZeroDivisionError.
    """
    tags_a = _shred_tag_set(shred_a)
    tags_b = _shred_tag_set(shred_b)
    combined = tags_a | tags_b
    if not combined:
        return 0.0
    shared = tags_a & tags_b
    # Explicit float() keeps true division even without
    # `from __future__ import division` in effect.
    return 1 - float(len(shared)) / len(combined)
def calculate_jaccard_distance_matrix(shreds):
    """Lazily yield ``(first, second, distance)`` for every pair of shreds.

    Each unordered pair drawn from *shreds* is visited exactly once, in the
    order produced by ``itertools.combinations``; the distance is the
    Jaccard distance between the two shreds of the pair.
    """
    every_pair = combinations(shreds, 2)
    for pair in every_pair:
        first, second = pair
        yield first, second, jaccard_distance(first, second)
def test():
    """Compute and store the Jaccard distance for every pair of shreds.

    Fetches all Shreds documents, walks each pair once and creates one
    ShredsDistances record per pair with ``distance_type`` 'jaccard'.
    """
    every_shred = Shreds.objects()
    pairwise = calculate_jaccard_distance_matrix(every_shred)
    for first, second, dist in pairwise:
        ShredsDistances.objects.create(
            distance_type='jaccard',
            distance=dist,
            shreds_pair=[first, second],
        )
# Script entry point: run the distance calculation only when this file is
# executed directly, not when it is imported.
if "__main__" == __name__:
    # Debugging aid, left disabled: import ipdb; ipdb.set_trace()
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment