Skip to content

Instantly share code, notes, and snippets.

@sente
Last active August 2, 2017 03:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sente/6a227e4eb13f46d38d8761ec5080eafd to your computer and use it in GitHub Desktop.
Save sente/6a227e4eb13f46d38d8761ec5080eafd to your computer and use it in GitHub Desktop.
Tweet repetition testing with mismatched gold-standard players
from bson.objectid import ObjectId
from analysis.tw import *
import pprint
# Script: load one "parent" tweet plus two other tweets from MongoDB, print
# their text and player lists, then print the result of comparing the parent
# against each of the other two.
# NOTE(review): `pymongo` and `Tw` are presumably supplied by the star import
# from analysis.tw -- confirm.
client = pymongo.MongoClient('mongodb://algdev.sportsmanias.com')
db = client['sportmanias']['tweets']

# Look the three tweets up by id, in the order parent, a, b.
parent, a, b = (
    Tw(db.find_one({'id_str': tweet_id}))
    for tweet_id in (
        '890067420207042561',
        '889706097598054400',
        '889694791969030144',
    )
)

# Blank-line spacer printed between the report sections.
section_gap = '\n\n'

# Section 1: raw text of every tweet.
for template, tweet in (
    ('parent.text: {}', parent),
    ('a.text: {}', a),
    ('b.text: {}', b),
):
    print(template.format(tweet.text))
print(section_gap)

# Section 2: the players attribute of every tweet.
for template, tweet in (
    ('parent.players: {}', parent),
    ('a.players: {}', a),
    ('b.players: {}', b),
):
    print(template.format(tweet.players))
print(section_gap)

# Section 3: compare the parent with each other tweet; the heading is printed
# before compare() runs so the output order matches the call order.
print("parent compared to a")
result_a = parent.compare(a)
pprint.pprint(result_a)
print(section_gap)
print("parent compared to b")
result_b = parent.compare(b)
pprint.pprint(result_b)
from bson.objectid import ObjectId
from analysis.tw import *
import pprint
# Script: load a pair of tweets from MongoDB by id, print their text and
# player lists, then print the result of comparing the first with the second.
# NOTE(review): `pymongo` and `Tw` are presumably supplied by the star import
# from analysis.tw -- confirm.
print('\n\n\n\n\n')

client = pymongo.MongoClient('mongodb://algdev.sportsmanias.com')
db = client['sportmanias']['tweets']

# Ids of the tweets under test. Only the first two entries are used; the
# commented-out lists below are alternative cases to swap in.
tweets = ['891850453935300608', '891843946401325056']
#tweets = ['892498705911296005', '892174106652704768']
#tweets = ['892497331500838912', '892485281793921024', '892123146119204865']
#tweets = ['892496175063982080', '892489097637384192']
#tweets = ['892492385111900160', '892144636491423744']

# Fetch and wrap tweets[0] and tweets[1], in that order.
a, b = (Tw(db.find_one({'id_str': tweets[position]})) for position in (0, 1))

# Blank-line spacer printed between the report sections.
section_gap = '\n\n'

# Section 1: raw text of both tweets.
for template, tweet in (('a.text: {}', a), ('b.text: {}', b)):
    print(template.format(tweet.text))
print(section_gap)

# Section 2: the players attribute of both tweets.
for template, tweet in (('a.players: {}', a), ('b.players: {}', b)):
    print(template.format(tweet.players))
print(section_gap)

# Section 3: the heading is printed before compare() runs so the output order
# matches the call order.
print("a compared to b:\n")
comparison = a.compare(b)
pprint.pprint(comparison)
a.text: Ravin pitching for Dodgers.
b.text: Paredes pitching for Dodgers.
a.players: ['Josh Ravin']
b.players: [None]
a compared to b:
{'common-links': [],
'duplicate': False,
'links': [],
'links2': [],
'numbers': [],
'numbers2': [],
'players': ['Josh Ravin'],
'players2': [None],
'reason': 'goldstandard players mismatch',
'similarity': 0.99999999999999989,
'similarity_threshold': 0.85,
'text': 'Ravin pitching for Dodgers.',
'text2': 'Paredes pitching for Dodgers.'}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment