Little script to count duplicates from Twitter Public API Stream
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Count duplicate status ids seen on the Twitter public sample stream.

Roughly once per second (checked whenever an item carrying an ``id``
arrives) the script prints how many such items were received in that
window, the floored mean of all window counts so far, and how many of the
window's ids had already been seen earlier in the run.
"""
import json
import time

consumer_key = 'XXXXXXXXXX'
consumer_secret = 'XXXXXXXXXXXXXX'
token = 'XXXXXXXXXXXXXXXXX'
token_secret = 'XXXXXXXXXXXXXXX'


def _print_report(count, average, dups):
    """Print one report line in the script's original output format."""
    # A single pre-formatted string prints the same text on Python 2 and 3.
    print('%s /s average: %s dups: %s' % (count, average, dups))


def count_duplicates(messages, clock=time.time, report=None, interval=1):
    """Consume *messages* and report per-window counts and duplicate ids.

    Args:
        messages: iterable of stream items. Items that cannot be indexed
            with ``'id'`` are skipped and do not trigger the timing check.
        clock: zero-argument callable returning the current time in
            seconds; defaults to ``time.time``.
        report: callable ``report(count, average, dups)`` invoked once per
            elapsed window; defaults to printing the line to stdout.
        interval: window length in the clock's units (default 1).

    Returns:
        The total number of duplicate ids seen, including any that arrived
        after the last report.
    """
    if report is None:
        report = _print_report

    # NOTE: every id seen is kept for the whole run, so memory grows with
    # the number of distinct ids received.
    seen_ids = set()
    dups = 0
    dups_reported = 0
    window_count = 0
    total_count = 0
    windows = 0
    window_start = clock()

    for msg in messages:
        try:
            msg_id = msg['id']
        except (KeyError, TypeError):
            # Not a status message (no 'id' key, or not subscriptable).
            continue

        if msg_id in seen_ids:
            dups += 1
        else:
            seen_ids.add(msg_id)
        window_count += 1

        if clock() - window_start > interval:
            # A running total replaces re-summing a growing list of window
            # counts; floor division keeps the integer average that the
            # original Python 2 arithmetic produced.
            total_count += window_count
            windows += 1
            report(window_count, total_count // windows, dups - dups_reported)
            window_start = clock()
            window_count = 0
            dups_reported = dups

    return dups


def main():
    """Open the public sample stream and report duplicates until it ends."""
    # Imported here so the counting logic stays importable without the
    # third-party ``twitter`` package installed.
    from twitter import OAuth, TwitterStream

    auth = OAuth(token, token_secret, consumer_key, consumer_secret)
    stream = TwitterStream(auth=auth, domain='stream.twitter.com')
    count_duplicates(stream.statuses.sample())


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment