Skip to content

Instantly share code, notes, and snippets.

@csinchok
Last active December 15, 2015 10:29
Show Gist options
  • Save csinchok/5246201 to your computer and use it in GitHub Desktop.
Save csinchok/5246201 to your computer and use it in GitHub Desktop.
Trying to find the "unique" tweets for a URL.
import requests
import sys
import pprint
import re
from difflib import SequenceMatcher
from requests_oauthlib import OAuth1
# Matches t.co short links with either scheme. The dot is escaped so that
# only the literal host "t.co" is matched, not "t" + any character + "co".
_TCO_LINK_RE = re.compile(r"https?://t\.co/\w+")


def clean_tweet(text):
    """Return *text* with t.co short links removed and whitespace normalized.

    Every ``http://t.co/<id>`` or ``https://t.co/<id>`` link is deleted, then
    runs of whitespace are collapsed to single spaces and leading/trailing
    whitespace is dropped, so tweets that differ only in their short link
    compare as equal text.
    """
    without_links = _TCO_LINK_RE.sub("", text)
    # split() with no argument discards empty fields, which both trims the
    # ends and collapses the gap left behind by a removed link.
    return " ".join(without_links.split())
# Exactly one command-line argument is required: the URL to search for.
if len(sys.argv) != 2:
    raise Exception("You need to provide a parameter")

# OAuth 1 signing object passed to every request below.
# NOTE(review): the four strings look like placeholders that must be replaced
# with real credentials before the script can authenticate -- confirm.
oauth = OAuth1(
    "CONSUMER_KEY",
    client_secret="CONSUMER_SECRET",
    resource_owner_key="ACCESS_TOKE",
    resource_owner_secret="ACCESS_SECRET")

search_endpoint = "https://api.twitter.com/1.1/search/tweets.json"
url = sys.argv[1]

# Query string for the search endpoint; the URL given on the command line is
# used as the search term.
params = {'q': url, 'count': 100, 'result_type': 'recent', 'include_entities': 0}
response = requests.get(url=search_endpoint, params=params, auth=oauth)

# Surface HTTP failures (bad credentials, rate limiting, ...) here, with the
# status code attached, instead of as a KeyError when the JSON body is read.
response.raise_for_status()
# Clusters of near-duplicate tweets. Each entry is a dict with:
#   'text'   - cleaned text of the first tweet seen for the cluster; every
#              later tweet is compared against this value,
#   'tweets' - cleaned text of the later tweets that matched 'text',
#   'min'    - the characters shared by all tweets added to the cluster so far.
unique_tweets = []

# Number of result pages to process (the initial response plus follow-ups).
MAX_PAGES = 2
# A tweet joins a cluster when its SequenceMatcher ratio exceeds this value.
SIMILARITY_THRESHOLD = 0.8

# range() instead of the Python 2-only xrange(): the loop is tiny, and the
# rest of the script already uses print() calls that run on both versions.
for page in range(MAX_PAGES):
    # Decode the body once per page rather than once per use.
    payload = response.json()

    for status in payload['statuses']:
        clean_text = clean_tweet(status['text'])
        for match in unique_tweets:
            seq = SequenceMatcher(None, match['text'], clean_text)
            if seq.ratio() > SIMILARITY_THRESHOLD:
                match['tweets'].append(clean_text)
                # Narrow the shared text down to the parts that also occur
                # in this tweet.
                min_seq = SequenceMatcher(None, clean_text, match['min'])
                # get_matching_blocks() always ends with a zero-length
                # sentinel block; drop it.
                blocks = min_seq.get_matching_blocks()[:-1]
                match['min'] = "".join(
                    [clean_text[block[0]:block[0] + block[2]] for block in blocks])
                break
        else:
            # for/else: reached only when no existing cluster was similar
            # enough, so this tweet starts a new one.
            unique_tweets.append({
                'text': clean_text,
                'tweets': [],
                'min': clean_text
            })

    next_results = payload['search_metadata'].get('next_results')
    # Stop when there are no further pages, and do not fetch a page that the
    # loop would never get to process.
    if not next_results or page == MAX_PAGES - 1:
        break
    response = requests.get(url=search_endpoint + next_results, auth=oauth)
    # Report an HTTP failure directly instead of as a KeyError on 'statuses'.
    response.raise_for_status()
# Report each cluster: first the text shared by all of its tweets, then one
# line per distinct piece of text that individual tweets add on top of it.
for cluster in unique_tweets:
    common = cluster['min'].strip()
    if not common:
        # Nothing in common survived for this cluster; print nothing for it.
        continue
    print(cluster['min'])

    # Distinct additions, kept in first-seen order.
    extras = []
    for variant in cluster['tweets']:
        opcodes = SequenceMatcher(None, common, variant).get_opcodes()
        # Concatenate only the spans that exist in the tweet but not in the
        # shared text ('insert' operations).
        inserted = "".join(
            variant[j1:j2]
            for tag, _i1, _i2, j1, j2 in opcodes
            if tag == 'insert'
        )
        if inserted not in extras:
            extras.append(inserted)

    for inserted in extras:
        if inserted:
            print(" + %s" % inserted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment