Skip to content

Instantly share code, notes, and snippets.

@NAVNEETOJHA
Last active December 22, 2015 22:29
Show Gist options
  • Save NAVNEETOJHA/6540748 to your computer and use it in GitHub Desktop.
Save NAVNEETOJHA/6540748 to your computer and use it in GitHub Desktop.
import re
from re import sub
import time
import cookielib
from cookielib import CookieJar
import urllib2
from urllib2 import urlopen
import difflib
# Cookie-aware URL opener that sends a browser-like User-agent header; every
# page fetch below goes through it.
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]

# Search term and the base of the search URL it is appended to.
keyWord = 'obama'
startingLink = 'https://twitter.com/search/realtime?q='

# oldTwit holds the tweet texts from the previous successful poll, newTwit the
# ones from the poll currently being processed.
oldTwit = []
newTwit = []

# Poll forever: fetch the search page, print every tweet found, compare the
# batch with the previous one and wait longer the more similar they are.
while True:
    try:
        # Start every poll from an empty batch so that a failure part-way
        # through an earlier pass cannot leak stale tweets into this one.
        newTwit = []

        response = opener.open(startingLink + keyWord + '&src=hash')
        try:
            sourceCode = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        # Grab the inner HTML of every tweet paragraph on the page.
        splitSource = re.findall(
            r'<p class="js-tweet-text tweet-text">(.*?)</p>', sourceCode)
        for item in splitSource:
            print(item)
            print('_______________')
            # Drop the remaining HTML tags to keep only the tweet text.
            aTweet = re.sub(r'<.*?>', '', item)
            print(aTweet)
            newTwit.append(aTweet)

        # ratio() is a float in [0, 1]; 1.0 means the two batches are equal.
        comparison = difflib.SequenceMatcher(None, newTwit, oldTwit)
        howSim = comparison.ratio()
        print('#############')
        print('This selection is %s similar to the past' % howSim)

        # The current batch becomes the reference for the next poll. No
        # placeholder element is added, so the ratio reflects tweets only.
        oldTwit = newTwit

        # Back off in proportion to the similarity: 0 to 1.5 seconds.
        time.sleep(howSim * 1.5)
    except Exception as e:
        # Best-effort scraper: report the problem, wait, then try again.
        print(str(e))
        print('error in the main try')
        time.sleep(100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment