Skip to content

Instantly share code, notes, and snippets.

@lan496
Created March 13, 2015 05:14
Show Gist options
  • Save lan496/50275d5a7782c645f6d1 to your computer and use it in GitHub Desktop.
Save lan496/50275d5a7782c645f6d1 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from requests_oauthlib import OAuth1
import requests
import json
import re
from datetime import datetime,timedelta
import secret
def _countable_text(line):
    """Return the tweet text carried by one raw stream line, or None to skip it.

    A line is skipped when the decoded JSON is not an object, when its
    ``lang`` field is not ``'ja'``, when it has no string ``text`` field, or
    when the text contains ``'http'``.
    """
    data = json.loads(line)
    if not isinstance(data, dict) or data.get('lang') != 'ja':
        return None
    text = data.get('text')
    # type(u'') is the text type on both Python 2 (unicode) and Python 3 (str).
    if not isinstance(text, type(u'')) or 'http' in text:
        return None
    return text


def streaming(minutes):
    """Tally "w"/"W" runs in Japanese tweets read from the sample stream.

    Reads ``statuses/sample.json`` with the OAuth1 credentials stored in
    ``secret.twDict`` and feeds the text of every Japanese, link-free tweet to
    ``searchWWW``, summing the per-tweet counts.

    Args:
        minutes: How long to keep reading, in minutes.

    Returns:
        A three-item list ``[start, end, cnt]`` where ``start`` is the local
        start time and ``end`` is ``start + minutes`` (the nominal end, not
        the moment the loop actually stopped), both formatted as
        ``"%Y/%m/%d %H:%M:%S"``, and ``cnt`` maps ``'w'`` and ``'W'`` to
        four-item lists of accumulated counts.

    Raises:
        ValueError: If a non-empty stream line is not valid JSON (unchanged
            from the original, which also decoded outside its ``try``).
    """
    auth = OAuth1(
        secret.twDict['consumer_key'],
        secret.twDict['consumer_secret'],
        secret.twDict['access_token_key'],
        secret.twDict['access_token_secret'],
    )
    url = 'https://stream.twitter.com/1.1/statuses/sample.json'
    response = requests.get(url, auth=auth, stream=True)

    start = datetime.now()
    range_time = timedelta(minutes=minutes)
    deadline = start + range_time
    cnt = {'w': [0] * 4, 'W': [0] * 4}

    try:
        for line in response.iter_lines():
            # Blank lines are skipped, as in the original `if line:` guard.
            text = _countable_text(line) if line else None
            if text is not None:
                # Pass text rather than UTF-8 bytes: a str pattern cannot be
                # applied to bytes on Python 3.
                found = searchWWW(text)
                # Parenthesised single argument prints identically on
                # Python 2 and Python 3.
                print(found)
                for key, totals in cnt.items():
                    for i in range(len(totals)):
                        totals[i] += found[key][i]
            # The deadline is only evaluated once per line yielded by the
            # stream, so the loop may overrun `minutes` while waiting for data.
            if datetime.now() > deadline:
                break
    finally:
        # A streamed response holds its connection open until it is closed.
        response.close()

    return [
        start.strftime("%Y/%m/%d %H:%M:%S"),
        deadline.strftime("%Y/%m/%d %H:%M:%S"),
        cnt,
    ]
# A run only counts when it is not glued to an ASCII letter, digit or
# underscore. The class is spelled out instead of using \w so the meaning is
# the same on Python 2 (ASCII \w) and Python 3 (Unicode \w by default).
_ASCII_WORD = u'[A-Za-z0-9_]'
_RUN_PATTERNS = {
    'w': re.compile(u'(?<!' + _ASCII_WORD + u')w+(?!' + _ASCII_WORD + u')'),
    'W': re.compile(u'(?<!' + _ASCII_WORD + u')W+(?!' + _ASCII_WORD + u')'),
}


def searchWWW(s):
    """Count stand-alone runs of ``w`` and ``W`` in *s*, bucketed by length.

    A run is a maximal sequence of the same letter that has no ASCII letter,
    digit or underscore directly before or after it. Look-arounds are used for
    those boundaries so that a single separator can delimit two neighbouring
    runs (``"w w"`` contains two runs); the previous pattern consumed the
    separator and missed the second one. Parentheses and ``|`` now act as
    ordinary separators; the previous character class ``[^(\\w|w)]`` treated
    them as word characters by accident.

    NOTE(review): the original listed the key ``'w'`` twice (``'w'`` and
    ``u'w'``), which collapses to one entry. The second may have been a
    different character lost in transcoding -- confirm against the upstream
    file.

    Args:
        s: Text to scan. A byte string is decoded as UTF-8 first, with
            undecodable bytes replaced.

    Returns:
        A dict mapping ``'w'`` and ``'W'`` to a four-item list holding the
        number of runs of length exactly 1, exactly 2, exactly 3, and 4 or
        more, in that order.
    """
    if isinstance(s, bytes):
        s = s.decode('utf-8', 'replace')

    result = {}
    for key, pattern in _RUN_PATTERNS.items():
        buckets = [0] * 4
        for match in pattern.finditer(s):
            run_length = len(match.group(0))
            # Runs of four or more all land in the last bucket.
            buckets[min(run_length, 4) - 1] += 1
        result[key] = buckets
    return result
def main():
    """Sample the stream for one minute and print what streaming() returns."""
    result = streaming(1)
    # Parenthesised single argument: valid and identical in output on both
    # Python 2 and Python 3 (the bare `print result` form is Python 2 only).
    print(result)
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment