Skip to content

Instantly share code, notes, and snippets.

@tsubasa

tsubasa/get_all_tweets.py

Last active Nov 10, 2016
Embed
What would you like to do?
ツイッターで指定ユーザーの過去の全ツイートを取得
# -*- coding: utf-8 -*-
import datetime
import pytz
# パッケージは下記のリポジトリのモノを使用してください
# https://github.com/twopon/MonitUserstream
from tweepy.auth import OAuthHandler
from tweepy.parsers import ModelParser
from extends import ExAPI, ExModelFactory
# Always use the tokens of the official client.
OFFICIAL_CONSUMER_KEY = ''
OFFICIAL_CONSUMER_SECRET = ''
OFFICIAL_ACCESS_KEY = ''
OFFICIAL_ACCESS_SECRET = ''
# Timezone used when formatting tweet timestamps for the search query.
timezone = pytz.timezone('Asia/Tokyo')
def oauth():
    """Build the OAuth handler from the module-level official-client tokens.

    Returns:
        An ``OAuthHandler`` created with the consumer key/secret and with the
        access key/secret set on it.
    """
    handler = OAuthHandler(OFFICIAL_CONSUMER_KEY, OFFICIAL_CONSUMER_SECRET)
    handler.set_access_token(OFFICIAL_ACCESS_KEY, OFFICIAL_ACCESS_SECRET)
    return handler
# Module-level API client shared by main(): an ExAPI authenticated with
# oauth() and configured with a ModelParser that uses ExModelFactory.
api = ExAPI(oauth(), parser=ModelParser(model_factory=ExModelFactory()))
def main(screen_name, start_date):
    """Walk backwards through a user's tweets using the universal search API.

    Each pass queries ``from:<screen_name> until:<start_date>_JST`` and then
    moves ``start_date`` to the creation time (converted to the module
    ``timezone``) of the last status yielded, so the next pass continues from
    there. A pass that makes no progress (same ``start_date`` or same last
    status id as the previous pass) counts as a retry: the upper bound is
    moved one additional hour further back per consecutive retry, and the
    loop stops once more than 10 consecutive retries have happened.

    Args:
        screen_name: Screen name of the account whose tweets are searched.
        start_date: Upper bound for the first query. It is concatenated as
            ``until:<start_date>_JST``; later bounds are generated as
            ``%Y-%m-%d_%H:%M:%S`` -- presumably the initial value should use
            the same format (TODO confirm).

    Returns:
        None. The queries, skips and the final tweet count are printed.
    """
    date_format = '%Y-%m-%d_%H:%M:%S'
    # Restrict the search to the requested account. The original code used an
    # undefined name here and never read ``screen_name``.
    keyword = 'from:{0}'.format(screen_name)

    tweets_num = 0
    last_created_at = None
    last_status_id = 0
    last_date = None
    last_id = 0
    retry = 0
    while True:
        if start_date == last_date or last_id == last_status_id:
            # No progress since the previous pass.
            if retry > 10:
                break
            retry += 1
            if last_created_at:
                # Jump further back from the last seen tweet: one more hour
                # for every consecutive stalled pass.
                skipped_to = (timezone.fromutc(last_created_at)
                              - datetime.timedelta(hours=retry))
                start_date = skipped_to.strftime(date_format)
                print('SKIP: %s (-%sh)' % (start_date, retry))
        else:
            retry = 0

        # Remember this pass's starting point to detect a stall next time.
        last_date = start_date
        last_id = last_status_id

        q = ''.join([keyword, ' until:', start_date, '_JST'])
        print('query: {0}'.format(q))
        for status in api.search_universal(q=q, count=200, modules='status',
                                           result_type='recent',
                                           tweet_mode='extended'):
            last_created_at = status.created_at
            last_status_id = status.id
            tweets_num += 1
            # Updated per status so that an empty result leaves the current
            # (possibly skipped-back) bound untouched.
            start_date = timezone.fromutc(last_created_at).strftime(date_format)
    print('finished. get tweets: %s' % tweets_num)
if __name__ == '__main__':
    # screen_name is the user ID; start_date is the first day to go back from.
    user = input('screen_name=')
    first_date = input('start_date=')
    main(screen_name=user, start_date=first_date)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.