Skip to content

Instantly share code, notes, and snippets.

@edsu edsu/twint-fetch.py
Last active Oct 10, 2019

Embed
What would you like to do?
Convincing twint to not give up.
#!/usr/bin/env python3
import os
import csv
import time
import twint
import random
# Shared twint configuration, reused (and mutated) by every search run below.
config = twint.Config()
config.Output = 'nodapl.csv'   # file that last_date() reads back
config.Store_csv = True        # presumably makes twint write results as CSV -- confirm in twint docs
config.Search = 'nodapl'       # search term
def last_date():
    """Return the ``date`` value of the last row in the output CSV.

    The file inspected is the one named by ``config.Output``.

    Returns:
        The ``date`` column of the final data row, or ``None`` when the file
        does not exist or contains no data rows (empty / header only).

    Raises:
        KeyError: if the CSV has rows but no ``date`` column.
    """
    if not os.path.isfile(config.Output):
        return None

    last_row = None
    # newline='' is what the csv module asks for, so quoted fields containing
    # line breaks are parsed correctly; `with` guarantees the handle is
    # closed even if parsing fails.
    with open(config.Output, newline='') as csv_file:
        for last_row in csv.DictReader(csv_file):
            pass

    # A file with no data rows never binds the loop variable; report that as
    # "no date yet" instead of crashing.
    if last_row is None:
        return None
    return last_row['date']
# Start from whatever an earlier run already wrote to the CSV, if anything.
until = last_date()

# Re-run the search repeatedly, each time bounded above by the last date seen
# in the CSV, and stop once a run leaves that date unchanged.
# NOTE(review): while no date has ever been read (until is None/empty) the
# stop condition can never be true, so the loop keeps retrying -- confirm
# that this is intended.
while True:
    # only constrain the search once we actually have a date to use
    if until:
        config.Until = until

    # fetch another batch of results
    twint.run.Search(config)

    # see how far the CSV reaches now
    new_until = last_date()
    no_progress = bool(until) and new_until == until
    if no_progress:
        break

    # continue from the new boundary on the next pass
    until = new_until

    # wait a random 0-30 seconds so the requests look less formulaic
    pause = random.randint(0, 30)
    time.sleep(pause)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.