Skip to content

Instantly share code, notes, and snippets.

@edsu
Last active October 10, 2019 20:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edsu/125109491d2b35bb82c4e4da5802533e to your computer and use it in GitHub Desktop.
Save edsu/125109491d2b35bb82c4e4da5802533e to your computer and use it in GitHub Desktop.
Convincing twint to not give up.
#!/usr/bin/env python3
import os
import csv
import time
import twint
import random
# Shared twint configuration, passed to twint.run.Search() in the loop below.
config = twint.Config()
# the search term to collect results for
config.Search = 'nodapl'
# store results as CSV in this file; last_date() re-reads the same path
# (config.Output) to work out where the next search should resume
config.Store_csv = True
config.Output = 'nodapl.csv'
# a function that returns the last date in the csv, or None
def last_date(path=None):
    """Return the 'date' value of the last row in the CSV, or None.

    Args:
        path: CSV file to inspect. Defaults to ``config.Output`` when
            omitted, so existing no-argument callers behave as before.

    Returns:
        The ``date`` field of the final data row, or None when the file
        does not exist or contains no data rows (empty or header-only).

    Raises:
        KeyError: if the file has data rows but no ``date`` column.
    """
    if path is None:
        path = config.Output
    if not os.path.isfile(path):
        return None
    last_row = None
    # newline='' lets the csv module handle line endings itself, and the
    # context manager guarantees the handle is closed after reading.
    with open(path, newline='') as csv_file:
        # exhaust the reader; last_row ends up bound to the final row
        for last_row in csv.DictReader(csv_file):
            pass
    # a header-only or empty file yields no rows at all
    if last_row is None:
        return None
    return last_row['date']
# Resume from the last date already written to the CSV (None on a fresh start).
until = last_date()
while True:
    # set an upper date bound if we have one
    if until:
        config.Until = until
    # get the data
    twint.run.Search(config)
    # if we didn't get any new data we're done; this includes the case where
    # nothing has been written at all (new_until is None), which previously
    # could never satisfy the exit test and so re-ran the search forever
    new_until = last_date()
    if not new_until or new_until == until:
        break
    # otherwise fetch data until the new date
    until = new_until
    # but try to make the traffic look a bit less formulaic :)
    time.sleep(random.randint(0, 30))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment