Skip to content

Instantly share code, notes, and snippets.

@ArtOfCode-
Last active June 6, 2016 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ArtOfCode-/184bd4cdd780d2be405c6ef2f138f63f to your computer and use it in GitHub Desktop.
Save ArtOfCode-/184bd4cdd780d2be405c6ef2f138f63f to your computer and use it in GitHub Desktop.
import requests
import time
# API key; main() substitutes it into the "&key={}" query parameter of every request.
request_key = ""
# Base request URL. main() appends "&key=...&page=..." directly to this string,
# so it needs to already contain a query string ("?" plus at least one parameter).
request_url = ""
# Fill out this function with whatever checks you want.
def processor(item):
    """Decide whether the title of *item* should be collected.

    Args:
        item: One element of the ``items`` list from the API response, as
            decoded from JSON by ``main()``.

    Returns:
        True if the post title should be added; False otherwise. This
        placeholder implementation rejects every item.
    """
    return False
def main(max_items=50000, output_path="titles.txt"):
    """Page through the API, collect accepted post titles, and write them out.

    Requests ``request_url`` page by page, passes every returned item to
    ``processor()``, and keeps the ``title`` of each accepted item. Paging
    stops when at least ``max_items`` titles have been collected, when the
    response reports ``has_more`` as false, or when a request fails.
    Whatever has been collected by then is written to ``output_path``, one
    title per line.

    Args:
        max_items: Stop requesting further pages once this many titles have
            been collected. The check happens between pages, so the final
            count can exceed it by up to one page of results.
        output_path: File the titles are written to (overwritten if present).
    """
    items = []
    page = 1
    has_more = True
    while len(items) < max_items and has_more:
        url = request_url + '&key={}&page={}'.format(request_key, page)
        try:
            # A timeout keeps a stalled connection from hanging the run forever.
            response = requests.get(url, timeout=30)
        except requests.RequestException as exc:
            # Stop paging but still fall through to save what we already have.
            print("Request for page {} failed: {}".format(page, exc))
            break

        if response.status_code != 200:
            # Retrying the same page immediately would loop forever (and hammer
            # the server) while the error persists, so give up on paging.
            print("Received HTTP status {} for page {}; stopping".format(
                response.status_code, page))
            break

        jsn = response.json()
        added_count = 0
        for item in jsn.get('items', []):
            if processor(item) and 'title' in item:
                items.append(item['title'] + "\n")
                added_count += 1
        print("Added {} titles; new length {}".format(added_count, len(items)))
        page += 1

        if 'backoff' in jsn:
            # The server asked us to wait this many seconds before the next call.
            print("Received backoff of {} seconds".format(jsn['backoff']))
            time.sleep(int(jsn['backoff']))
        if 'has_more' in jsn:
            has_more = jsn['has_more']
            print("Has more: {}".format(has_more))

    print("Final length {}".format(len(items)))
    # Explicit UTF-8 so non-ASCII titles do not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(items)
# Run the collection only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment