Skip to content

Instantly share code, notes, and snippets.

@jamilnyc
Last active May 23, 2021 00:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamilnyc/70200814ac332cff07944089cfa69206 to your computer and use it in GitHub Desktop.
Save jamilnyc/70200814ac332cff07944089cfa69206 to your computer and use it in GitHub Desktop.
A simple Python script that sequentially fetches URLs, parses their JSON responses, and writes them to files.
#!/usr/bin/python3
import json
import requests
from pathlib import Path
import sys
class bcolors:
    """
    Escape sequences for styling text printed to the command line.

    Emit one of the codes before the text and ``ENDC`` after it.
    """

    # Colors
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Terminator placed after styled text
    ENDC = '\033[0m'
def print_error(output: str):
    """Print ``output`` between the ``bcolors.FAIL`` and ``bcolors.ENDC`` codes."""
    colored = "{}{}{}".format(bcolors.FAIL, output, bcolors.ENDC)
    print(colored)
def print_warning(output: str):
    """Print ``output`` between the ``bcolors.WARNING`` and ``bcolors.ENDC`` codes."""
    colored = "{}{}{}".format(bcolors.WARNING, output, bcolors.ENDC)
    print(colored)
def print_ok(output: str):
    """Print ``output`` between the ``bcolors.OKGREEN`` and ``bcolors.ENDC`` codes."""
    colored = "{}{}{}".format(bcolors.OKGREEN, output, bcolors.ENDC)
    print(colored)
def get_urls_from_file(filename: str) -> list:
    """
    Read the URLs listed in a text file, one per line.

    Surrounding whitespace is stripped from every line. Blank lines and
    lines whose first non-blank character is ``#`` are skipped.

    :param filename: Path of the text file to read.
    :return: The remaining URLs as strings, in file order.
    :raises OSError: If the file cannot be opened.
    """
    urls = []
    with open(filename) as f:
        for line in f:
            candidate = line.strip()
            # Strip before testing: a blank line would otherwise become an
            # empty URL, and an indented '#' would not be seen as a comment.
            if not candidate or candidate.startswith('#'):
                continue
            urls.append(candidate)
    return urls
def write_to_file(content: str, directory: str, filename: str):
    """
    Write ``content`` to the file ``filename`` inside ``directory``.

    The directory (and any missing parents) is created when absent, and
    the file is opened in ``"w"`` mode, so an existing file is overwritten.
    """
    destination = "{}/{}".format(directory, filename)
    # Make sure the target directory is there before opening the file
    Path(directory).mkdir(parents=True, exist_ok=True)
    with open(destination, "w") as out:
        out.write(content)
def save_response_to_file(url: str, directory: str, filename: str, print_result: bool = False,
                          timeout: float = 30.0) -> bool:
    """
    Fetch ``url``, parse the body as JSON and write it, pretty-printed, to a file.

    :param url: Address to request with HTTP GET.
    :param directory: Directory the response file is written into.
    :param filename: Name of the response file inside ``directory``.
    :param print_result: When true, print the status line and any error.
    :param timeout: Seconds to wait for the server before giving up.
    :return: ``True`` when the response was saved; ``False`` when the request
        failed or the body was not valid JSON.
    """
    try:
        # Without a timeout a stalled server would block the script forever.
        r = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Connection problems and malformed URLs count as a failed download
        # instead of aborting the caller.
        if print_result:
            print_error('Request Error: {} ({})'.format(url, exc))
        return False

    if print_result:
        print("[{}] {}".format(r.status_code, url))

    try:
        resp = r.json()
    except ValueError:
        # Every JSON decode error requests can raise (stdlib json,
        # simplejson, or its own wrapper) derives from ValueError.
        if print_result:
            print_error('JSON Decode Error: {}'.format(url))
        return False

    json_str = json.dumps(resp, indent=2, sort_keys=True)
    write_to_file(content=json_str, directory=directory, filename=filename)
    return True
if __name__ == "__main__":
    # Script entry point: fetch every URL listed in the file given as the
    # first argument and store each JSON response in the directory given as
    # the second argument.

    # Read command line arguments passed to this script
    args = sys.argv
    if len(args) != 3:
        print_error('Usage: {} file_with_urls.txt directory_to_write_responses_to'.format(args[0]))
        sys.exit(1)

    filename = args[1]
    directory = args[2]

    urls = get_urls_from_file(filename)
    saved = 0
    for url in urls:
        # TODO: Parallelize this operation or use asyncio?
        # Name the file after the last path segment. Trailing slashes are
        # dropped first; otherwise the name would be empty and the write
        # would target the directory itself.
        response_filename = url.rstrip('/').rsplit('/', 1)[-1] or 'index'
        success = save_response_to_file(url=url, directory=directory, filename=response_filename, print_result=True)
        if success:
            saved += 1

    # Summarize: green when everything was saved, yellow otherwise
    status = 'Saved {} of {} url responses'.format(saved, len(urls))
    if saved == len(urls):
        print_ok(status)
    else:
        print_warning(status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment