Skip to content

Instantly share code, notes, and snippets.

@lurch
Last active December 5, 2020 18:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lurch/6ecc3c48c3989fa61b26721ee9c0d958 to your computer and use it in GitHub Desktop.
Save lurch/6ecc3c48c3989fa61b26721ee9c0d958 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import http.client
import json
import os
import urllib.parse
import urllib.request
# Mirrors the per-OS metadata JSON referenced by a repo_list.json: the repo
# list names several os_list documents, and every os_list entry names an
# os_info JSON file, which is validated and saved under
# <output_dir>/<os_name>/.

repo_list_url = 'https://raw.githubusercontent.com/procount/pinn-os/master/os/repo_list.json'
output_dir = 'os_metadata'
# Every os_name seen so far (across all repos); used to report duplicates.
os_names = set()

# Longest any single blocking network operation may take, in seconds, so that
# one stalled server cannot hang the whole run.
_TIMEOUT_SECONDS = 30

# What a failed download can raise: URLError/HTTPError and socket errors are
# OSError subclasses, an unusable URL or a body that is not valid UTF-8 is a
# ValueError, and a truncated HTTP response is an http.client.HTTPException.
# Deliberately NOT a bare "except", so Ctrl-C and real bugs still surface.
_DOWNLOAD_ERRORS = (OSError, ValueError, http.client.HTTPException)

# Some servers reply with a 403 if the User-Agent isn't set ?!?!
_OS_INFO_HEADERS = {'User-Agent': 'urllib-example/0.1'}

# File name used when an os_info URL has no final path component.
_FALLBACK_FILE_NAME = 'os_info.json'


def _fetch_text(url, headers=None):
    """Download *url* and return its body decoded as UTF-8.

    *headers* is an optional dict of extra request headers.
    Raises one of ``_DOWNLOAD_ERRORS`` when the download or decoding fails.
    """
    req = urllib.request.Request(url, headers=headers or {})
    with urllib.request.urlopen(req, timeout=_TIMEOUT_SECONDS) as resp:
        return resp.read().decode('utf8')


def _parse_list(text, key):
    """Parse *text* as JSON and return the list stored under *key*.

    Raises ValueError when *text* is not valid JSON (json.JSONDecodeError is
    a ValueError) or when the document has no list under *key*.
    """
    doc = json.loads(text)
    if not isinstance(doc, dict) or not isinstance(doc.get(key), list):
        raise ValueError("no %r list found" % key)
    return doc[key]


def _require_str(entry, key):
    """Return ``entry[key]``; raise ValueError unless it is a string."""
    value = entry.get(key) if isinstance(entry, dict) else None
    if not isinstance(value, str):
        raise ValueError("missing or non-string %r" % key)
    return value


def _safe_dir_name(os_name):
    """Turn a remote-supplied os_name into a single safe directory name."""
    for sep in {'/', os.sep, os.altsep} - {None}:
        os_name = os_name.replace(sep, '_')
    # '', '.' and '..' would resolve to output_dir itself or to its parent.
    if os_name in ('', '.', '..'):
        os_name = '_' + os_name
    return os_name


def _local_file_name(url):
    """Return the file name to store *url* under (query string excluded)."""
    return os.path.basename(urllib.parse.urlsplit(url).path) or _FALLBACK_FILE_NAME


def _process_os(os_obj, list_url, out_dir):
    """Download, validate and save the os_info JSON of one os_list entry.

    *list_url* is only used to say where a malformed entry came from.
    Problems are reported on stdout; nothing is raised for bad remote data.
    """
    try:
        name = _require_str(os_obj, 'os_name')
        info_url = _require_str(os_obj, 'os_info')
    except ValueError as exc:
        print("!!! Skipping malformed os_list entry from %s (%s)" % (list_url, exc))
        return

    print("\t\tProcessing %s (%s)" % (name, os_obj.get('description')))
    if name in os_names:
        print("!!! Found duplicate os_name %s" % name)
    os_names.add(name)

    os_dirname = os.path.join(out_dir, _safe_dir_name(name))
    try:
        os.makedirs(os_dirname, exist_ok=True)
    except (OSError, ValueError) as exc:  # ValueError: NUL byte in the name
        print("!!! Couldn't create %s (%s)" % (os_dirname, exc))
        return

    print("\t\tDownloading %s" % info_url)
    try:
        os_json = _fetch_text(info_url, headers=_OS_INFO_HEADERS)
    except _DOWNLOAD_ERRORS as exc:
        print("!!! Couldn't download %s (%s)" % (info_url, exc))
        return

    try:
        # The parsed value is not used; this only checks os_json is valid.
        json.loads(os_json)
    except ValueError as exc:
        print("!!! Couldn't parse %s as JSON (%s)" % (info_url, exc))
        return

    try:
        target = os.path.join(os_dirname, _local_file_name(info_url))
        # Explicit encoding and newline='' store the text exactly as it was
        # downloaded, independent of the platform's locale and line endings.
        with open(target, 'w', encoding='utf8', newline='') as writef:
            writef.write(os_json)
    except (OSError, ValueError) as exc:
        print("!!! Couldn't write %s for %s (%s)" % (os_dirname, info_url, exc))


def _process_repo(repo, out_dir):
    """Download one repo's os_list and process each OS entry in it."""
    try:
        repo_url = _require_str(repo, 'url')
    except ValueError as exc:
        print("!!! Skipping malformed repo_list entry %r (%s)" % (repo, exc))
        return

    print("\tDownloading %s repo_list" % repo.get('name'))
    try:
        os_list_json = _fetch_text(repo_url)
    except _DOWNLOAD_ERRORS as exc:
        print("!!! Couldn't download %s (%s)" % (repo_url, exc))
        return

    try:
        os_list = _parse_list(os_list_json, 'os_list')
    except ValueError as exc:
        print("!!! Couldn't parse %s as JSON (%s)" % (repo_url, exc))
        return

    for os_obj in os_list:
        _process_os(os_obj, repo_url, out_dir)
        print()  # blank between each OS


def main(list_url=repo_list_url, out_dir=output_dir):
    """Fetch the repo list at *list_url* and mirror all OS metadata.

    Each os_info JSON is saved as <out_dir>/<os_name>/<file name>. Every
    failure (download, parse, malformed entry, write) is reported on stdout
    and only skips the item it concerns. Returns None.
    """
    os.makedirs(out_dir, exist_ok=True)

    print("Downloading %s" % list_url)
    try:
        repo_list_json = _fetch_text(list_url)
    except _DOWNLOAD_ERRORS as exc:
        print("!!! Couldn't download %s (%s)" % (list_url, exc))
        return

    try:
        repos = _parse_list(repo_list_json, 'repo_list')
    except ValueError as exc:
        print("!!! Couldn't parse %s as JSON (%s)" % (list_url, exc))
        return

    for repo in repos:
        _process_repo(repo, out_dir)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment