Skip to content

Instantly share code, notes, and snippets.

@danielkza
Created June 15, 2016 23:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielkza/70e20c38d3a9b77be4ab460da65a29ca to your computer and use it in GitHub Desktop.
Save danielkza/70e20c38d3a9b77be4ab460da65a29ca to your computer and use it in GitHub Desktop.
extensions.gnome.org crawler: downloads the available extensions in order of popularity. Usage: ./download_ego_top.py dest_dir [max_num] [shell_version]. Requires wget and GNU parallel to be installed.
#!/bin/env python3
import os
import sys
import urllib.request
import json
import subprocess
# Root of the extensions site; every endpoint below is relative to it.
EXTENSION_BASE_URL = 'https://extensions.gnome.org'

# Paginated listing sorted by popularity; fill in ``page`` with str.format().
EXTENSION_QUERY_URL_FORMAT = ''.join((
    EXTENSION_BASE_URL,
    '/extension-query/',
    '?sort=popularity&page={page}',
))

# Zip download for one extension; fill in ``uuid`` and ``shell_version``
# with str.format().
EXTENSION_DOWNLOAD_URL_FORMAT = ''.join((
    EXTENSION_BASE_URL,
    '/download-extension/{uuid}.shell-extension.zip',
    '?shell_version={shell_version}',
))
def fetch_extensions(max_num=None):
    """Yield extension entries from the popularity-sorted query endpoint.

    Requests ``EXTENSION_QUERY_URL_FORMAT`` page by page, starting at page 1,
    and yields every element of each response's ``extensions`` list in the
    order received.

    Args:
        max_num: Stop after yielding this many entries. ``None`` (or any
            other falsy value, such as ``0``) means no limit.

    Yields:
        The JSON-decoded entries of each page's ``extensions`` list.

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        KeyError: If a response lacks the ``numpages`` or ``extensions`` key.
        ValueError: If a response body is not valid UTF-8 encoded JSON.
    """
    count = 0
    page = 1
    while True:
        url = EXTENSION_QUERY_URL_FORMAT.format(page=page)
        # Decode the whole page and close the response before yielding, so
        # the connection is not held open while the consumer is working.
        with urllib.request.urlopen(url) as response:
            payload = json.loads(response.read().decode('utf-8'))

        total_pages = payload['numpages']
        extensions = payload['extensions']

        for extension in extensions:
            yield extension
            count += 1
            if max_num and count >= max_num:
                return

        # An empty page means there is nothing further to fetch. This also
        # ends the loop when the reported page count is 0, a value the
        # page-count check below can never act on.
        if not extensions:
            return

        page += 1
        if total_pages and page > total_pages:
            return
if __name__ == '__main__':
    # Command line: dest_dir [max_num] [shell_version]
    if len(sys.argv) < 2:
        sys.exit('Usage: {} dest_dir [max_num] [shell_version]'.format(sys.argv[0]))

    dest_dir = sys.argv[1]
    # A limit of 0 is normalised to None, i.e. the same as omitting it.
    max_num = (int(sys.argv[2]) or None) if len(sys.argv) > 2 else None
    # An omitted or empty version argument falls back to 3.18.
    shell_version = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else '3.18'

    os.makedirs(dest_dir, exist_ok=True)

    # wget: name files after the Content-Disposition header, resume partial
    # downloads (-c), use timestamping (-N), and stay quiet.
    wget_cmd = ['wget', '--content-disposition', '-c', '-N', '--quiet']
    # GNU parallel: progress bar, 8 jobs at once, one URL per wget call.
    # The delimiter argument is the two characters backslash + 'n', which
    # parallel itself expands to a newline.
    parallel_cmd = ['parallel', '--bar', '-j8', '-n1', '-d\\n', '--'] + wget_cmd

    with subprocess.Popen(parallel_cmd, stdin=subprocess.PIPE, cwd=dest_dir) as proc:
        for ext in fetch_extensions(max_num=max_num):
            url = EXTENSION_DOWNLOAD_URL_FORMAT.format(
                uuid=ext['uuid'], shell_version=shell_version)
            # Flush after each URL so parallel can start the download while
            # later listing pages are still being fetched.
            proc.stdin.write(url.encode('utf-8') + b'\n')
            proc.stdin.flush()
        # Closing stdin signals end of input; leaving the ``with`` block then
        # waits for parallel to finish.
        proc.stdin.close()

    # Report failures instead of silently discarding parallel's exit status.
    if proc.returncode:
        print('parallel exited with status {}; some downloads may have failed'
              .format(proc.returncode), file=sys.stderr)
        sys.exit(proc.returncode)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment