@senko
Created August 13, 2011 12:55
GitHub repository archive link finder
#!/usr/bin/env python
# A script to find recent download links for a GitHub
# repository.
#
# Written by: Senko Rasic <senko.rasic@dobarkod.hr>
#
# Released to Public Domain. Use it as you like.
import urllib2
import sys

import lxml.etree


def get_archives(repo_url):
    if 'github.com' not in repo_url:
        sys.stderr.write('Not a GitHub repository\n')
        sys.exit(-1)

    if not repo_url.endswith('/'):
        repo_url += '/'

    archives_url = repo_url + 'archives/master'

    try:
        doc = lxml.etree.parse(urllib2.urlopen(archives_url),
                               lxml.etree.HTMLParser())
    except urllib2.HTTPError as e:
        sys.stderr.write("Can't get download links from %s: %s\n" %
                         (archives_url, e))
        sys.exit(-1)

    visited = {}
    downloads = []

    # Archive links look like /<owner>/<project>/<zipball|tarball>/<treeish>;
    # collect one tarball and one zipball link per tree-ish, skipping duplicates.
    for el in doc.xpath('//div[@id = "archives"]//a'):
        parts = el.attrib['href'].split('/')
        if len(parts) == 5:
            _, owner, project, archtype, treeish = parts
            if archtype in ['zipball', 'tarball']:
                if treeish not in visited:
                    visited[treeish] = True
                    downloads.extend([
                        '%starball/%s' % (repo_url, treeish),
                        '%szipball/%s' % (repo_url, treeish)
                    ])

    # A link to the full downloads page on GitHub, useful
    # for finding older downloads.
    downloads.append(repo_url + 'downloads')

    downloads.sort()
    return downloads


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "Usage: %s <github-repo>" % sys.argv[0]
        sys.exit(-1)

    for link in get_archives(sys.argv[1]):
        print link
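
A minimal usage sketch, assuming the script is saved as github-archives.py and run under Python 2 (urllib2 and the print statements are Python 2 only); the repository URL below is a hypothetical example:

    python github-archives.py https://github.com/example/example-project

For each recent tree-ish found on the repository's archives/master page, it prints one tarball and one zipball URL, followed by a link to the repository's downloads page for older archives.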