Skip to content

Instantly share code, notes, and snippets.

@dsoprea
Last active November 29, 2016 17:15
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save dsoprea/bfa789d0d1cc150a04f01c325a061486 to your computer and use it in GitHub Desktop.
Resolve a Go import path to the actual URL that hosts that package
#!/usr/bin/env python3.5
"""
A simple module to resolve a Go import path to the actual URL that hosts that
package. Sometimes these are not equal and you must load one URL (and zero or
more redirects) until you encounter a meta tag that specifies the true import
URL.
Requirements:
- requests
- beautifulsoup4
"""
import sys
import argparse
import urllib.parse
import requests
import bs4
def _is_go_import_meta(tag):
    """Return True if *tag* is a ``<meta name="go-import" ...>`` element."""
    # We're looking for meta-tags like this:
    #
    # <meta name="go-import" content="googlemaps.github.io/maps git https://github.com/googlemaps/google-maps-services-go">
    return tag.name == 'meta' and tag.attrs.get('name') == 'go-import'


def _get_cycle(url, timeout=30, max_hops=30):
    """Follow *url* until a git ``go-import`` meta tag or the last redirect.

    Each URL is fetched with automatic redirect handling disabled so that the
    body of every intermediate response can be inspected. The first
    ``go-import`` meta tag whose VCS field is ``git`` wins. If a response has
    no such tag, its ``Location`` header (when present) is followed manually.

    Args:
        url: Absolute URL to start from.
        timeout: Seconds to wait on each HTTP request before giving up.
        max_hops: Maximum number of URLs to fetch before aborting; guards
            against redirect cycles.

    Returns:
        The repository root from the matching ``go-import`` tag, or, when no
        such tag is found, the last URL reached (the one whose response
        carried no ``Location`` header).

    Raises:
        requests.HTTPError: A response had an error (4xx/5xx) status.
        requests.TooManyRedirects: More than *max_hops* URLs were fetched
            without reaching a result.
    """
    for _ in range(max_hops):
        print("Reading: {}".format(url))

        r = requests.get(url, allow_redirects=False, timeout=timeout)
        r.raise_for_status()

        bs = bs4.BeautifulSoup(r.text, 'html.parser')

        for m in bs.find_all(_is_go_import_meta):
            # The content is "<import prefix> <vcs> <repo root>", separated by
            # arbitrary whitespace. Skip tags that do not have that shape
            # rather than failing on the unpack.
            fields = (m.attrs.get('content') or '').split()
            if len(fields) != 3:
                continue

            _, vcs, repo_url_root = fields
            if vcs != 'git':
                continue

            return repo_url_root

        next_url = r.headers.get('Location')
        if next_url is None:
            return url

        # Resolve the redirect target against the URL we just fetched. This
        # handles absolute, host-relative ("/x"), path-relative ("x") and
        # protocol-relative ("//host/x") Location values alike.
        updated_url = urllib.parse.urljoin(url, next_url)
        if updated_url != next_url:
            print(" [{}] => [{}]".format(next_url, updated_url))

        url = updated_url

    raise requests.TooManyRedirects(
        "Exceeded {} requests while resolving the import URL".format(max_hops))
def _main():
    """Resolve the Go import path given on the command line and print it."""
    arg_parser = argparse.ArgumentParser(
        description="Determine the import URL for the given Go import path")
    arg_parser.add_argument('import_path', help='Go import path')
    import_path = arg_parser.parse_args().import_path

    # The import path carries no scheme, so start from its HTTPS form.
    resolved = _get_cycle("https://{}".format(import_path))
    print("Final URL: [{}]".format(resolved))
# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment