Skip to content

Instantly share code, notes, and snippets.

Last active November 29, 2016 17:15
What would you like to do?
Resolve a Go import path to the actual URL that hosts that package
#!/usr/bin/env python3.5
"""
A simple module to resolve a Go import path to the actual URL that hosts that
package. Sometimes these are not equal and you must load one URL (and zero or
more redirects) until we encounter a meta tag that specifies the true import
path.

Requirements:

- requests
- beautifulsoup4
"""
import sys
import argparse
import urllib.parse
import requests
import bs4
def _get_cycle(url):
    """Follow `url` until a git ``go-import`` meta tag or the last redirect.

    Every hop is fetched with automatic redirects disabled so that each
    intermediate page can be inspected for a tag of the form
    ``<meta name="go-import" content="<prefix> <vcs> <repo-root>">``.

    Args:
        url: Absolute URL to start from.

    Returns:
        The repository root taken from the first ``go-import`` meta tag whose
        VCS field is ``git``. If no such tag is ever seen, the last URL
        reached, i.e. the one whose response carried no ``Location`` header.
    """
    while True:
        print("Reading: {}".format(url))
        r = requests.get(url, allow_redirects=False)
        bs = bs4.BeautifulSoup(r.text, 'html.parser')

        # `name` collides with find_all's first positional parameter, so the
        # HTML attribute has to be matched through `attrs`.
        for m in bs.find_all('meta', attrs={'name': 'go-import'}):
            # Split on any run of whitespace; skip tags that lack the
            # expected three fields instead of crashing on unpack.
            fields = m.attrs.get('content', '').split()
            if len(fields) != 3:
                continue

            _, vcs, repo_url_root = fields
            if vcs != 'git':
                continue

            return repo_url_root

        next_url = r.headers.get('Location')
        if next_url is None:
            # No meta tag and nowhere left to go: settle for the current URL.
            break

        # Resolve relative redirects (path-absolute, path-relative, or
        # scheme-relative) against the URL that was just requested.
        updated_url = urllib.parse.urljoin(url, next_url)
        if updated_url != next_url:
            print(" [{}] => [{}]".format(next_url, updated_url))

        url = updated_url

    return url
def _main():
    """Resolve the Go import path given on the command line and print it.

    Reads one positional argument, `import_path`, prefixes it with
    ``https://``, and prints the URL that `_get_cycle` resolves it to.
    """
    description = "Determine the import URL for the given Go import path"
    parser = argparse.ArgumentParser(description=description)

    # `args.import_path` is read below, so the positional must use that name.
    parser.add_argument(
        'import_path',
        help='Go import path')

    args = parser.parse_args()

    initial_url = "https://{}".format(args.import_path)
    final_url = _get_cycle(initial_url)
    print("Final URL: [{}]".format(final_url))
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment