Resolve a Go import path to the actual URL that hosts that package
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.5 | |
""" | |
A simple module to resolve a Go import path to the actual URL that hosts that
package. Sometimes these are not equal, and you must load one URL (and follow
zero or more redirects) until you encounter a meta tag that specifies the true
import URL.
Requirements: | |
- requests | |
- beautifulsoup4 | |
""" | |
import sys | |
import argparse | |
import urllib.parse | |
import requests | |
import bs4 | |
def _get_cycle(url, timeout=30.0, max_hops=30):
    """Resolve *url* to the git repository root named by a go-import meta tag.

    Each page is fetched with automatic redirects disabled. If the response
    body contains a ``<meta name="go-import" content="PREFIX VCS REPO-ROOT">``
    tag whose VCS field is ``git``, REPO-ROOT is returned immediately.
    Otherwise the ``Location`` header, when present, is followed by hand and
    the search repeats on the next page.

    Args:
        url: Absolute URL to start from.
        timeout: Seconds passed to ``requests.get`` for every request, so a
            stalled server cannot hang the lookup indefinitely.
        max_hops: Upper bound on the number of pages fetched; protects
            against redirect cycles.

    Returns:
        The repository root from the first git go-import tag found, or the
        last URL fetched when no such tag was seen and the response carried
        no ``Location`` header.

    Raises:
        requests.HTTPError: A response had a 4xx/5xx status.
        requests.TooManyRedirects: *max_hops* pages were fetched without
            reaching an answer.
    """
    for _ in range(max_hops):
        print("Reading: {}".format(url))

        # Redirects are followed manually so that every intermediate page can
        # be inspected for a go-import tag.
        response = requests.get(url, allow_redirects=False, timeout=timeout)
        response.raise_for_status()

        soup = bs4.BeautifulSoup(response.text, 'html.parser')

        # We're looking for meta-tags like this:
        #
        # <meta name="go-import" content="googlemaps.github.io/maps git https://github.com/googlemaps/google-maps-services-go">
        #
        # The HTML attribute called "name" has to be passed through `attrs`,
        # because find_all() reserves its `name` argument for the tag name.
        for tag in soup.find_all('meta', attrs={'name': 'go-import'}):
            # Split on any run of whitespace; tags with a missing or too
            # short `content` are skipped rather than crashing the lookup.
            # Fields beyond the third are ignored.
            fields = tag.attrs.get('content', '').split()
            if len(fields) < 3:
                continue

            vcs, repo_url_root = fields[1], fields[2]
            if vcs != 'git':
                continue

            return repo_url_root

        next_url = response.headers.get('Location')
        if next_url is None:
            return url

        parsed = urllib.parse.urlparse(next_url)
        if parsed.scheme and parsed.netloc:
            url = next_url
        else:
            # Relative redirect (absolute path, relative path, query-only or
            # scheme-relative): resolve it against the URL just requested.
            resolved = urllib.parse.urljoin(url, next_url)
            print(" [{}] => [{}]".format(next_url, resolved))
            url = resolved

    raise requests.TooManyRedirects(
        "Gave up after {} requests; last URL: {}".format(max_hops, url))
def _main():
    """Command-line entry point.

    Reads one positional argument (a Go import path), prefixes it with
    ``https://``, hands the result to ``_get_cycle`` and prints the URL that
    comes back.
    """
    cli = argparse.ArgumentParser(
        description="Determine the import URL for the given Go import path")
    cli.add_argument('import_path', help='Go import path')
    options = cli.parse_args()

    # The import path carries no scheme of its own, so one is supplied here.
    start_url = "https://{}".format(options.import_path)
    print("Final URL: [{}]".format(_get_cycle(start_url)))
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment