Created
December 20, 2013 17:41
-
-
Save jdp/8058516 to your computer and use it in GitHub Desktop.
Quick command-line tool for parsing, inspecting, and mutating URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import argparse
import itertools
import os
import posixpath
import sys
import urllib
import urlparse
# Process exit status used when a query parameter requested for printing
# is not present in the URL.
ERR_NO_SUCH_QUERY = 2
def parse(data):
    """Split a URL string into its parsed form and its query parameters.

    Args:
        data: The URL text to parse.

    Returns:
        A ``(url, query)`` pair. ``url`` is the result of
        ``urlparse.urlparse(data)``; ``query`` is a list of
        ``(name, value)`` tuples in the order they appear in the URL.
    """
    url = urlparse.urlparse(data)
    # parse_qsl discards parameters with empty values unless told otherwise,
    # which would silently delete them from the re-assembled URL.
    query = urlparse.parse_qsl(url.query, keep_blank_values=True)
    return url, query
def build_netloc(username, password, hostname, port):
    """Assemble the ``[user[:password]@]host[:port]`` part of a URL.

    Args:
        username: User name, or an empty value for none.
        password: Password, or an empty value for none. It is only emitted
            when ``username`` is also given.
        hostname: Host name or address.
        port: Port as a string or an integer, or an empty value for none.

    Returns:
        The network-location string.
    """
    netloc = hostname
    # A hostname containing ':' can only be an IPv6 literal; it has to be
    # bracketed so the port separator stays unambiguous.
    if ':' in netloc and not netloc.startswith('['):
        netloc = '[' + netloc + ']'
    if username:
        auth = username
        # Only add the separator when there is a password, so that
        # "user@host" is not rewritten as "user:@host".
        if password:
            auth += ':' + password
        netloc = auth + '@' + netloc
    if port:
        # The port may arrive as an int (parsed URL) or as text (CLI option).
        netloc += ':' + str(port)
    return netloc
def process(args, url, query):
    """Apply the requested mutations to one URL and collect what to print.

    Args:
        args: Parsed command-line options. This function reads the override
            values (``scheme``, ``username``, ``password``, ``hostname``,
            ``port``, ``params``, ``fragment``, ``path``, ``queries``), the
            query filters (``ignored_queries``, ``sort_query``) and the
            output selectors (``print_*`` and ``query_value``).
        url: Parse result for the URL, as returned by ``urlparse.urlparse``.
        query: List of ``(name, value)`` query pairs. It is not modified.

    Returns:
        A list of strings. When at least one output selector is set, it
        holds the selected components in a fixed order: scheme, username,
        password, hostname, port, netloc, path, params, query string,
        requested query values, query names, query values, fragment.
        Otherwise it holds a single item, the re-assembled URL.

    Exits:
        Calls ``sys.exit(ERR_NO_SUCH_QUERY)`` when a name listed in
        ``args.query_value`` is not among the query parameters.
    """
    scheme = args.scheme or url.scheme
    username = args.username or (url.username or '')
    password = args.password or (url.password or '')
    hostname = args.hostname or (url.hostname or '')
    # url.port is an integer; everything below concatenates or joins text.
    port = str(args.port or (url.port or ''))
    params = args.params or url.params
    fragment = args.fragment or url.fragment
    netloc = build_netloc(username, password, hostname, port)

    # Construct the path portion of the URL: an absolute argument replaces
    # the path, a relative one is appended to it.
    path = url.path
    if args.path:
        if args.path.startswith('/'):
            path = args.path
        else:
            # URL paths always use '/', whatever the host platform is.
            path = posixpath.join(url.path, args.path)

    # Construct the query portion of the URL. Work on a copy so the
    # caller's list is left untouched.
    query = list(query)
    for pair in args.queries or ():
        # Split on the first '=' only: values may contain '=' themselves,
        # and a bare NAME yields an empty value.
        name, _, value = pair.partition('=')
        query.append((name, value))
    ignored = set(args.ignored_queries or ())
    query = [(q, v) for q, v in query if q not in ignored]
    if args.sort_query:
        query = sorted(query, key=lambda p: p[0])
    encoded_query = urllib.urlencode(query)

    # Plain components, in output order.
    components = (
        (args.print_scheme, scheme),
        (args.print_username, username),
        (args.print_password, password),
        (args.print_hostname, hostname),
        (args.print_port, port),
        (args.print_netloc, netloc),
        (args.print_path, path),
        (args.print_params, params),
        (args.print_query, encoded_query),
    )
    output = [value for wanted, value in components if wanted]
    # Any selector, even one that yields an empty string, suppresses the
    # default output of the whole URL.
    suppress_default = bool(output)

    if args.query_value:
        suppress_default = True
        query_map = {}
        for q, v in query:
            query_map.setdefault(q, []).append(v)
        for q in args.query_value:
            if q not in query_map:
                sys.exit(ERR_NO_SUCH_QUERY)
            output.extend(query_map[q])
    if args.print_query_names:
        suppress_default = True
        output.extend(q for q, _ in query)
    if args.print_query_values:
        suppress_default = True
        output.extend(v for _, v in query)
    if args.print_fragment:
        suppress_default = True
        output.append(fragment)

    if not suppress_default:
        output.append(urlparse.urlunparse(
            (scheme, netloc, path, params, encoded_query, fragment)))
    return output
def main():
    """Command-line entry point.

    Parses the options, then handles every URL in turn: lines read from
    standard input first (only when it is not a terminal), followed by the
    URLs given as positional arguments. For each URL the selected fields
    are written to standard output joined by the ``OFS`` environment
    variable (default: a space) and terminated by the ``RS`` environment
    variable (default: a newline).
    """
    ap = argparse.ArgumentParser()

    # URL-printing options
    ap.add_argument('-s', '--scheme', action='store_true',
                    dest='print_scheme', help="print scheme")
    ap.add_argument('-n', '--netloc', action='store_true',
                    dest='print_netloc', help="print netloc")
    ap.add_argument('-u', '--username', action='store_true',
                    dest='print_username', help="print username")
    ap.add_argument('-w', '--password', action='store_true',
                    dest='print_password', help="print password")
    ap.add_argument('-o', '--hostname', action='store_true',
                    dest='print_hostname', help="print hostname")
    ap.add_argument('-p', '--port', action='store_true',
                    dest='print_port', help="print port")
    ap.add_argument('-d', '--path', action='store_true',
                    dest='print_path', help="print path")
    ap.add_argument('--params', action='store_true',
                    dest='print_params', help="print params")
    ap.add_argument('-q', '--query', action='store_true',
                    dest='print_query', help="print query string")
    ap.add_argument('--query-names', action='store_true',
                    dest='print_query_names',
                    help="print only query parameter names")
    ap.add_argument('--query-values', action='store_true',
                    dest='print_query_values',
                    help="print only query parameter values")
    ap.add_argument('-f', '--fragment', action='store_true',
                    dest='print_fragment', help="print fragment")
    ap.add_argument('-g', '--query-value', action='append', metavar='QUERY',
                    dest='query_value',
                    help="print value of query parameter")

    # URL-mutating options
    ap.add_argument('-S', '--set-scheme', action='store', dest='scheme',
                    help="set scheme")
    ap.add_argument('-U', '--set-username', action='store', dest='username',
                    help="set username")
    ap.add_argument('-W', '--set-password', action='store', dest='password',
                    help="set password")
    ap.add_argument('-O', '--set-hostname', action='store', dest='hostname',
                    help="set hostname")
    ap.add_argument('-P', '--set-port', action='store', dest='port',
                    help="set port")
    ap.add_argument('-D', '--set-path', action='store', dest='path',
                    help="set or append path")
    ap.add_argument('--set-params', action='store', dest='params',
                    help="set params")
    ap.add_argument('-Q', '--set-query', metavar='NAME=VALUE',
                    action='append', dest='queries',
                    help="append query parameter")
    ap.add_argument('-F', '--set-fragment', action='store', dest='fragment',
                    help="set fragment")

    # Behavior-modifying options
    ap.add_argument('--sort-query', action='store_true',
                    help="sort printed query parameters")
    ap.add_argument('-x', '--ignore-query', action='append',
                    dest='ignored_queries', metavar='QUERY', default=[],
                    help="ignore query parameter")

    # Positional arguments
    ap.add_argument('urls', nargs='*', metavar='URL')

    args = ap.parse_args()

    # Use the field and record separators from the environment
    ofs = os.environ.get('OFS', ' ')
    rs = os.environ.get('RS', '\n')

    # Piped or redirected input is consumed before the positional URLs.
    inputs = []
    if not sys.stdin.isatty():
        inputs.append(sys.stdin)
    inputs.append(args.urls)

    for line in itertools.chain(*inputs):
        url, query = parse(line.strip())
        output = process(args, url, query)
        sys.stdout.write(ofs.join(output))
        sys.stdout.write(rs)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment