Skip to content

Instantly share code, notes, and snippets.

@jdp
Created December 20, 2013 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdp/8058516 to your computer and use it in GitHub Desktop.
Save jdp/8058516 to your computer and use it in GitHub Desktop.
Quick command-line tool for parsing, inspecting, and mutating URLs
#!/usr/bin/env python
import argparse
import itertools
import os
import sys
import urllib
import urlparse
# Exit status used when a parameter requested with -g/--query-value is not
# present in the URL's query string.
ERR_NO_SUCH_QUERY = 2
def parse(data):
    """Split a URL string into its parsed form and its query pairs.

    Returns a 2-tuple ``(parts, pairs)``: ``parts`` is whatever
    ``urlparse.urlparse`` returns for ``data``, and ``pairs`` is the list of
    ``(name, value)`` tuples that ``urlparse.parse_qsl`` extracts from the
    query component of ``parts``.
    """
    parts = urlparse.urlparse(data)
    return parts, urlparse.parse_qsl(parts.query)
def build_netloc(username, password, hostname, port):
    """Assemble the network-location part of a URL.

    Parameters:
        username: user name, or a falsy value for none.  When present the
            result is prefixed with ``username:password@`` (the colon is
            emitted even if ``password`` is empty).
        password: password; only used when ``username`` is truthy.
        hostname: host name the result starts from.
        port: port as a string or an integer, or a falsy value for none.

    Returns the combined ``[user:[password]@]hostname[:port]`` string.
    """
    netloc = hostname
    if username:
        auth = username + ':'
        if password:
            auth += password
        netloc = auth + '@' + netloc
    if port:
        # The port may arrive as an int (a parsed URL's ``port`` attribute is
        # numeric); coerce it so the concatenation cannot raise TypeError.
        netloc += ':' + str(port)
    return netloc
def process(args, url, query):
    """Apply the requested mutations to one parsed URL and collect output.

    Parameters:
        args: option namespace.  The ``scheme``, ``username``, ``password``,
            ``hostname``, ``port``, ``params`` and ``fragment`` attributes
            override the corresponding URL component when truthy; ``path``
            replaces the URL path when it starts with ``/`` and is appended
            to it otherwise; ``queries`` holds ``NAME=VALUE`` strings to
            append; ``ignored_queries`` names parameters to drop;
            ``sort_query`` sorts the pairs by name; the ``print_*`` flags and
            ``query_value`` select what is emitted.
        url: parsed URL whose ``scheme``, ``username``, ``password``,
            ``hostname``, ``port``, ``path``, ``params`` and ``fragment``
            attributes are read.
        query: list of ``(name, value)`` pairs from the URL's query string.
            It is not modified.

    Returns the list of selected field values in a fixed order (scheme,
    username, password, hostname, port, netloc, path, params, query string,
    requested query values, query names, query values, fragment).  When no
    field is selected the list holds the single rebuilt URL.

    Exits the process with status ``ERR_NO_SUCH_QUERY`` if a name listed in
    ``args.query_value`` is absent from the final query pairs.
    """
    # URL paths always use '/', whatever separator the host OS prefers.
    import posixpath

    scheme = args.scheme or url.scheme
    username = args.username or url.username or ''
    password = args.password or url.password or ''
    hostname = args.hostname or url.hostname or ''
    # url.port is an int; normalise to text so it can be concatenated into
    # the netloc and joined with the other output fields.
    port = str(args.port or url.port or '')
    params = args.params or url.params
    fragment = args.fragment or url.fragment
    netloc = build_netloc(username, password, hostname, port)

    # Construct the path portion of the URL
    path = url.path
    if args.path:
        if args.path.startswith('/'):
            path = args.path
        else:
            path = posixpath.join(url.path, args.path)

    # Construct the query portion of the URL.  Work on a copy so the
    # caller's list is left untouched.
    pairs = list(query)
    for item in args.queries or ():
        # Split on the first '=' only: values may themselves contain '=',
        # and a bare NAME yields an empty value instead of failing to unpack.
        name, _, value = item.partition('=')
        pairs.append((name, value))
    pairs = [(q, v) for q, v in pairs if q not in args.ignored_queries]
    if args.sort_query:
        pairs.sort(key=lambda p: p[0])
    encoded_query = urllib.urlencode(pairs)

    output = []
    # Any explicit selection suppresses the default "print the whole URL".
    suppress_default = False

    simple_fields = (
        (args.print_scheme, scheme),
        (args.print_username, username),
        (args.print_password, password),
        (args.print_hostname, hostname),
        (args.print_port, port),
        (args.print_netloc, netloc),
        (args.print_path, path),
        (args.print_params, params),
        (args.print_query, encoded_query),
    )
    for wanted, value in simple_fields:
        if wanted:
            suppress_default = True
            output.append(value)

    if args.query_value:
        suppress_default = True
        # Group values by name; a parameter may occur more than once.
        query_map = {}
        for q, v in pairs:
            query_map.setdefault(q, []).append(v)
        for q in args.query_value:
            if q not in query_map:
                sys.exit(ERR_NO_SUCH_QUERY)
            output.extend(query_map[q])

    if args.print_query_names:
        suppress_default = True
        output.extend(q for q, _ in pairs)
    if args.print_query_values:
        suppress_default = True
        output.extend(v for _, v in pairs)
    if args.print_fragment:
        suppress_default = True
        output.append(fragment)

    if not suppress_default:
        output.append(urlparse.urlunparse(
            (scheme, netloc, path, params, encoded_query, fragment)))
    return output
def main(argv=None):
    """Command-line entry point: parse options, then process each URL.

    URLs are read from standard input (one per line) when it is not a
    terminal, followed by any URLs given as positional arguments.  For each
    URL the selected fields are written to standard output, joined by the
    value of the ``OFS`` environment variable (default: one space) and
    terminated by the value of ``RS`` (default: a newline).

    Parameters:
        argv: list of command-line arguments to parse.  ``None`` (the
            default) lets argparse read ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser()

    # URL-printing options
    ap.add_argument('-s', '--scheme', action='store_true',
                    dest='print_scheme', help="print scheme")
    ap.add_argument('-n', '--netloc', action='store_true',
                    dest='print_netloc', help="print netloc")
    ap.add_argument('-u', '--username', action='store_true',
                    dest='print_username', help="print username")
    ap.add_argument('-w', '--password', action='store_true',
                    dest='print_password', help="print password")
    ap.add_argument('-o', '--hostname', action='store_true',
                    dest='print_hostname', help="print hostname")
    ap.add_argument('-p', '--port', action='store_true',
                    dest='print_port', help="print port")
    ap.add_argument('-d', '--path', action='store_true',
                    dest='print_path', help="print path")
    ap.add_argument('--params', action='store_true',
                    dest='print_params', help="print params")
    ap.add_argument('-q', '--query', action='store_true',
                    dest='print_query', help="print query string")
    ap.add_argument('--query-names', action='store_true',
                    dest='print_query_names',
                    help="print only query parameter names")
    ap.add_argument('--query-values', action='store_true',
                    dest='print_query_values',
                    help="print only query parameter values")
    ap.add_argument('-f', '--fragment', action='store_true',
                    dest='print_fragment', help="print fragment")
    ap.add_argument('-g', '--query-value', action='append', metavar='QUERY',
                    dest='query_value', help="print value of query parameter")

    # URL-mutating options
    ap.add_argument('-S', '--set-scheme', action='store',
                    dest='scheme', help="set scheme")
    ap.add_argument('-U', '--set-username', action='store',
                    dest='username', help="set username")
    ap.add_argument('-W', '--set-password', action='store',
                    dest='password', help="set password")
    ap.add_argument('-O', '--set-hostname', action='store',
                    dest='hostname', help="set hostname")
    ap.add_argument('-P', '--set-port', action='store',
                    dest='port', help="set port")
    ap.add_argument('-D', '--set-path', action='store',
                    dest='path', help="set or append path")
    ap.add_argument('--set-params', action='store',
                    dest='params', help="set params")
    ap.add_argument('-Q', '--set-query', metavar='NAME=VALUE',
                    action='append', dest='queries',
                    help="append query parameter")
    ap.add_argument('-F', '--set-fragment', action='store',
                    dest='fragment', help="set fragment")

    # Behavior-modifying options
    ap.add_argument('--sort-query', action='store_true',
                    help="sort printed query parameters")
    ap.add_argument('-x', '--ignore-query', action='append',
                    dest='ignored_queries', metavar='QUERY', default=[],
                    help="ignore query parameter")

    # Positional arguments
    ap.add_argument('urls', nargs='*', metavar='URL')

    args = ap.parse_args(argv)

    # Use the field and record separators from the environment
    ofs = os.environ.get('OFS', ' ')
    rs = os.environ.get('RS', '\n')

    # Piped or redirected input is consumed first, then the URLs given on
    # the command line.
    inputs = []
    if not sys.stdin.isatty():
        inputs.append(sys.stdin)
    inputs.append(args.urls)

    for line in itertools.chain(*inputs):
        url, query = parse(line.strip())
        output = process(args, url, query)
        # Fields are not guaranteed to be text (a parsed port is numeric),
        # so coerce each one before joining.
        sys.stdout.write(ofs.join(str(field) for field in output))
        sys.stdout.write(rs)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment