Created
June 18, 2018 03:53
-
-
Save rgov/b842dcfb738b0342452cbd62076c4353 to your computer and use it in GitHub Desktop.
Convert a CURL command line to the equivalent Python Requests invocation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse
import pprint
import shlex
import urllib
import urllib.parse
# Lex the curl command line into its constituent arguments, exactly as a
# shell would (shlex honors the single quotes and the trailing-backslash
# line continuations).
#
# BUGFIX: the --data payload previously read '...DESC¤t_page=0...'; the
# HTML entity '&curren;' had eaten '&curren' out of '&current_page'.  The
# restored string is 137 bytes long, which is exactly what the example's
# own 'Content-Length: 137' header declares.
example = '''
curl 'http://seethroughny.net/tools/required/reports/payroll?action=get' \
-XPOST \
-H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \
-H 'Origin: http://seethroughny.net' \
-H 'Host: seethroughny.net' \
-H 'Accept: application/json, text/javascript, */*; q=0.01' \
-H 'Connection: keep-alive' \
-H 'Accept-Language: en-us' \
-H 'Accept-Encoding: gzip, deflate' \
-H 'Cookie: CONCRETE5=291419e390a3b67a3946e0854cc9e33e' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15' \
-H 'Referer: http://seethroughny.net/payrolls' \
-H 'Content-Length: 137' \
-H 'X-Requested-With: XMLHttpRequest' \
--data 'PayYear%5B%5D=2017&SortBy=YTDPay+DESC&current_page=0&result_id=0&url=%2Ftools%2Frequired%2Freports%2Fpayroll%3Faction%3Dget&nav_request=0'
'''.strip()
argv = shlex.split(example)
del argv[0]  # drop the leading 'curl' program name itself

# Now parse the curl options we care about into structured data.  Only a
# small subset of curl's flags is modeled: the URL, -X/--request,
# repeatable -H/--header, and --data.
parser = argparse.ArgumentParser()
parser.add_argument('url')
parser.add_argument('--request', '-X', dest='method', default='GET')
parser.add_argument('--header', '-H', dest='headers', action='append')
parser.add_argument('--data')
args = parser.parse_args(argv)
def parse_qs(qs):
    """Parse a URL query string into a dict, unwrapping singleton lists.

    urllib's parse_qs maps every name to a *list* of values; for the
    common case of exactly one value that wrapper list is just noise,
    so collapse it to the bare value.  Repeated names keep their list.
    """
    parsed = urllib.parse.parse_qs(qs)
    return {
        name: values[0] if isinstance(values, list) and len(values) == 1 else values
        for name, values in parsed.items()
    }
def pformat(obj):
    """Pretty-print *obj* for embedding as a keyword argument value.

    Wraps pprint.pformat and then rewrites the multi-line output so the
    opening brace sits on its own line and the closing brace is indented,
    matching the hand-written style of the generated requests call.
    Single-line output is returned untouched.
    """
    # Crap to force it to pretty-print like I want
    pretty = pprint.pformat(obj, indent=4)
    if '\n' in pretty:
        # NOTE(review): the exact run of spaces inside these literals may
        # have been collapsed in transcription — pprint with indent=4
        # emits '{' followed by three spaces; confirm against the
        # original source before relying on this formatting.
        pretty = pretty.replace('{ ', '{\n ')
        if pretty.endswith('}'):
            pretty = pretty[:-1] + '\n }'
    return pretty
# Start assembling the pieces of the requests call.  Keys wrapped in
# double underscores ('__fn__', '__url__') are metadata for the code
# generator below, not keyword arguments to requests.
request = {}

# Prefer the dedicated helper (requests.post, requests.get, ...) for the
# well-known verbs; anything exotic falls back to requests.request()
# with an explicit method= argument.
verb_helpers = {
    'DELETE': 'delete',
    'GET': 'get',
    'HEAD': 'head',
    'OPTIONS': 'options',
    'POST': 'post',
    'PUT': 'put',
}
try:
    request['__fn__'] = verb_helpers[args.method]
except KeyError:
    request['__fn__'] = 'request'
    request['method'] = args.method

# Split any query string off the URL so it can be passed as params=.
url = urllib.parse.urlparse(args.url)
if url.query:
    request['__url__'] = '{}://{}{}'.format(url.scheme, url.netloc, url.path)
    request['params'] = parse_qs(url.query)
else:
    request['__url__'] = args.url
# Fold the repeated -H 'Name: value' options into a headers dict
# (later duplicates of the same header name win, as with plain
# assignment).
if args.headers:
    split_headers = (raw.partition(': ') for raw in args.headers)
    request['headers'] = {name: value for name, _, value in split_headers}

# Pass the --data payload through as-is; it may be decoded into a dict
# later if it turns out to be form-encoded.
if args.data:
    request['data'] = args.data
# Drop the Host header when it merely restates what the URL already
# says — requests derives it automatically.  A customized Host (e.g.
# for virtual-host tricks) is kept.
headers = request.get('headers') or {}
if 'Host' in headers:
    host, _, port = url.netloc.partition(':')
    # Host values that carry no information beyond the URL itself.
    redundant = {'{}:{}'.format(host, port) if port else host}
    if (url.scheme, port) in (('http', '80'), ('https', '443')):
        redundant.add(host)
    if headers['Host'] in redundant:
        del headers['Host']
# Drop Content-Length when it simply equals the length of the payload;
# requests recomputes it on every call, so restating it is noise.  A
# mismatched value is deliberately preserved so the user notices it.
headers = request.get('headers')
if headers and 'Content-Length' in headers:
    declared_length = int(headers['Content-Length'])
    body = request.get('data')
    if body and len(body) == declared_length:
        del headers['Content-Length']
# If the payload is form-urlencoded, decode it into a dict so the
# generated call passes structured data.  requests re-encodes a dict
# body (and sets the header) itself, so the explicit Content-Type
# becomes redundant once the data has been decoded.
content_type = (request.get('headers') or {}).get('Content-Type', '')
# Strip any parameters such as '; charset=UTF-8' before comparing.
mime_type = content_type.split(';', 1)[0]
if mime_type == 'application/x-www-form-urlencoded' and request.get('data'):
    request['data'] = parse_qs(request['data'])
    del request['headers']['Content-Type']
# Emit the equivalent Python snippet.  The URL is positional; every
# non-dunder key becomes a keyword argument.
output = ['requests.{}('.format(request['__fn__'])]
output.append('    {},'.format(repr(request['__url__'])))
for keyword, value in request.items():
    if keyword.startswith('__'):
        continue
    output.append('    {}={},'.format(keyword, pformat(value)))
output.append(')')
print('\n'.join(output))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Wow, very cool! I see you did not separate the cookies out into a `cookies` parameter for the requests library.