Skip to content

Instantly share code, notes, and snippets.

@chisophugis
Created August 21, 2014 19:55
Show Gist options
  • Save chisophugis/942ae335103ac3391983 to your computer and use it in GitHub Desktop.
Save chisophugis/942ae335103ac3391983 to your computer and use it in GitHub Desktop.
Command line options as JSON
#!/usr/bin/env python
#
# A prototype for consistently handling command line options by
# interpreting them as a JSON object, following a schema.
# The result of command line option parsing is a "JSON object" which
# can be passed around and manipulated as desired.
#
# Example:
# $ ./options.py --vectorizer-loop-enable=true --inliner-enable=true --inliner-threshold=6
# {
# "inliner": {
# "threshold": 6,
# "enable": true
# },
# "vectorizer": {
# "loop": {
# "enable": true
# }
# }
# }
#
# See the comment above the OPTION_SCHEMA variable for what the schema looks
# like (the functionality available now is quite limited, but hopefully
# conveys the gist).
#
# Skip down to main() to see the primary flow of the code.
#
# For now, '-' is used as the separator for properties. This is just because
# '-' is common in command line option names. It actually might make more
# sense to use '.' which might make things more intuitive and make it clear
# that this is a more "formalized" option scheme:
# $ foo --foo.bar=true --bar.quux.quuux=8
# You can tweak the global SEP variable to change this to be '.' or whatever.
#
# Note that by a coincidence with braced shell expansion, this works:
# $ ./options.py --vectorizer{-aggressive=on,-loop{-enable=true,-max_depth=3}}
# Or with SEP == '.':
# $ ./options.py --vectorizer{.aggressive=on,.loop{.enable=true,.max_depth=3}}
# $ ./options.py --vectorizer.{aggressive=on,loop.{enable=true,max_depth=3}}
# $ ./options.py --vectorizer.{aggressive=on,loop.enable=true,loop.max_depth=3}
#
# (Note that I just pulled the option names for this schema out of thin
# air plus vague recollection of options I've seen in places; they probably
# don't make sense)
from __future__ import print_function
import sys
import json
# Separator placed between nested schema keys when they are flattened into a
# single option name (e.g. "inliner" + "threshold" -> "inliner-threshold").
# It is the default `separator` of flatten_dict() and unflatten_dict().
SEP = '-'
# The schema of all recognized options, written as a nested JSON object.
# A leaf of this JSON object must be a string, which indicates the type.
# For now, there should be no arrays.
# The only types that can be specified currently are "bool" and "int"
# (main() dispatches a type name T to the function parse_T_option).
# In the future, adding a list<T> might be useful, so you can do e.g.
# --internalize.assume_external=foo_func,bar_func,baz_func
# to get the value ["foo_func", "bar_func", "baz_func"] for a list<string>.
#
# See the addendum at the end of this file for a strawman for a nice static
# table-based recursive way to define these options in C/C++.
OPTION_SCHEMA = json.loads('''
{
"vectorizer": {
"aggressive": "bool",
"fast": "bool",
"loop": {
"enable": "bool",
"max_depth": "int"
}
},
"inliner": {
"enable": "bool",
"threshold": "int"
},
"verbose": "bool"
}
''')
# E.g. turn {"a": {"b": "foo"}} into {"a-b":"foo"}
# This transformation simplifies the code by requiring less nesting.
# The function unflatten_dict does the opposite transformation.
def flatten_dict(d, separator=SEP):
    """Collapse a nested dict into a single-level dict.

    Each non-dict value ends up under a key built by joining the chain of
    keys leading to it with `separator`, e.g. {"a": {"b": "foo"}} becomes
    {"a-b": "foo"} when the separator is '-'.

    No key may itself contain `separator` (checked with an assert), since
    the flattened name could then not be split back unambiguously.
    """
    flat = {}

    def _walk(node, path):
        # Leaves are stored under their joined path; dicts are descended into.
        if not isinstance(node, dict):
            flat[separator.join(path)] = node
            return
        for key, child in node.items():
            assert separator not in key
            _walk(child, path + [key])

    _walk(d, [])
    return flat
# Flat view of OPTION_SCHEMA: maps a full option name such as
# "vectorizer-loop-enable" directly to its type name. main() looks options
# up here.
FLATTENED_OPTION_SCHEMA = flatten_dict(OPTION_SCHEMA)
def main():
    """Parse sys.argv[1:] against the option schema and print the result.

    Every argument is expected to look like ``--<flattened-key>=<value>``.
    The key is looked up in FLATTENED_OPTION_SCHEMA to obtain its type name,
    and the value is converted by the module-level function named
    ``parse_<type>_option``. Arguments that are malformed, unknown, or whose
    value cannot be parsed are reported with a message and skipped. The
    accepted options are then unflattened and printed as indented JSON.
    """
    # `unicode` exists only on Python 2; naming it unconditionally raises
    # NameError on Python 3, so build the tuple of string types defensively.
    try:
        string_types = (str, unicode)  # noqa: F821 (Python 2 only)
    except NameError:
        string_types = (str,)

    options = {}
    for opt in sys.argv[1:]:
        # We expect opt to look like --foo-bar=baz
        if not opt.startswith('--'):
            print('option must start with "--"')
            continue
        before, _, after = opt[len('--'):].partition('=')
        if after == '':
            # Either there was no '=' at all or nothing followed it.
            print('option not formatted correctly {!r}'.format(opt))
            continue
        ty = FLATTENED_OPTION_SCHEMA.get(before, None)
        if ty is None or not isinstance(ty, string_types):
            print('{!r} is not a valid key index'.format(before))
            continue
        # Dispatch on the schema's type name, e.g. "bool" -> parse_bool_option.
        parse = globals()['parse_{}_option'.format(ty)]
        try:
            value = parse(after)
        except Exception:
            # Deliberately not a bare `except:` so that KeyboardInterrupt and
            # SystemExit still propagate.
            print('could not parse {!r} as {}'.format(after, ty))
            continue
        options[before] = value
    print(json.dumps(unflatten_dict(options), indent=2))
def unflatten_dict(d, separator=SEP):
    """Rebuild a nested dict from a flattened one.

    Each key of `d` is split on `separator` and the resulting list of
    components is used as a path into the nested result, e.g.
    {"a-b": "foo"} becomes {"a": {"b": "foo"}} when the separator is '-'.
    """
    nested = {}
    for flat_key, value in d.items():
        path = flat_key.split(separator)
        multi_index_set(nested, path, value)
    return nested
def multi_index_set(d, indices, v):  # set as in get/set, not set theory
    """Assign `v` inside nested dict `d` at the path given by `indices`.

    All components but the last name intermediate dicts, which are created
    on demand; the last component is the key that receives `v`. `d` is
    modified in place and nothing is returned. `indices` must be non-empty.
    """
    assert len(indices) > 0
    parent_keys, leaf_key = indices[:-1], indices[-1]
    node = d
    for key in parent_keys:
        # Descend, creating the intermediate dict if it is missing.
        node = node.setdefault(key, {})
    node[leaf_key] = v
# Option parsing
def parse_bool_option(s):
    """Convert a command line string to a bool.

    Matching is case-insensitive: 'true', 'on' and '1' give True;
    'false', 'off' and '0' give False.

    Raises:
        ValueError: if `s` is none of the accepted spellings. The message
            quotes `s` exactly as it was passed in.
    """
    normalized = s.lower()
    if normalized in ('true', 'on', '1'):
        return True
    if normalized in ('false', 'off', '0'):
        return False
    # ValueError (rather than a bare Exception) matches what int() raises in
    # parse_int_option, and is still caught by `except Exception` handlers.
    raise ValueError('Could not parse {!r} as bool'.format(s))
def parse_int_option(s):
    """Convert a command line string to an int via the built-in int().

    Whatever int() raises for an invalid string (ValueError) propagates
    to the caller.
    """
    value = int(s)
    return value
# Only run the command line driver when executed as a script, so that
# importing this module does not parse sys.argv and print as a side effect.
if __name__ == '__main__':
    main()
# Addendum:
#
# Strawman for distributing the schema across the source code of a C/C++
# program, to keep the definitions of options appropriately "local". Some
# of the options in the schema above don't really make sense from this
# point of view... but whatever.
#
# OptionSchema.h:
# >>>
# enum OptionKind {
# Bool,
# Int,
# Subobject
# }
#
# struct SchemaEntry {
# const char *Key;
# OptionKind ValueKind;
# SchemaEntry *OptionalSubobject;
# }
# <<<
#
#
# toplevel.cpp:
# >>>
# extern SchemaEntry VectorizerSchemaEntry[];
# extern SchemaEntry InlinerSchemaEntry[];
#
# SchemaEntry TopLevelSchemaEntry[] = {
# {"vectorizer", Subobject, &VectorizerSchemaEntry},
# {"inliner", Subobject, &InlinerSchemaEntry},
# {"verbose", Bool},
# {0}
# };
# <<<
#
#
# inliner.cpp:
# >>>
# SchemaEntry InlinerSchemaEntry[] = {
# {"enable", Bool},
# {"threshold", Int},
# {0}
# };
# <<<
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment