chisophugis/options.py

## options.py
#!/usr/bin/env python
#
# A prototype for consistently handling command line options by
# interpreting them as a JSON object, following a schema.
# The result of command line option parsing is a "JSON object" which
# can be passed around and manipulated as desired.
#
# Example:
# $ ./options.py --vectorizer-loop-enable=true --inliner-enable=true --inliner-threshold=6
# {
#   "inliner": {
#     "threshold": 6,
#     "enable": true
#   },
#   "vectorizer": {
#     "loop": {
#       "enable": true
#     }
#   }
# }
#
# See the comment above the OPTION_SCHEMA variable for what the schema looks
# like (the functionality available now is quite limited, but hopefully
# conveys the gist).
#
# Skip down to main() to see the primary flow of the code.
#
# For now, '-' is used as the separator for properties. This is just because
# '-' is common in command line option names.  It actually might make more
# sense to use '.' which might make things more intuitive and make it clear
# that this is a more "formalized" option scheme:
# $ foo --foo.bar=true --bar.quux.quuux=8
# You can tweak the global SEP variable to change this to be '.' or whatever.
#
# Note that by a coincidence with braced shell expansion, this works:
# $ ./options.py --vectorizer{-aggressive=on,-loop{-enable=true,-max_depth=3}}
# Or with SEP == '.':
# $ ./options.py --vectorizer{.aggressive=on,.loop{.enable=true,.max_depth=3}}
# $ ./options.py --vectorizer.{aggressive=on,loop.{enable=true,max_depth=3}}
# $ ./options.py --vectorizer.{aggressive=on,loop.enable=true,loop.max_depth=3}
#
# (Note that I just pulled the option names for this schema out of thin
# air plus vague recollection of options I've seen in places; they probably
# don't make sense)

from __future__ import print_function

import sys
import json

# Separator between the nested keys of an option name on the command line;
# with the default, "--inliner-threshold" addresses ["inliner"]["threshold"].
SEP = '-'

# A leaf of this JSON object must be a string, which indicates the type.
# main() resolves a type name T to the module-level function parse_T_option.
# For now, there should be no arrays.
# The only types that can be specified currently are "bool" and "int".
# In the future, adding a list<T> might be useful, so you can do e.g.
# --internalize.assume_external=foo_func,bar_func,baz_func
# to get the value ["foo_func", "bar_func", "baz_func"] for a list<string>.
#
# See the end of this file for a strawman for a nice static table-based
# recursive way to define these options in C/C++.
OPTION_SCHEMA = json.loads('''
{
  "vectorizer": {
    "aggressive": "bool",
    "fast": "bool",
    "loop": {
      "enable": "bool",
      "max_depth": "int"
    }
  },
  "inliner": {
    "enable": "bool",
    "threshold": "int"
  },
  "verbose": "bool"
}
''')

# E.g. turn {"a": {"b": "foo"}} into {"a-b":"foo"}
# This transformation simplifies the code by requiring less nesting.
# The function unflatten_dict does the opposite transformation.
def flatten_dict(d, separator=SEP):
    """Collapse a nested dict into a single-level dict.

    Each leaf value ends up under a key made of the keys on the path to it,
    joined with ``separator``; e.g. ``{"a": {"b": "foo"}}`` becomes
    ``{"a-b": "foo"}`` when the separator is ``'-'``.  No key may contain
    the separator (checked with an assertion).
    """
    def _leaves(node, path):
        # Anything that is not a dict is a leaf: emit it under its joined path.
        if not isinstance(node, dict):
            yield separator.join(path), node
            return
        for key, child in node.items():
            assert separator not in key
            for pair in _leaves(child, path + [key]):
                yield pair

    return dict(_leaves(d, []))

# Maps a full option name such as "vectorizer-loop-enable" (nested keys
# joined with SEP) to the type name stored at that leaf of OPTION_SCHEMA.
FLATTENED_OPTION_SCHEMA = flatten_dict(OPTION_SCHEMA)


def main():
    """Parse ``sys.argv[1:]`` against the option schema and print the result.

    Every argument is expected to look like ``--<key>=<value>``, where
    ``<key>`` is a key of FLATTENED_OPTION_SCHEMA (e.g. ``inliner-threshold``).
    Arguments that are malformed, unknown, or whose value cannot be parsed
    are reported with a message and skipped.  The accepted options are then
    printed as an indented, nested JSON object.
    """
    # json.loads() yields unicode strings on Python 2 and str on Python 3.
    # type(u'') names the text type on both, without referencing the
    # ``unicode`` builtin that does not exist on Python 3.
    string_types = (str, type(u''))

    options = {}
    for opt in sys.argv[1:]:
        # We expect opt to look like --foo-bar=baz
        if not opt.startswith('--'):
            print('option must start with "--"')
            continue
        before, _, after = opt[len('--'):].partition('=')
        if after == '':
            print('option not formatted correctly {!r}'.format(opt))
            continue
        ty = FLATTENED_OPTION_SCHEMA.get(before)
        # A missing key gives None, which is not a string either.
        if not isinstance(ty, string_types):
            print('{!r} is not a valid key index'.format(before))
            continue
        # Parsers are found by naming convention: parse_<type>_option.
        parse = globals().get('parse_{}_option'.format(ty))
        if parse is None:
            print('no parser for option type {!r}'.format(ty))
            continue
        try:
            value = parse(after)
        except Exception:
            # Not a bare except: Ctrl-C and SystemExit must still propagate.
            print('could not parse {!r} as {}'.format(after, ty))
            continue
        options[before] = value
    print(json.dumps(unflatten_dict(options), indent=2))


def unflatten_dict(d, separator=SEP):
    """Rebuild a nested dict from a flat one (the inverse of flatten_dict).

    Each key of ``d`` is split on ``separator`` and the pieces are used as
    the path at which the value is stored in the returned dict.
    """
    nested = {}
    for flat_key, value in d.items():
        path = flat_key.split(separator)
        multi_index_set(nested, path, value)
    return nested
def multi_index_set(d, indices, v):
    """Store ``v`` in ``d`` at the nested location named by ``indices``.

    "Set" is meant as in get/set, not set theory.  Intermediate dicts that
    do not exist yet are created on the way down, so
    ``multi_index_set(d, ['a', 'b'], 1)`` leaves ``d['a']['b'] == 1``.
    ``indices`` must hold at least one key (checked with an assertion).
    """
    assert len(indices) > 0
    parents, leaf = indices[:-1], indices[-1]
    node = d
    for key in parents:
        # setdefault creates the missing level and returns it in one step.
        node = node.setdefault(key, {})
    node[leaf] = v


# Option parsing

def parse_bool_option(s):
    """Convert the textual value of a boolean option to ``True`` or ``False``.

    Matching is case-insensitive: ``true``, ``on`` and ``1`` give ``True``;
    ``false``, ``off`` and ``0`` give ``False``.

    Raises:
        ValueError: if ``s`` is not one of the recognised spellings.
    """
    lowered = s.lower()
    if lowered in ('true', 'on', '1'):
        return True
    if lowered in ('false', 'off', '0'):
        return False
    # ValueError is what int() raises for bad input too, so both parsers
    # fail the same way; the message shows the text exactly as it was given.
    raise ValueError('Could not parse {!r} as bool'.format(s))

def parse_int_option(s):
    """Convert the textual value of an integer option to an ``int``.

    The conversion is done by the built-in ``int``, which raises
    ``ValueError`` for text that is not an integer literal.
    """
    value = int(s)
    return value


# Run the command line handling only when executed as a script, so that
# importing this module neither reads sys.argv nor prints anything.
if __name__ == '__main__':
    main()


# Addendum:
#
# Strawman for distributing the schema across the source code of a C/C++
# program, to keep the definitions of options appropriately "local". Some
# of the options in the schema above don't really make sense from this
# point of view... but whatever.
#
# OptionSchema.h:
# >>>
# enum OptionKind {
#   Bool,
#   Int,
#   Subobject
# }
#
# struct SchemaEntry {
#   const char *Key;
#   OptionKind ValueKind;
#   SchemaEntry *OptionalSubobject;
# }
# <<<
#
#
# toplevel.cpp:
# >>>
# extern SchemaEntry VectorizerSchemaEntry[];
# extern SchemaEntry InlinerSchemaEntry[];
#
# SchemaEntry TopLevelSchemaEntry[] = {
#   {"vectorizer", Subobject, &VectorizerSchemaEntry},
#   {"inliner", Subobject, &InlinerSchemaEntry},
#   {"verbose", Bool},
#   {0}
# };
# <<<
#
#
# inliner.cpp:
# >>>
# SchemaEntry InlinerSchemaEntry[] = {
#   {"enable", Bool},
#   {"threshold", Int},
#   {0}
# };
# <<<
	#!/usr/bin/env python
	#
	# A prototype for consistently handling command line options by
	# interpreting them as a JSON object, following a schema.
	# The result of command line option parsing is a "JSON object" which
	# can be passed around and manipulated as desired.
	#
	# Example:
	# $ ./options.py --vectorizer-loop-enable=true --inliner-enable=true --inliner-threshold=6
	# {
	# "inliner": {
	# "threshold": 6,
	# "enable": true
	# },
	# "vectorizer": {
	# "loop": {
	# "enable": true
	# }
	# }
	# }
	#
	# See the comment above the OPTION_SCHEMA variable for what the schema looks
	# like (the functionality available now is quite limited, but hopefully
	# conveys the gist).
	#
	# Skip down to main() to see the primary flow of the code.
	#
	# For now, '-' is used as the separator for properties. This is just because
	# '-' is common in command line option names. It actually might make more
	# sense to use '.' which might make things more intuitive and make it clear
	# that this is a more "formalized" option scheme:
	# $ foo --foo.bar=true --bar.quux.quuux=8
	# You can tweak the global SEP variable to change this to be '.' or whatever.
	#
	# Note that by a coincidence with braced shell expansion, this works:
	# $ ./options.py --vectorizer{-aggressive=on,-loop{-enable=true,-max_depth=3}}
	# Or with SEP == '.':
	# $ ./options.py --vectorizer{.aggressive=on,.loop{.enable=true,.max_depth=3}}
	# $ ./options.py --vectorizer.{aggressive=on,loop.{enable=true,max_depth=3}}
	# $ ./options.py --vectorizer.{aggressive=on,loop.enable=true,loop.max_depth=3}
	#
	# (Note that I just pulled the option names for this schema out of thin
	# air plus vague recollection of options I've seen in places; they probably
	# don't make sense)

	from __future__ import print_function

	import sys
	import json

	# Separator between the nested keys of an option name on the command line;
	# with the default, "--inliner-threshold" addresses ["inliner"]["threshold"].
	SEP = '-'

	# A leaf of this JSON object must be a string, which indicates the type.
	# main() resolves a type name T to the module-level function parse_T_option.
	# For now, there should be no arrays.
	# The only types that can be specified currently are "bool" and "int".
	# In the future, adding a list<T> might be useful, so you can do e.g.
	# --internalize.assume_external=foo_func,bar_func,baz_func
	# to get the value ["foo_func", "bar_func", "baz_func"] for a list<string>.
	#
	# See the end of this file for a strawman for a nice static table-based
	# recursive way to define these options in C/C++.
	OPTION_SCHEMA = json.loads('''
	{
	"vectorizer": {
	"aggressive": "bool",
	"fast": "bool",
	"loop": {
	"enable": "bool",
	"max_depth": "int"
	}
	},
	"inliner": {
	"enable": "bool",
	"threshold": "int"
	},
	"verbose": "bool"
	}
	''')

	# E.g. turn {"a": {"b": "foo"}} into {"a-b":"foo"}
	# This transformation simplifies the code by requiring less nesting.
	# The function unflatten_dict does the opposite transformation.
	def flatten_dict(d, separator=SEP):
	    """Collapse a nested dict into a single-level dict.

	    Each leaf value is stored under the keys on the path to it joined with
	    ``separator``; e.g. ``{"a": {"b": "foo"}}`` becomes ``{"a-b": "foo"}``
	    when the separator is ``'-'``.  No key may contain the separator
	    (checked with an assertion).
	    """
	    ret = {}

	    def _rec(x, curprefix):
	        # Dicts are descended into; anything else is a leaf to record.
	        if isinstance(x, dict):
	            for k, v in x.items():
	                assert separator not in k
	                _rec(v, curprefix + [k])
	        else:
	            ret[separator.join(curprefix)] = x

	    _rec(d, [])
	    return ret

	# Maps a full option name such as "vectorizer-loop-enable" (nested keys
	# joined with SEP) to the type name stored at that leaf of OPTION_SCHEMA.
	FLATTENED_OPTION_SCHEMA = flatten_dict(OPTION_SCHEMA)



	def main():
	    """Parse ``sys.argv[1:]`` against the option schema and print the result.

	    Every argument is expected to look like ``--<key>=<value>``, where
	    ``<key>`` is a key of FLATTENED_OPTION_SCHEMA (e.g. ``inliner-threshold``).
	    Arguments that are malformed, unknown, or whose value cannot be parsed
	    are reported with a message and skipped.  The accepted options are then
	    printed as an indented, nested JSON object.
	    """
	    # json.loads() yields unicode strings on Python 2 and str on Python 3.
	    # type(u'') names the text type on both, without referencing the
	    # ``unicode`` builtin that does not exist on Python 3.
	    string_types = (str, type(u''))

	    options = {}
	    for opt in sys.argv[1:]:
	        # We expect opt to look like --foo-bar=baz
	        if not opt.startswith('--'):
	            print('option must start with "--"')
	            continue
	        before, _, after = opt[len('--'):].partition('=')
	        if after == '':
	            print('option not formatted correctly {!r}'.format(opt))
	            continue
	        ty = FLATTENED_OPTION_SCHEMA.get(before)
	        # A missing key gives None, which is not a string either.
	        if not isinstance(ty, string_types):
	            print('{!r} is not a valid key index'.format(before))
	            continue
	        # Parsers are found by naming convention: parse_<type>_option.
	        parse = globals().get('parse_{}_option'.format(ty))
	        if parse is None:
	            print('no parser for option type {!r}'.format(ty))
	            continue
	        try:
	            value = parse(after)
	        except Exception:
	            # Not a bare except: Ctrl-C and SystemExit must still propagate.
	            print('could not parse {!r} as {}'.format(after, ty))
	            continue
	        options[before] = value
	    print(json.dumps(unflatten_dict(options), indent=2))


	def unflatten_dict(d, separator=SEP):
	    """Rebuild a nested dict from a flat one (the inverse of flatten_dict).

	    Each key of ``d`` is split on ``separator`` and the pieces are used as
	    the path at which the value is stored in the returned dict.
	    """
	    ret = {}
	    for k, v in d.items():
	        multi_index_set(ret, k.split(separator), v)
	    return ret
	def multi_index_set(d, indices, v):
	    """Store ``v`` in ``d`` at the nested location named by ``indices``.

	    "Set" is meant as in get/set, not set theory.  Intermediate dicts that
	    do not exist yet are created on the way down, so
	    ``multi_index_set(d, ['a', 'b'], 1)`` leaves ``d['a']['b'] == 1``.
	    ``indices`` must hold at least one key (checked with an assertion).
	    """
	    assert len(indices) > 0
	    for k in indices[:-1]:
	        if k not in d:
	            d[k] = {}
	        # Descend one level; ``d`` now names the inner dict.
	        d = d[k]
	    d[indices[-1]] = v



	# Option parsing

	def parse_bool_option(s):
	    """Convert the textual value of a boolean option to ``True`` or ``False``.

	    Matching is case-insensitive: ``true``, ``on`` and ``1`` give ``True``;
	    ``false``, ``off`` and ``0`` give ``False``.

	    Raises:
	        ValueError: if ``s`` is not one of the recognised spellings.
	    """
	    lowered = s.lower()
	    if lowered in ('true', 'on', '1'):
	        return True
	    if lowered in ('false', 'off', '0'):
	        return False
	    # ValueError is what int() raises for bad input too, so both parsers
	    # fail the same way; the message shows the text exactly as it was given.
	    raise ValueError('Could not parse {!r} as bool'.format(s))

	def parse_int_option(s):
	    """Convert the textual value of an integer option to an ``int``.

	    The conversion is done by the built-in ``int``, which raises
	    ``ValueError`` for text that is not an integer literal.
	    """
	    return int(s)


	# Run the command line handling only when executed as a script, so that
	# importing this module neither reads sys.argv nor prints anything.
	if __name__ == '__main__':
	    main()


	# Addendum:
	#
	# Strawman for distributing the schema across the source code of a C/C++
	# program, to keep the definitions of options appropriately "local". Some
	# of the options in the schema above don't really make sense from this
	# point of view... but whatever.
	#
	# OptionSchema.h:
	# >>>
	# enum OptionKind {
	# Bool,
	# Int,
	# Subobject
	# }
	#
	# struct SchemaEntry {
	# const char *Key;
	# OptionKind ValueKind;
	# SchemaEntry *OptionalSubobject;
	# }
	# <<<
	#
	#
	# toplevel.cpp:
	# >>>
	# extern SchemaEntry VectorizerSchemaEntry[];
	# extern SchemaEntry InlinerSchemaEntry[];
	#
	# SchemaEntry TopLevelSchemaEntry[] = {
	# {"vectorizer", Subobject, &VectorizerSchemaEntry},
	# {"inliner", Subobject, &InlinerSchemaEntry},
	# {"verbose", Bool},
	# {0}
	# };
	# <<<
	#
	#
	# inliner.cpp:
	# >>>
	# SchemaEntry InlinerSchemaEntry[] = {
	# {"enable", Bool},
	# {"threshold", Int},
	# {0}
	# };
	# <<<