chrisklaiber/namedmap.py

## namedmap.py
"""A space-efficient frozen (write-once) mapping class.

A named map has two key properties:

  1. The interface is like a dict.
  2. Memory usage is like an object that defines __slots__.

Call namedmap.frozennamedmap() to define a subclass, passing the name
of the type and an iterable of key names.

  >>> Point = namedmap.frozennamedmap('Point', ('x', 'y'))
  >>> Point.__doc__
  'Point(x, y)'
  >>> origin = Point({'x': 0, 'y': 0})
  >>> origin['x']
  0
  >>> partial = Point({'y': 1})
  >>> partial['y']
  1
  >>> partial['x']
  KeyError: 'x'

This new data structure was motivated by a want to reduce the
in-memory size of Khan Academy's content database, which contains tens
of thousands of dicts. Read all about it here:

    http://engineering.khanacademy.org/posts/evolving-our-content-infrastructure.htm

When the keys and values are interned and heavily shared, like they
are in the frozen model store, the overhead of the container can be
significant. A dict (280 bytes) is much larger than an object with
__slots__ (120 bytes with one property, +8 bytes per property).

A named map keeps the smaller in-memory representation of an object
with __slots__ while serialization and access behave more like a dict.

From July 5, 2016, here is data from switching the 'item' key's list
of Khan Academy's FrozenExercise.problem_types to other data types:

  dict        in-memory:43465992 pickled:9503491 zipped:3922128
  namedtuple  in-memory:16743448 pickled:5496541 zipped:2945814
  namedmap    in-memory:16181984 pickled:6276168 zipped:2996040

namedmap can be more compact than namedtuple because namedtuple
requires that all values be specified, even if set to None, whereas
namedmap's keys are optional.
"""

import collections
import sys


def _extend_with_mapping_definitions(cls):
    """Leverage collections.Mapping's read-only mapping definitions.

    Why not use inheritance? collections.Mapping doesn't use __slots__
    and so inheriting from it creates a __dict__ on instances, which
    is what the use of __slots__ is trying to avoid!

    We use this as a class decorator to mix in the collections.Mapping
    definitions. Existing attributes are not overridden.
    """
    combined_dict = collections.Mapping.__dict__.copy()
    combined_dict.update(cls.__dict__)
    # Kind of hacky: a Python class's __dict__ is a read-only
    # dictproxy (as opposed to a class instance's __dict__, which
    # won't exist in this case because we're using __slots__). This
    # redefinition is how we modify the class's dictproxy.
    return type(cls.__name__, cls.__bases__, combined_dict)


@_extend_with_mapping_definitions
class _FrozenNamedMap(object):
    __slots__ = ()  # subclass must define __slots__

    def __new__(cls, *args, **kwargs):
        # We enforce the same creation interface as dict(), and we
        # further restrict to only known field names.
        argdict = dict(*args, **kwargs)
        result = object.__new__(cls)
        for key in cls.__slots__:
            if key in argdict:
                setattr(result, key, argdict.pop(key))
        if argdict:
            raise TypeError('%s unexpected keys: %r'
                            % (cls.__name__, argdict.keys()))
        return result

    def __repr__(self):
        return '%s({%s})' % (
            self.__class__.__name__,
            ', '.join('%r: %r' % kv_pair for kv_pair in self.iteritems()))

    def __reduce__(self):
        """Return self as a plain tuple of key-value pairs. Used by pickle."""
        # We could have more compact pickled representation but it's a
        # smallish savings and the cost is "unfreezing" this structure
        # by implementing __setitem__. To understand why, see pickle's
        # docs for __reduce__.
        #
        # The more compact implementation is:
        #   return (self.__class__, (), None, None, self.iteritems())
        #
        # Here are pickled sizes of ~8k FrozenExercise.problem_types
        # converted to namedtuple and namedmap, as implemented below
        # (baseline) and with the above modification (improved):
        #
        #                    pickled  zipped   %-of-baseline
        # namedtuple         5495485  2945178   87.6%
        # namedmap baseline  6275110  2995217  100.0%
        # namedmap improved  6047125  2966671   96.4%

        # __reduce__ provides fine-grained pickle control. The first
        # element of the result is a callable, our class. The second
        # element is a tuple of arguments for the callable object.
        # For a map with keys 'x', 'y' this returns:
        #   (self.__class__, ((('x', 1), ('y', 2)),))
        # which, in depickling, is the call:
        #   self.__class__((('x', 1), ('y', 2)))
        arg1 = tuple(self.iteritems())
        return (self.__class__, (arg1,))

    # The definitions from collections.Mapping require __len__,
    # __iter__, and __getitem__.

    def __len__(self):
        n = 0
        for _ in self:
            n += 1
        return n

    def __iter__(self):
        # Callers must not rely on the order of iteration. This is
        # intended as a substitute for dict.
        default = object()
        for key in self.__slots__:
            if getattr(self, key, default) is not default:
                yield key

    def __getitem__(self, key):
        if not isinstance(key, str):
            raise TypeError('%s keys must be strings, not %s'
                            % (self.__class__.__name__,
                               key.__class__.__name__))
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    # We may someday want to implement __setitem__ and __delitem__ and
    # use definitions from collections.MutableMapping. Maybe in a
    # subclass called _NamedMap, or maybe we invert the hierarchy to
    # have the frozen version be the subclass.  For now, though, it's
    # kind of nice to be immutable.

    def __setitem__(self, key, value):
        raise NotImplementedError()

    def __delitem__(self, key):
        raise NotImplementedError()


def frozennamedmap(typename, keys):
    """Return a new immutable namedmap with named keys."""
    keys = tuple(keys)
    for key in keys:
        assert isinstance(key, str), key
    typedict = {
        '__slots__': keys,
        '__doc__': '%s(%s)' % (typename, ', '.join(keys)),
        }
    result = type(typename, (_FrozenNamedMap,), typedict)
    # Setting __module__ is taken from collection.py in namedtuple().
    # For pickling to work, the __module__ variable needs to be set to
    # the frame where the named tuple is created.  Bypass this step in
    # environments where sys._getframe is not defined (Jython for
    # example) or sys._getframe is not defined for arguments greater
    # than 0 (IronPython).
    try:
        result.__module__ = sys._getframe(1).f_globals.get('__name__',
                                                           '__main__')
    except (AttributeError, ValueError):
        pass
    return result

## namedmap_test.py
import json
import pickle
import unittest

from pympler import asizeof

import namedmap

Point = namedmap.frozennamedmap('Point', ('x', 'y'))


class TestNamedMap(unittest.TestCase):
    def _assert_x1y2_point(self, pt):
        self.assertEqual(pt, {'x': 1, 'y': 2})
        self.assertEqual(pt['x'], 1)
        self.assertEqual(pt['y'], 2)
        with self.assertRaises(KeyError):
            pt['z']
        self.assertIn('x', pt)
        self.assertNotIn('z', pt)
        self.assertEqual(pt.get('x'), 1)
        self.assertEqual(pt.get('y'), 2)
        self.assertIsNone(pt.get('z'))
        self.assertEqual(dict(pt.items()), {'x': 1, 'y': 2})
        self.assertEqual(pt.values(), [1, 2])
        self.assertEqual(pt.keys(), ['x', 'y'])
        self.assertEqual(dict(pt.iteritems()), {'x': 1, 'y': 2})
        self.assertEqual(sorted(pt.itervalues()), [1, 2])
        self.assertEqual(sorted(pt.iterkeys()), ['x', 'y'])
        self.assertEqual(len(pt), 2)

    def test_interface(self):
        self.assertEqual(Point(), {})
        self.assertEqual(Point(x=1), {'x': 1})

        pt = Point(x=1, y=2)
        self._assert_x1y2_point(pt)

        # Check __slots__-object-like behaviors.
        self.assertEqual(pt.x, 1)
        self.assertEqual(pt.y, 2)
        with self.assertRaises(AttributeError):
            _ = Point().x

        # Check for unexpected keys.
        with self.assertRaises(TypeError):
            Point(z=3)

        # Ensure frozenness.
        with self.assertRaises(NotImplementedError):
            pt['x'] = 1
        with self.assertRaises(NotImplementedError):
            del pt['x']

        # Ensure that two instances that should have different
        # lengths, do have different lengths. This would have detected
        # an implementation bug where caching meant the same value was
        # returned for all instances.
        partial_pt = Point(x=1)
        self.assertEqual(len(pt), 2)
        self.assertEqual(len(partial_pt), 1)

    def test_pickling(self):
        pt = Point(x=1, y=2)

        # Ensure that all protocols work (this errored back when
        # namedmap implemented __getnewargs__ rather than __reduce__).
        for protocol in xrange(pickle.HIGHEST_PROTOCOL + 1):
            pt_again = pickle.loads(pickle.dumps(pt, protocol))
            self._assert_x1y2_point(pt_again)

        # Test pickling when only some keys are set.
        partial_pt = pickle.loads(pickle.dumps(Point(x=1)))
        self.assertEqual(partial_pt['x'], 1)
        self.assertNotIn('y', partial_pt)
        partial_pt = pickle.loads(pickle.dumps(Point(y=2)))
        self.assertEqual(partial_pt['y'], 2)
        self.assertNotIn('x', partial_pt)

    def test_constructor(self):
        pt = Point(y=2, x=1)
        self.assertEqual(pt['x'], 1)
        self.assertEqual(pt['y'], 2)
        self.assertEqual(Point([('x', 1)])['x'], 1)
        self.assertEqual(Point({'x': 1})['x'], 1)
        self.assertEqual(Point([('x', 1)], x=0)['x'], 0)

    def test_class_structure(self):
        self.assertEqual(Point().__slots__, ('x', 'y'))
        # Ensure we use __slots__ and don't incur __dict__ overhead.
        with self.assertRaises(AttributeError):
            Point().__dict__

    def test_memory_size(self):
        # Roughly test the core purpose of tuple map, in-memory size.
        solar_system_distances = dict((('Mercury', 0.39), ('Venus', 0.723),
                                       ('Earth', 1.0), ('Mars', 1.524),
                                       ('Jupiter', 5.203), ('Saturn', 9.539),
                                       ('Uranus', 19.18), ('Neptune', 30.06),
                                       ('Pluto', 39.53)))
        myclass = namedmap.frozennamedmap('myclass',
                                          tuple(solar_system_distances))
        asdicts = [dict(**solar_system_distances) for _ in xrange(1000)]
        asnamedmaps = [myclass(**solar_system_distances) for _ in xrange(1000)]
        # On chris' Macbook Pro dicts are ~1MB and namedmaps ~130KB.
        self.assertGreater(asizeof.asizeof(asdicts), 950 * 1000)
        self.assertLess(asizeof.asizeof(asnamedmaps), 150 * 1000)

    def test_json(self):
        # The json module from the standard library doesn't know how
        # to serialize custom object.
        with self.assertRaises(TypeError):
            json.dumps(Point(x=1, y=2))

## tuplemap.py
# NOTE: This library is obsoleted by namedmap, which should be used
# instead. The reason is that a tuplemap has serious serialization
# problems with Python's built-in json library (it serializes as a list,
# not an object). Furthermore, due to a bug in pympler.asizeof, a the
# space-saving benefits of a tuplemap had been exaggerated.

"""Defines a dict-like structure with a compact in-memory representation.

Call tuplemap.frozentuplemap() to define a subclass, passing the name
of the type and an iterable of field names:

  >>> Point = tuplemap.frozentuplemap('Point', ('x', 'y'))
  >>> Point.__doc__
  'Point(x, y)'
  >>> origin = Point(x=0, y=0)
  >>> origin['x']
  0
  >>> partial = Point(y=1)
  >>> partial['y']
  1
  >>> partial['x']
  KeyError: 'x'

Be careful when serializing tuplemaps. Using pickle is OK, but json,
yaml, or similar will serialize tuplemap to a list of key names and
lose the mapping. Instead use tuplemap.unpack() to get a dict:

  >>> Point = tuplemap.frozentuplemap('Point', ('x', 'y'))
  >>> partial = Point(y=1)
  >>> tuplemap.unpack(partial)
  {"y":1}

It's safe to unpack non-tuplemaps. They are returned as-is:

  >>> tuplemap.unpack(1)
  1
  >>> tuplemap.unpack([1, 2, 3])
  [1, 2, 3]

A tuple map has three key properties:

  1. The interface is like a dict.
  2. Memory usage is like a tuple.
  3. When pickled and unpickled, the ordering and name of fields is
     flexible, like a dict and unlike collections.namedtuple.

This new data structure was motivated by a want to reduce the
in-memory size of Khan Academy's content database, which contains tens
of thousands of dicts. Read all about it here:

    http://engineering.khanacademy.org/posts/evolving-our-content-infrastructure.htm

When the keys and values are interned and heavily shared, like they
are in the frozen model store, the overhead of the container can be
significant. An dict (280 bytes), and an object with __slots__ (120
bytes with one property, +8 bytes per property) can't compete with a
tuple (56 bytes +8 bytes per entry). However, tuples aren't very
robust to code changes and it's difficult to evolve their
schema. Furthermore, changing many downstream callers that expect a
dict to use something else is time-consuming and error-prone.

A tuple map keeps the in-memory representation as small as a tuple,
while pickling and access behave like a dict.

From May 2016, here is data from switching the 'item' key's list of
Khan Academy's FrozenExercise.problem_types to other data types:

  dict          in-memory:41626912 pickled:8974467 zipped:3754914
  tuplemap      in-memory:11618840 pickled:6259246 zipped:2975254
  namedtuple    in-memory:11686976 pickled:5590038 zipped:2931181

"""

import sys


class _FrozenTupleMap(tuple):
    __slots__ = ()
    _fields = ()  # subclasses should define named fields

    # This value is used to know a key isn't mapped to a value.
    _unset = object()

    def __new__(cls, *args, **kwargs):
        """Create new instance."""
        # We enforce the same creation interface as dict(), and we
        # further restrict to only known field names.
        argdict = dict(*args, **kwargs)
        # A key instance invariant: the backing tuple has len(cls._fields).
        values = tuple(argdict.pop(field, cls._unset) for field in cls._fields)
        if argdict:
            raise TypeError('%s unexpected field names: %r'
                            % (cls.__name__, argdict.keys()))
        return tuple.__new__(cls, values)

    def __repr__(self):
        """Return a nicely formatted representation string."""
        return '%s(%s)' % (
            self.__class__.__name__,
            ', '.join('%s=%r' % item for item in self.iteritems()))

    def __reduce__(self):
        """Return self as a plain tuple of key-value pairs. Used by pickle."""
        # __reduce__ provides fine-grained pickle control. The first
        # argument is our class, a callable, and the second argument
        # will be passed to it as *args on reconstruction.
        newargs = (tuple((f, self[f]) for f in self._fields if f in self),)
        return (self.__class__, newargs)

    def get(self, key, default=None):
        'D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.'
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, key):
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    def __eq__(self, other):
        if isinstance(other, _FrozenTupleMap):
            return super(_FrozenTupleMap, self).__eq__(other)
        elif isinstance(other, dict):
            return dict(self.items()) == other
        else:
            return False

    def __ne__(self, other):
        return not (self == other)

    # Support functions for making dicts with tuplemap.unpack().

    def _unpacker(self):
        return {}

    def _unpack(self):
        """Return self as a dict."""
        result = dict(self.iteritems())
        result.update(self._unpacker())
        return result

    # The definitions from collections.Mapping require __len__,
    # __iter__, and __getitem__.

    def __len__(self):
        return super(_FrozenTupleMap, self).__len__() - self.count(self._unset)

    def __iter__(self):
        for i, key in enumerate(self._fields):
            val = super(_FrozenTupleMap, self).__getitem__(i)
            if val is not self._unset:
                yield key

    def __getitem__(self, key):
        if not isinstance(key, str):
            raise TypeError('%s keys must be strings, not %s'
                            % (self.__class__.__name__,
                               key.__class__.__name__))
        try:
            # For tuples with fewer than maybe 5 items, this is
            # actually faster than a hashed dict lookup!
            field_index = self._fields.index(key)
        except ValueError:
            raise KeyError(key)
        val = super(_FrozenTupleMap, self).__getitem__(field_index)
        if val is self._unset:
            raise KeyError(key)
        return val

    # We may someday want to implement __setitem__ and __delitem__ and
    # use definitions from collections.MutableMapping. Maybe in a
    # subclass called _TupleMap, or maybe we invert the hierarchy to
    # have the frozen version be the subclass.  For now, though, it's
    # kind of nice to be immutable.

    def __setitem__(self, key, value):
        # Overridden to avoid the awkwardness of being able to set
        # numeric indices that we inherit from our tuple superclass.
        raise NotImplementedError()

    def __delitem__(self, key):
        raise NotImplementedError()

    # Finally, the dict API.  Taken from collections.Mapping.__dict__.
    def iterkeys(self):
        return iter(self)

    def itervalues(self):
        for i in xrange(len(self._fields)):
            val = super(_FrozenTupleMap, self).__getitem__(i)
            if val is not self._unset:
                yield val

    def iteritems(self):
        for i, key in enumerate(self._fields):
            val = super(_FrozenTupleMap, self).__getitem__(i)
            if val is not self._unset:
                yield (key, val)

    def keys(self):
        return list(self.iterkeys())

    def items(self):
        return [(key, self[key]) for key in self]

    def values(self):
        return list(self.itervalues())


def frozentuplemap(typename, field_names, unpacker=None):
    """Return a new immutable tuplemap with named fields.

    If set, `unpacker` must be a function that accepts one argument, a
    tuplemap instance, and returns a value that will be passed into
    dict.update(). The function is used to augment the dict returned
    by tuplemap.unpack(). For example, to further unpack contained
    tuplemaps.
    """
    for field in field_names:
        assert isinstance(field, str)
    _fields = tuple(field_names)
    typedict = {
        '__slots__': (),
        '_fields': _fields,
        '__doc__': '%s(%s)' % (typename, repr(_fields).replace("'", "")[1:-1]),
        }
    if unpacker:
        typedict['_unpacker'] = unpacker
    result = type(typename, (_FrozenTupleMap,), typedict)
    # Setting __module__ is taken from collection.py in namedtuple().
    # For pickling to work, the __module__ variable needs to be set to
    # the frame where the named tuple is created.  Bypass this step in
    # environments where sys._getframe is not defined (Jython for
    # example) or sys._getframe is not defined for arguments greater
    # than 0 (IronPython).
    try:
        result.__module__ = sys._getframe(1).f_globals.get('__name__',
                                                           '__main__')
    except (AttributeError, ValueError):
        pass
    return result


def unpack(maybe_tuplemap):
    """Return a dict if passed a tuplemap, otherwise return the argument.

    If the argument is a tuplemap it may recursively unpack contained
    tuplemaps. See frozentuplemap()'s `unpacker` parameter.

    This is useful when a tuplemap used as a memory optimization needs
    to be serialized. For example, callers of json.dumps don't have a
    hook to control serialization for basic types like tuple and
    should call unpack() first.
    """
    if isinstance(maybe_tuplemap, _FrozenTupleMap):
        return maybe_tuplemap._unpack()
    return maybe_tuplemap

## tuplemap_test.py
import pickle
import unittest

from pympler import asizeof

import tuplemap

Point = tuplemap.frozentuplemap('Point', ('x', 'y'))


class TestTupleMap(unittest.TestCase):
    def _assert_x1y2_point(self, pt):
        self.assertEqual(pt, {'x': 1, 'y': 2})
        self.assertEqual(pt['x'], 1)
        self.assertEqual(pt['y'], 2)
        with self.assertRaises(KeyError):
            pt['z']
        self.assertIn('x', pt)
        self.assertNotIn('z', pt)
        self.assertEqual(pt.get('x'), 1)
        self.assertEqual(pt.get('y'), 2)
        self.assertIsNone(pt.get('z'))
        self.assertEqual(pt.items(), [('x', 1), ('y', 2)])
        self.assertEqual(pt.values(), [1, 2])
        self.assertEqual(pt.keys(), ['x', 'y'])
        self.assertEqual(list(pt.iteritems()), [('x', 1), ('y', 2)])
        self.assertEqual(list(pt.itervalues()), [1, 2])
        self.assertEqual(list(pt.iterkeys()), ['x', 'y'])

    def test_interface(self):
        self.assertEqual(Point(), {})

        pt = Point(x=1, y=2)
        self._assert_x1y2_point(pt)

        # Since the superclass is a tuple, there are still some
        # tuple-isms available.
        self.assertEqual(pt.count(2), 1)
        self.assertEqual(pt.index(1), 0)
        with self.assertRaises(ValueError):
            pt.index(3)

        # But it behaves like a dict here.
        self.assertEqual(Point(), {})
        self.assertEqual(Point(x=1), {'x': 1})
        self.assertNotEqual(Point(), tuple())
        self.assertNotEqual(Point(x=1), (1,))
        self.assertNotEqual(Point(x=1, y=2), (1, 2))

    def test_pickling(self):
        pt = Point(x=1, y=2)

        pt_again = pickle.loads(pickle.dumps(pt, pickle.HIGHEST_PROTOCOL))
        self._assert_x1y2_point(pt_again)

        # Ensure that older protocols work (this errored back when
        # tuplemap implemented __getnewargs__ rather than __reduce__).
        pt_again = pickle.loads(pickle.dumps(pt, 0))
        self._assert_x1y2_point(pt_again)

    def test_constructor(self):
        pt = Point(y=2, x=1)
        self.assertEqual(pt['x'], 1)
        self.assertEqual(pt['y'], 2)
        self.assertEqual(Point([('x', 1)])['x'], 1)
        self.assertEqual(Point({'x': 1})['x'], 1)
        self.assertEqual(Point([('x', 1)], x=0)['x'], 0)

    def test_new_definition(self):
        myclass = tuplemap.frozentuplemap('myclass', ('x',))
        self.assertEqual(myclass().__slots__, tuple())
        self.assertEqual(myclass()._fields, tuple(['x']))
        # Ensure we use __slots__ and don't incur __dict__ overhead.
        with self.assertRaises(AttributeError):
            myclass().__dict__

    def test_eq(self):
        pt1 = Point(y=2, x=1)
        pt2 = Point(x=1, y=2)
        pt3 = Point(x=2, y=1)
        self.assertEqual(pt1, pt2)
        self.assertNotEqual(pt1, pt3)
        self.assertEqual(pt1, {'x': 1, 'y': 2})
        self.assertNotEqual(pt3, {'x': 1, 'y': 2})
        self.assertNotEqual(pt1, ('x', 'y'))
        self.assertNotEqual(pt1, (1, 2))

    def test_memory_size(self):
        # Roughly test the core purpose of tuple map, in-memory size.
        solar_system_distances = dict((('Mercury', 0.39), ('Venus', 0.723),
                                       ('Earth', 1.0), ('Mars', 1.524),
                                       ('Jupiter', 5.203), ('Saturn', 9.539),
                                       ('Uranus', 19.18), ('Neptune', 30.06),
                                       ('Pluto (now a dwarf planet)', 39.53)))
        myclass = tuplemap.frozentuplemap('myclass',
                                          tuple(solar_system_distances))
        asdicts = [dict(**solar_system_distances) for _ in xrange(1000)]
        astuplemaps = [myclass(**solar_system_distances) for _ in xrange(1000)]
        # On chris' Macbook Pro dicts are ~1MB and tuplemaps ~130KB.
        self.assertGreater(asizeof.asizeof(asdicts), 950 * 1000)
        self.assertLess(asizeof.asizeof(astuplemaps), 150 * 1000)


class TestUnpack(unittest.TestCase):
    def test_non_tuplemap(self):
        self.assertEqual(1, tuplemap.unpack(1))
        self.assertEqual([1, 2, 3], tuplemap.unpack([1, 2, 3]))
        self.assertEqual({'x': 1}, tuplemap.unpack({'x': 1}))
        obj = object()
        self.assertEqual(id(obj), id(tuplemap.unpack(obj)))

    def test_tuplemap(self):
        self.assertEqual({'x': 1, 'y': 2}, tuplemap.unpack(Point(x=1, y=2)))
        self.assertEqual({'x': 1}, tuplemap.unpack(Point(x=1)))
        self.assertEqual({}, tuplemap.unpack(Point()))

        # Test unpacking contained tuplemaps.
        innerclass = tuplemap.frozentuplemap('innerclass', ('x',))
        outerclass = tuplemap.frozentuplemap('outerclass', ('inner',),
            unpacker=lambda self: {
                'inner': [tuplemap.unpack(i) for i in self['inner']],
            })

        obj = outerclass(inner=[innerclass(x=1), innerclass(x=2)])
        self.assertEqual({'inner': [{'x': 1}, {'x': 2}]}, tuplemap.unpack(obj))
	"""A space-efficient frozen (write-once) mapping class.

	A named map has two key properties:

	1. The interface is like a dict.
	2. Memory usage is like an object that defines __slots__.

	Call namedmap.frozennamedmap() to define a subclass, passing the name
	of the type and an iterable of key names.

	>>> Point = namedmap.frozennamedmap('Point', ('x', 'y'))
	>>> Point.__doc__
	'Point(x, y)'
	>>> origin = Point({'x': 0, 'y': 0})
	>>> origin['x']
	0
	>>> partial = Point({'y': 1})
	>>> partial['y']
	1
	>>> partial['x']
	KeyError: 'x'

	This new data structure was motivated by a want to reduce the
	in-memory size of Khan Academy's content database, which contains tens
	of thousands of dicts. Read all about it here:

	http://engineering.khanacademy.org/posts/evolving-our-content-infrastructure.htm

	When the keys and values are interned and heavily shared, like they
	are in the frozen model store, the overhead of the container can be
	significant. A dict (280 bytes) is much larger than an object with
	__slots__ (120 bytes with one property, +8 bytes per property).

	A named map keeps the smaller in-memory representation of an object
	with __slots__ while serialization and access behave more like a dict.

	From July 5, 2016, here is data from switching the 'item' key's list
	of Khan Academy's FrozenExercise.problem_types to other data types:

	dict in-memory:43465992 pickled:9503491 zipped:3922128
	namedtuple in-memory:16743448 pickled:5496541 zipped:2945814
	namedmap in-memory:16181984 pickled:6276168 zipped:2996040

	namedmap can be more compact than namedtuple because namedtuple
	requires that all values be specified, even if set to None, whereas
	namedmap's keys are optional.
	"""

	import collections
	import sys


	def _extend_with_mapping_definitions(cls):
	"""Leverage collections.Mapping's read-only mapping definitions.

	Why not use inheritance? collections.Mapping doesn't use __slots__
	and so inheriting from it creates a __dict__ on instances, which
	is what the use of __slots__ is trying to avoid!

	We use this as a class decorator to mix in the collections.Mapping
	definitions. Existing attributes are not overridden.
	"""
	combined_dict = collections.Mapping.__dict__.copy()
	combined_dict.update(cls.__dict__)
	# Kind of hacky: a Python class's __dict__ is a read-only
	# dictproxy (as opposed to a class instance's __dict__, which
	# won't exist in this case because we're using __slots__). This
	# redefinition is how we modify the class's dictproxy.
	return type(cls.__name__, cls.__bases__, combined_dict)


	@_extend_with_mapping_definitions
	class _FrozenNamedMap(object):
	__slots__ = () # subclass must define __slots__

	def __new__(cls, args, *kwargs):
	# We enforce the same creation interface as dict(), and we
	# further restrict to only known field names.
	argdict = dict(args, *kwargs)
	result = object.__new__(cls)
	for key in cls.__slots__:
	if key in argdict:
	setattr(result, key, argdict.pop(key))
	if argdict:
	raise TypeError('%s unexpected keys: %r'
	% (cls.__name__, argdict.keys()))
	return result

	def __repr__(self):
	return '%s({%s})' % (
	self.__class__.__name__,
	', '.join('%r: %r' % kv_pair for kv_pair in self.iteritems()))

	def __reduce__(self):
	"""Return self as a plain tuple of key-value pairs. Used by pickle."""
	# We could have more compact pickled representation but it's a
	# smallish savings and the cost is "unfreezing" this structure
	# by implementing __setitem__. To understand why, see pickle's
	# docs for __reduce__.
	#
	# The more compact implementation is:
	# return (self.__class__, (), None, None, self.iteritems())
	#
	# Here are pickled sizes of ~8k FrozenExercise.problem_types
	# converted to namedtuple and namedmap, as implemented below
	# (baseline) and with the above modification (improved):
	#
	# pickled zipped %-of-baseline
	# namedtuple 5495485 2945178 87.6%
	# namedmap baseline 6275110 2995217 100.0%
	# namedmap improved 6047125 2966671 96.4%

	# __reduce__ provides fine-grained pickle control. The first
	# element of the result is a callable, our class. The second
	# element is a tuple of arguments for the callable object.
	# For a map with keys 'x', 'y' this returns:
	# (self.__class__, ((('x', 1), ('y', 2)),))
	# which, in depickling, is the call:
	# self.__class__((('x', 1), ('y', 2)))
	arg1 = tuple(self.iteritems())
	return (self.__class__, (arg1,))

	# The definitions from collections.Mapping require __len__,
	# __iter__, and __getitem__.

	def __len__(self):
	n = 0
	for _ in self:
	n += 1
	return n

	def __iter__(self):
	# Callers must not rely on the order of iteration. This is
	# intended as a substitute for dict.
	default = object()
	for key in self.__slots__:
	if getattr(self, key, default) is not default:
	yield key

	def __getitem__(self, key):
	if not isinstance(key, str):
	raise TypeError('%s keys must be strings, not %s'
	% (self.__class__.__name__,
	key.__class__.__name__))
	try:
	return getattr(self, key)
	except AttributeError:
	raise KeyError(key)

	# We may someday want to implement __setitem__ and __delitem__ and
	# use definitions from collections.MutableMapping. Maybe in a
	# subclass called _NamedMap, or maybe we invert the hierarchy to
	# have the frozen version be the subclass. For now, though, it's
	# kind of nice to be immutable.

	def __setitem__(self, key, value):
	raise NotImplementedError()

	def __delitem__(self, key):
	raise NotImplementedError()


	def frozennamedmap(typename, keys):
	"""Return a new immutable namedmap with named keys."""
	keys = tuple(keys)
	for key in keys:
	assert isinstance(key, str), key
	typedict = {
	'__slots__': keys,
	'__doc__': '%s(%s)' % (typename, ', '.join(keys)),
	}
	result = type(typename, (_FrozenNamedMap,), typedict)
	# Setting __module__ is taken from collection.py in namedtuple().
	# For pickling to work, the __module__ variable needs to be set to
	# the frame where the named tuple is created. Bypass this step in
	# environments where sys._getframe is not defined (Jython for
	# example) or sys._getframe is not defined for arguments greater
	# than 0 (IronPython).
	try:
	result.__module__ = sys._getframe(1).f_globals.get('__name__',
	'__main__')
	except (AttributeError, ValueError):
	pass
	return result
	import json
	import pickle
	import unittest

	from pympler import asizeof

	import namedmap

	Point = namedmap.frozennamedmap('Point', ('x', 'y'))


	class TestNamedMap(unittest.TestCase):
	def _assert_x1y2_point(self, pt):
	self.assertEqual(pt, {'x': 1, 'y': 2})
	self.assertEqual(pt['x'], 1)
	self.assertEqual(pt['y'], 2)
	with self.assertRaises(KeyError):
	pt['z']
	self.assertIn('x', pt)
	self.assertNotIn('z', pt)
	self.assertEqual(pt.get('x'), 1)
	self.assertEqual(pt.get('y'), 2)
	self.assertIsNone(pt.get('z'))
	self.assertEqual(dict(pt.items()), {'x': 1, 'y': 2})
	self.assertEqual(pt.values(), [1, 2])
	self.assertEqual(pt.keys(), ['x', 'y'])
	self.assertEqual(dict(pt.iteritems()), {'x': 1, 'y': 2})
	self.assertEqual(sorted(pt.itervalues()), [1, 2])
	self.assertEqual(sorted(pt.iterkeys()), ['x', 'y'])
	self.assertEqual(len(pt), 2)

	def test_interface(self):
	self.assertEqual(Point(), {})
	self.assertEqual(Point(x=1), {'x': 1})

	pt = Point(x=1, y=2)
	self._assert_x1y2_point(pt)

	# Check __slots__-object-like behaviors.
	self.assertEqual(pt.x, 1)
	self.assertEqual(pt.y, 2)
	with self.assertRaises(AttributeError):
	_ = Point().x

	# Check for unexpected keys.
	with self.assertRaises(TypeError):
	Point(z=3)

	# Ensure frozenness.
	with self.assertRaises(NotImplementedError):
	pt['x'] = 1
	with self.assertRaises(NotImplementedError):
	del pt['x']

	# Ensure that two instances that should have different
	# lengths, do have different lengths. This would have detected
	# an implementation bug where caching meant the same value was
	# returned for all instances.
	partial_pt = Point(x=1)
	self.assertEqual(len(pt), 2)
	self.assertEqual(len(partial_pt), 1)

	def test_pickling(self):
	pt = Point(x=1, y=2)

	# Ensure that all protocols work (this errored back when
	# namedmap implemented __getnewargs__ rather than __reduce__).
	for protocol in xrange(pickle.HIGHEST_PROTOCOL + 1):
	pt_again = pickle.loads(pickle.dumps(pt, protocol))
	self._assert_x1y2_point(pt_again)

	# Test pickling when only some keys are set.
	partial_pt = pickle.loads(pickle.dumps(Point(x=1)))
	self.assertEqual(partial_pt['x'], 1)
	self.assertNotIn('y', partial_pt)
	partial_pt = pickle.loads(pickle.dumps(Point(y=2)))
	self.assertEqual(partial_pt['y'], 2)
	self.assertNotIn('x', partial_pt)

	def test_constructor(self):
	pt = Point(y=2, x=1)
	self.assertEqual(pt['x'], 1)
	self.assertEqual(pt['y'], 2)
	self.assertEqual(Point([('x', 1)])['x'], 1)
	self.assertEqual(Point({'x': 1})['x'], 1)
	self.assertEqual(Point([('x', 1)], x=0)['x'], 0)

	def test_class_structure(self):
	self.assertEqual(Point().__slots__, ('x', 'y'))
	# Ensure we use __slots__ and don't incur __dict__ overhead.
	with self.assertRaises(AttributeError):
	Point().__dict__

	def test_memory_size(self):
	# Roughly test the core purpose of tuple map, in-memory size.
	solar_system_distances = dict((('Mercury', 0.39), ('Venus', 0.723),
	('Earth', 1.0), ('Mars', 1.524),
	('Jupiter', 5.203), ('Saturn', 9.539),
	('Uranus', 19.18), ('Neptune', 30.06),
	('Pluto', 39.53)))
	myclass = namedmap.frozennamedmap('myclass',
	tuple(solar_system_distances))
	asdicts = [dict(**solar_system_distances) for _ in xrange(1000)]
	asnamedmaps = [myclass(**solar_system_distances) for _ in xrange(1000)]
	# On chris' Macbook Pro dicts are ~1MB and namedmaps ~130KB.
	self.assertGreater(asizeof.asizeof(asdicts), 950 * 1000)
	self.assertLess(asizeof.asizeof(asnamedmaps), 150 * 1000)

	def test_json(self):
	# The json module from the standard library doesn't know how
	# to serialize custom object.
	with self.assertRaises(TypeError):
	json.dumps(Point(x=1, y=2))
	# NOTE: This library is obsoleted by namedmap, which should be used
	# instead. The reason is that a tuplemap has serious serialization
	# problems with Python's built-in json library (it serializes as a list,
	# not an object). Furthermore, due to a bug in pympler.asizeof, a the
	# space-saving benefits of a tuplemap had been exaggerated.

	"""Defines a dict-like structure with a compact in-memory representation.

	Call tuplemap.frozentuplemap() to define a subclass, passing the name
	of the type and an iterable of field names:

	>>> Point = tuplemap.frozentuplemap('Point', ('x', 'y'))
	>>> Point.__doc__
	'Point(x, y)'
	>>> origin = Point(x=0, y=0)
	>>> origin['x']
	0
	>>> partial = Point(y=1)
	>>> partial['y']
	1
	>>> partial['x']
	KeyError: 'x'

	Be careful when serializing tuplemaps. Using pickle is OK, but json,
	yaml, or similar will serialize tuplemap to a list of key names and
	lose the mapping. Instead use tuplemap.unpack() to get a dict:

	>>> Point = tuplemap.frozentuplemap('Point', ('x', 'y'))
	>>> partial = Point(y=1)
	>>> tuplemap.unpack(partial)
	{"y":1}

	It's safe to unpack non-tuplemaps. They are returned as-is:

	>>> tuplemap.unpack(1)
	1
	>>> tuplemap.unpack([1, 2, 3])
	[1, 2, 3]

	A tuple map has three key properties:

	1. The interface is like a dict.
	2. Memory usage is like a tuple.
	3. When pickled and unpickled, the ordering and name of fields is
	flexible, like a dict and unlike collections.namedtuple.

	This new data structure was motivated by a want to reduce the
	in-memory size of Khan Academy's content database, which contains tens
	of thousands of dicts. Read all about it here:

	http://engineering.khanacademy.org/posts/evolving-our-content-infrastructure.htm

	When the keys and values are interned and heavily shared, like they
	are in the frozen model store, the overhead of the container can be
	significant. An dict (280 bytes), and an object with __slots__ (120
	bytes with one property, +8 bytes per property) can't compete with a
	tuple (56 bytes +8 bytes per entry). However, tuples aren't very
	robust to code changes and it's difficult to evolve their
	schema. Furthermore, changing many downstream callers that expect a
	dict to use something else is time-consuming and error-prone.

	A tuple map keeps the in-memory representation as small as a tuple,
	while pickling and access behave like a dict.

	From May 2016, here is data from switching the 'item' key's list of
	Khan Academy's FrozenExercise.problem_types to other data types:

	dict in-memory:41626912 pickled:8974467 zipped:3754914
	tuplemap in-memory:11618840 pickled:6259246 zipped:2975254
	namedtuple in-memory:11686976 pickled:5590038 zipped:2931181

	"""

	import sys


	class _FrozenTupleMap(tuple):
	__slots__ = ()
	_fields = () # subclasses should define named fields

	# This value is used to know a key isn't mapped to a value.
	_unset = object()

	def __new__(cls, args, *kwargs):
	"""Create new instance."""
	# We enforce the same creation interface as dict(), and we
	# further restrict to only known field names.
	argdict = dict(args, *kwargs)
	# A key instance invariant: the backing tuple has len(cls._fields).
	values = tuple(argdict.pop(field, cls._unset) for field in cls._fields)
	if argdict:
	raise TypeError('%s unexpected field names: %r'
	% (cls.__name__, argdict.keys()))
	return tuple.__new__(cls, values)

	def __repr__(self):
	"""Return a nicely formatted representation string."""
	return '%s(%s)' % (
	self.__class__.__name__,
	', '.join('%s=%r' % item for item in self.iteritems()))

	def __reduce__(self):
	"""Return self as a plain tuple of key-value pairs. Used by pickle."""
	# __reduce__ provides fine-grained pickle control. The first
	# argument is our class, a callable, and the second argument
	# will be passed to it as *args on reconstruction.
	newargs = (tuple((f, self[f]) for f in self._fields if f in self),)
	return (self.__class__, newargs)

	def get(self, key, default=None):
	'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.'
	try:
	return self[key]
	except KeyError:
	return default

	def __contains__(self, key):
	try:
	self[key]
	except KeyError:
	return False
	else:
	return True

	def __eq__(self, other):
	if isinstance(other, _FrozenTupleMap):
	return super(_FrozenTupleMap, self).__eq__(other)
	elif isinstance(other, dict):
	return dict(self.items()) == other
	else:
	return False

	def __ne__(self, other):
	return not (self == other)

	# Support functions for making dicts with tuplemap.unpack().

	def _unpacker(self):
	return {}

	def _unpack(self):
	"""Return self as a dict."""
	result = dict(self.iteritems())
	result.update(self._unpacker())
	return result

	# The definitions from collections.Mapping require __len__,
	# __iter__, and __getitem__.

	def __len__(self):
	return super(_FrozenTupleMap, self).__len__() - self.count(self._unset)

	def __iter__(self):
	for i, key in enumerate(self._fields):
	val = super(_FrozenTupleMap, self).__getitem__(i)
	if val is not self._unset:
	yield key

	def __getitem__(self, key):
	if not isinstance(key, str):
	raise TypeError('%s keys must be strings, not %s'
	% (self.__class__.__name__,
	key.__class__.__name__))
	try:
	# For tuples with fewer than maybe 5 items, this is
	# actually faster than a hashed dict lookup!
	field_index = self._fields.index(key)
	except ValueError:
	raise KeyError(key)
	val = super(_FrozenTupleMap, self).__getitem__(field_index)
	if val is self._unset:
	raise KeyError(key)
	return val

	# We may someday want to implement __setitem__ and __delitem__ and
	# use definitions from collections.MutableMapping. Maybe in a
	# subclass called _TupleMap, or maybe we invert the hierarchy to
	# have the frozen version be the subclass. For now, though, it's
	# kind of nice to be immutable.

	def __setitem__(self, key, value):
	# Overridden to avoid the awkwardness of being able to set
	# numeric indices that we inherit from our tuple superclass.
	raise NotImplementedError()

	def __delitem__(self, key):
	raise NotImplementedError()

	# Finally, the dict API. Taken from collections.Mapping.__dict__.
	def iterkeys(self):
	return iter(self)

	def itervalues(self):
	for i in xrange(len(self._fields)):
	val = super(_FrozenTupleMap, self).__getitem__(i)
	if val is not self._unset:
	yield val

	def iteritems(self):
	for i, key in enumerate(self._fields):
	val = super(_FrozenTupleMap, self).__getitem__(i)
	if val is not self._unset:
	yield (key, val)

	def keys(self):
	return list(self.iterkeys())

	def items(self):
	return [(key, self[key]) for key in self]

	def values(self):
	return list(self.itervalues())


	def frozentuplemap(typename, field_names, unpacker=None):
	"""Return a new immutable tuplemap with named fields.

	If set, `unpacker` must be a function that accepts one argument, a
	tuplemap instance, and returns a value that will be passed into
	dict.update(). The function is used to augment the dict returned
	by tuplemap.unpack(). For example, to further unpack contained
	tuplemaps.
	"""
	for field in field_names:
	assert isinstance(field, str)
	_fields = tuple(field_names)
	typedict = {
	'__slots__': (),
	'_fields': _fields,
	'__doc__': '%s(%s)' % (typename, repr(_fields).replace("'", "")[1:-1]),
	}
	if unpacker:
	typedict['_unpacker'] = unpacker
	result = type(typename, (_FrozenTupleMap,), typedict)
	# Setting __module__ is taken from collection.py in namedtuple().
	# For pickling to work, the __module__ variable needs to be set to
	# the frame where the named tuple is created. Bypass this step in
	# environments where sys._getframe is not defined (Jython for
	# example) or sys._getframe is not defined for arguments greater
	# than 0 (IronPython).
	try:
	result.__module__ = sys._getframe(1).f_globals.get('__name__',
	'__main__')
	except (AttributeError, ValueError):
	pass
	return result


	def unpack(maybe_tuplemap):
	"""Return a dict if passed a tuplemap, otherwise return the argument.

	If the argument is a tuplemap it may recursively unpack contained
	tuplemaps. See frozentuplemap()'s `unpacker` parameter.

	This is useful when a tuplemap used as a memory optimization needs
	to be serialized. For example, callers of json.dumps don't have a
	hook to control serialization for basic types like tuple and
	should call unpack() first.
	"""
	if isinstance(maybe_tuplemap, _FrozenTupleMap):
	return maybe_tuplemap._unpack()
	return maybe_tuplemap