Skip to content

Instantly share code, notes, and snippets.

@rldotai
Last active July 31, 2020 22:57
Show Gist options
  • Save rldotai/0199692cba96cbf1c1a639b962ef9ca8 to your computer and use it in GitHub Desktop.
Save rldotai/0199692cba96cbf1c1a639b962ef9ca8 to your computer and use it in GitHub Desktop.
Common packages and utility functions that I find helpful
"""Setting up the local environment."""
import itertools
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from functools import reduce
import itertools
import operator
def is_iterable(x) -> bool:
    """Report whether `x` can be iterated over.

    The check asks `iter()` for an iterator and treats a `TypeError` as
    "not iterable"; nothing is consumed from `x`.
    """
    try:
        iter(x)
    except TypeError:
        return False
    else:
        return True
def nditer(obj, iter_dict=True, use_dict_keys=False):
    """Lazily flatten nested iterables into `(coordinate, element)` pairs.

    Every terminal element reachable from `obj` is yielded together with the
    tuple of positions that leads to it.  This is similar in spirit to
    `numpy.ndenumerate`, but ragged (and even infinite) sequences are
    supported because the result is a generator, and elements are yielded
    as-is without any datatype conversion.

    Strings are iterable, yet they are always treated as terminal elements
    and never expanded character by character.

    Dictionaries are iterable too, but their keys carry information that
    plain integer positions do not.  By default the values of a dict are
    enumerated and the keys are dropped; with `use_dict_keys=True` the keys
    take the place of the integer positions.  For example, for
    `{'a': 1, 'b': [23, 32]}`:

        default            --> `[((0,), 1), ((1, 0), 23), ((1, 1), 32)]`
        use_dict_keys=True --> `[(('a',), 1), (('b', 0), 23), (('b', 1), 32)]`

    With `iter_dict=False` a dict is yielded whole, like any terminal element.

    Parameters
    ----------
    obj: A Python object.
        Any Python object, although only iterables really make sense as input.
    iter_dict: bool, optional
        Descend into dictionaries instead of yielding them as terminal
        elements.  Defaults to `True`.
    use_dict_keys: bool, optional
        Use dictionary keys in place of integer positions.  Only relevant when
        `iter_dict` is true and `obj` contains dicts.  Defaults to `False`.

    Returns
    -------
    out: Generator[Tuple]
        A generator of `(coordinate, element)` pairs, where `coordinate` is a
        tuple of nonnegative integers (or dict keys, when `use_dict_keys` is
        set) locating `element` within `obj`.

    See Also
    --------
    ndenum: The same traversal, collected into a list.
    """
    def _walk(node, coords):
        """Recursively yield the terminal elements found beneath `node`."""
        # Non-iterables and strings are terminal elements.
        if not is_iterable(node) or isinstance(node, str):
            yield (coords, node)
            return

        if isinstance(node, dict):
            # A dict is only expanded when the caller asked for it.
            if not iter_dict:
                yield (coords, node)
                return
            if use_dict_keys:
                children = node.items()
            else:
                children = enumerate(node.values())
        else:
            # Every other iterable is addressed by integer position.
            children = enumerate(node)

        for position, child in children:
            yield from _walk(child, coords + (position,))

    return _walk(obj, ())
def ndenum(obj, iter_dict=True, use_dict_keys=False):
    """Given an object, flatten the iterables it contains into a single
    list, while recording the n-dimensional coordinates.

    Similar to `numpy.ndenumerate` but supports sequences of variable sizes
    rather than what can be represented as an array, and doesn't modify
    datatypes.

    This simply collects the output of `nditer` into a list, so `obj` must be
    finite.  See the documentation for `nditer` for more details.

    Parameters
    ----------
    obj: A Python object.
        Any Python object, although only iterables really make sense as input.
    iter_dict: bool, optional
        Iterate over dictionaries rather than just returning them as if they
        were a non-iterable element. Defaults to `True`.
    use_dict_keys: bool, optional
        Whether to use dictionary keys in place of coordinates, only relevant if
        the object contains dicts. Defaults to `False`

    Returns
    -------
    out: List[Tuple]
        A list of `(coordinate, element)` pairs, where each `coordinate` is a
        tuple of nonnegative integers (or dict keys, when `use_dict_keys` is
        set) specifying the position of the associated `element` from the
        iteration over the input `obj`.

    See Also
    --------
    nditer: A similar function that returns a generator instead of a list.

    Examples
    --------
    Enumerate over lists or arrays:

    >>> ndenum([[3, 2], [4, 1]])
    [((0, 0), 3), ((0, 1), 2), ((1, 0), 4), ((1, 1), 1)]
    >>> ndenum([2])
    [((0,), 2)]

    A "deficient" 3-dimensional array:

    >>> ndenum([[[3, 2], [4, 1]], [[8], [6, 3, 3]]])  # doctest: +NORMALIZE_WHITESPACE
    [((0, 0, 0), 3),
     ((0, 0, 1), 2),
     ((0, 1, 0), 4),
     ((0, 1, 1), 1),
     ((1, 0, 0), 8),
     ((1, 1, 0), 6),
     ((1, 1, 1), 3),
     ((1, 1, 2), 3)]

    Dictionaries, addressed by key instead of by position:

    >>> ndenum({'a': 1, 'b': [23, 32]}, use_dict_keys=True)
    [(('a',), 1), (('b', 0), 23), (('b', 1), 32)]

    Given non-iterable inputs, it still returns a result, although how useful
    this might be is debatable:

    >>> ndenum(2)
    [((), 2)]
    """
    return list(nditer(obj, iter_dict=iter_dict, use_dict_keys=use_dict_keys))
def getter(index):
    """Build a lookup callable for a single index or a collection of indices.

    A scalar `index` (anything that is not a list or tuple) produces a
    function returning the bare item `x[index]`.  A list or tuple of indices
    produces a function that always returns a tuple: empty for no indices,
    a 1-tuple for a single index, and one entry per index otherwise.
    """
    if not isinstance(index, (list, tuple)):
        return operator.itemgetter(index)

    if len(index) == 1:
        # `itemgetter` with one argument would return a bare item, so wrap the
        # lookup by hand to keep the tuple-shaped result.
        sole = index[0]
        return lambda x: (x[sole],)

    if index:
        return operator.itemgetter(*index)

    # No indices requested: nothing to extract.
    return lambda x: ()
def sort(iterable, key=None, reverse=False):
    """Sort `iterable` into a new list via the builtin `sorted()`.

    The options mirror `sorted()`, except that `key` may also be an index, a
    mapping key, or a list/tuple of them; such values are converted into a
    lookup function with `getter`.  Callables and `None` are passed through
    unchanged.

    # sort by index
    >>> sort([[1, 2], [3, 4], [0, 5]], key=[1])
    [[1, 2], [3, 4], [0, 5]]
    >>> sort([[1, 2], [3, 4], [0, 5]], key=1)
    [[1, 2], [3, 4], [0, 5]]

    # sort by key name
    >>> lst = [{'x': 109, 'y': 'm'}, {'x': 101, 'y': 'm'}, {'x': 101, 'y': 'o'}]
    >>> sort(lst, 'y')
    [{'x': 109, 'y': 'm'}, {'x': 101, 'y': 'm'}, {'x': 101, 'y': 'o'}]

    # sort by multiple criteria
    >>> sort(lst, ['y', 'x'])
    [{'x': 101, 'y': 'm'}, {'x': 109, 'y': 'm'}, {'x': 101, 'y': 'o'}]
    """
    # Only non-callable, non-None keys need translating into a lookup function.
    needs_lookup = key is not None and not callable(key)
    key_func = getter(key) if needs_lookup else key
    return sorted(iterable, key=key_func, reverse=reverse)
def nwise(seq, n):
    """
    Iterate over an iterable `seq` in non-overlapping groups of `n`.

    Returns a lazy iterator of `n`-tuples.  If the number of elements in `seq`
    is not a multiple of `n`, the trailing leftover elements are dropped.

    For example:

    >>> lst = list(range(1, 13)) # list of twelve elements
    >>> for i in (1, 2, 3, 4, 6):
    ...     print(i, list(nwise(lst, i)))
    1 [(1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,), (10,), (11,), (12,)]
    2 [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)]
    3 [(1, 2, 3), (4, 5, 6), (7, 8, 9), (10, 11, 12)]
    4 [(1, 2, 3, 4), (5, 6, 7, 8), (9, 10, 11, 12)]
    6 [(1, 2, 3, 4, 5, 6), (7, 8, 9, 10, 11, 12)]
    """
    # Repeating the *same* iterator `n` times makes `zip` pull `n` consecutive
    # items for each output tuple.
    return zip(*[iter(seq)] * n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment