Last active
July 15, 2018 04:09
-
-
Save FGtatsuro/d547a1b46c42698196d2d3df14740587 to your computer and use it in GitHub Desktop.
Generic flatten function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
import re | |
from typing import Any, Callable, Generator, Iterable, Iterator | |
def split_normal(word: str, sep: str = ',') -> Iterator[str]:
    """Split *word* on each occurrence of *sep* and return an iterator over the pieces.

    Args:
        word: The string to split.
        sep: The delimiter passed to ``str.split``; defaults to a comma.

    Returns:
        An iterator over the substrings produced by ``word.split(sep)``.
    """
    pieces = word.split(sep)
    return iter(pieces)
def split_camel(word: str) -> Generator[str, None, None]:
    """Break *word* into its camel-case segments.

    A segment is an uppercase ASCII letter followed by any run of lowercase
    ASCII letters or digits. Text that does not match that shape (for
    example a leading lowercase prefix) is kept as its own piece.

    Args:
        word: The string to split.

    Returns:
        A generator over the non-empty pieces, in their original order.
    """
    # The capturing group makes re.split keep every matched segment in the
    # result list; the split itself happens here, before the generator is
    # consumed.
    fragments = re.split(r'([A-Z][a-z0-9]*)', word)
    # Adjacent segments leave empty strings between them; drop those.
    return (fragment for fragment in fragments if fragment)
# In this context, a "container" is any iterable that is not a string-like object.
def is_container(target: Any) -> bool:
    """Return True when *target* is iterable but not a str, bytes, or bytearray.

    Args:
        target: Any object.

    Returns:
        False for str, bytes, and bytearray instances and for non-iterables;
        True for every other ``Iterable`` instance.
    """
    # String-like objects are iterable, but they are treated as atomic values.
    if isinstance(target, (str, bytes, bytearray)):
        return False
    return isinstance(target, Iterable)
def recurse_container(iterable: Iterable[Any]) -> Generator[Any, None, None]:
    """Yield the non-container elements of *iterable*, depth-first, in order.

    Every element for which ``is_container`` returns True is descended into
    recursively; every other element is yielded unchanged.

    Args:
        iterable: The (possibly nested) iterable to walk.

    Yields:
        Each element, at any nesting depth, that ``is_container`` rejects.
    """
    for element in iterable:
        if not is_container(element):
            yield element
            continue
        # Nested container: delegate to a recursive walk of its contents.
        yield from recurse_container(element)
def flatten(
    data: Iterable[Any],
    iterator_factory: Callable[[Any], Iterator[Any]],
    can_delegate: Callable[[Any], bool] = lambda x: True
) -> Generator[Any, None, None]:
    """Lazily expand the entries of *data* into one flat stream.

    Each entry accepted by *can_delegate* is replaced by the items that
    ``iterator_factory(entry)`` produces; every other entry is yielded as is.

    Args:
        data: The entries to process.
        iterator_factory: Called with an accepted entry; returns the iterator
            whose items take that entry's place in the output.
        can_delegate: Predicate selecting the entries handed to
            *iterator_factory*. The default accepts every entry.

    Yields:
        The expanded items and the untouched entries, in input order.

    >>> list(flatten(['test', 'Test2Test3', 'test4'], split_camel))
    ['test', 'Test2', 'Test3', 'test4']
    >>> list(flatten(['test', 'test2,test3', 'test4'], split_normal))
    ['test', 'test2', 'test3', 'test4']
    >>> list(flatten(['test', 'test2|test3', 'test4'], functools.partial(split_normal, sep='|')))
    ['test', 'test2', 'test3', 'test4']
    >>> list(flatten(['test', ['Test2', 'Test3'], 'test4'], recurse_container, is_container))
    ['test', 'Test2', 'Test3', 'test4']
    >>> list(flatten(['test', ['Test2', ['Test2-2', 'Test2-3']], 'test4'], recurse_container, is_container))
    ['test', 'Test2', 'Test2-2', 'Test2-3', 'test4']
    >>> list(flatten(['test', ['Test2', [50, 60]], 'test4'], recurse_container, is_container))
    ['test', 'Test2', 50, 60, 'test4']
    """
    for item in data:
        # A rejected entry is wrapped in a one-element tuple so both cases
        # share a single ``yield from``.
        yield from (iterator_factory(item) if can_delegate(item) else (item,))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment