Last active
April 26, 2019 13:04
-
-
Save stkrp/f97401ebf4b0f71d2728f91df15bb03f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformation import pipeline, rename_fields, transform_field_value, replace, group_fields | |
if __name__ == '__main__':
    # Lookup tables: raw categorical value -> numeric code.
    gender_codes = {"female": 0, "male": 1}
    engine_type_codes = {"petrol": 0, "diesel": 1, "gas": 2}

    # Flat "owner_*" fields that are folded into the nested "owner" mapping,
    # and the shorter names they receive once they live inside that mapping.
    owner_field_names = (
        "owner_first_name",
        "owner_last_name",
        "owner_year_of_birth",
        "owner_gender",
    )
    owner_short_names = {
        "owner_first_name": "first_name",
        "owner_last_name": "last_name",
        "owner_year_of_birth": "year_of_birth",
        "owner_gender": "gender",
    }

    # Steps run top to bottom; each one receives the result of the previous.
    car_transformer = pipeline(
        # Normalise the owner fields while they are still flat ...
        rename_fields({"owner_yob": "owner_year_of_birth"}),
        transform_field_value("owner_year_of_birth", int),
        transform_field_value("owner_gender", replace(gender_codes, None)),
        # ... then nest them under "owner" and strip the "owner_" prefix.
        group_fields("owner", owner_field_names),
        transform_field_value("owner", rename_fields(owner_short_names)),
        # Remaining top-level fields.
        transform_field_value("reg_num", str.upper),
        transform_field_value("millage", sum),
        transform_field_value("engine_type", replace(engine_type_codes, None)),
    )

    car_data = {
        "owner_first_name": "Vladimir",
        "owner_last_name": "Kukov",
        "owner_yob": "1977",
        "owner_gender": "male",
        "reg_num": "a176Bc99",
        "millage": [100000, 56000, 30002],
        "engine_power_hp": 190.1,
        "engine_type": "petrol",
    }

    print(car_transformer(car_data))

    # Expected output (pretty-printed here for readability):
    #
    # {
    #     'owner': {
    #         'first_name': 'Vladimir',
    #         'last_name': 'Kukov',
    #         'year_of_birth': 1977,
    #         'gender': 1,
    #     },
    #     'reg_num': 'A176BC99',
    #     'millage': 186002,
    #     'engine_power_hp': 190.1,
    #     'engine_type': 0,
    # }
    #
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Transformation layer of ETL.

A transformer is a callable object that takes a value, performs certain actions
with it, and returns an altered or new value. Transformers can accept and
return values of different types.
"""
from typing import MutableMapping, Mapping, Any, Callable, Iterable, Hashable | |
Transformer = Callable[[Any], Any] | |
# ==== Generic ============================================================== # | |
def pipeline(*steps: Transformer) -> Transformer:
    """
    Chain several transformers into a single transformer.

    The returned transformer feeds its argument to the first step, passes
    that step's result to the second one, and so on, in the order in which
    the steps were given (FIFO). The result of the last step is returned.
    With no steps at all the argument is returned as is.
    """
    def transformer(item: Any) -> Any:
        result = item
        for apply_step in steps:
            # Each step consumes the previous step's output.
            result = apply_step(result)
        return result

    return transformer
def replace(
    value_mapping: Mapping[Hashable, Any], default: Any = NotImplemented,
) -> Transformer:
    """
    Build a transformer that substitutes values according to a mapping.

    Mapping: old value -> new value.

    A value found among the mapping's keys is replaced by the mapped value.
    A value that is not found is replaced by ``default`` when one was passed;
    otherwise it is returned unchanged. ``NotImplemented`` is the "no default
    given" marker, which is why ``None`` can be used as a real default.

    [!] Returns the mapped/default object for replaced values and the very
        same instance for values that are left untouched.
    """
    def transformer(value: Hashable) -> Any:
        if value not in value_mapping:
            # Unknown value: use the default if the caller supplied one.
            return value if default is NotImplemented else default
        return value_mapping[value]

    return transformer
# ==== Mappings ============================================================= # | |
def transform_field_value(
    field_name: str, field_value_transformer: Transformer,
) -> Transformer:
    """
    Build a transformer that rewrites the value of a single field.

    When the returned transformer is called with a mapping that contains
    ``field_name``, the current value of that field is handed to
    ``field_value_transformer`` and the field is overwritten with the result.
    A mapping without the field is returned unchanged.

    Because the field value itself is what the inner transformer receives,
    nested structures can be processed by nesting this very function:

    >>> t = transform_field_value('parent', transform_field_value('child', int))  # NoQA
    >>> t({'parent': {'child': '123'}})
    {'parent': {'child': 123}}

    [!] Will change the original object.

    # TODO: Move to unittest

    # Flat
    >>> transformer = transform_field_value('login', str.upper)
    >>> input_ = {'login': 'bOb', 'password': '123qwe'}
    >>> output = transformer(input_)
    >>> output == {'login': 'BOB', 'password': '123qwe'}
    True
    >>> output is input_
    True

    # Nested
    >>> transformer = transform_field_value('a', transform_field_value('b', str.upper))  # NoQA
    >>> input_ = {'id': 59, 'a': {'b': 'gog', 'key': 'abc'}}
    >>> output = transformer(input_)
    >>> output == {'id': 59, 'a': {'b': 'GOG', 'key': 'abc'}}
    True
    >>> output is input_
    True
    >>> output['a'] is input_['a']
    True

    # Empty
    >>> transformer = transform_field_value('pk', str.upper)
    >>> input_ = {'a': 1, 'b': 2}
    >>> output = transformer(input_)
    >>> output == {'a': 1, 'b': 2}
    True
    >>> output is input_
    True
    """
    def transformer(item: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # NoQA
        # Nothing to transform when the field is absent.
        if field_name not in item:
            return item
        current_value = item[field_name]
        item[field_name] = field_value_transformer(current_value)
        return item

    return transformer
def rename_fields(field_name_mapping: Mapping[str, str]) -> Transformer:
    """
    Build a transformer that renames the keys of a mapping.

    Mapping: old field name -> new field name.

    Old names that are absent from the processed mapping are ignored. All
    renames are applied as one simultaneous step: every affected value is
    detached from the mapping first and only then stored under its new name.
    Therefore overlapping renames behave predictably:

    * a swap (``{'a': 'b', 'b': 'a'}``) exchanges the two values instead of
      losing one of them;
    * a chain (``{'a': 'b', 'b': 'c'}``) moves each value exactly one step.

    Renamed fields are re-inserted in the iteration order of
    ``field_name_mapping``. If several old names map to the same new name,
    the one that comes last in ``field_name_mapping`` wins.

    [!] Will change the original object.

    >>> transformer = rename_fields({'yob': 'year_of_birth'})
    >>> input_ = {'name': 'Ivan', 'yob': 1990}
    >>> output = transformer(input_)
    >>> output == {'name': 'Ivan', 'year_of_birth': 1990}
    True
    >>> output is input_
    True

    >>> rename_fields({'a': 'b', 'b': 'a'})({'a': 1, 'b': 2}) == {'a': 2, 'b': 1}
    True
    """
    def transformer(item: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # NoQA
        # Phase 1: detach every value that has to move. Nothing is written
        # yet, so a new name can never clobber a value still waiting to be
        # renamed.
        detached = []
        for old_field_name, new_field_name in field_name_mapping.items():
            if old_field_name in item:
                detached.append((new_field_name, item.pop(old_field_name)))

        # Phase 2: store the detached values under their new names.
        for new_field_name, value in detached:
            item[new_field_name] = value
        return item

    return transformer
def group_fields(group_name: str, field_names: Iterable[str]) -> Transformer:
    """
    Build a transformer that moves several fields into a nested mapping.

    The listed fields are removed from the processed mapping and collected
    into a new ``dict`` that is stored under ``group_name``. Fields that do
    not exist are simply not put in the group; the group is created even when
    it ends up empty. If the group name is equal to the name of a field that
    is included in the group, the value of that field is kept inside the
    group under its original name. The group always overwrites an existing
    field with the same name.

    Inside the group the fields appear in the order in which they were listed
    in ``field_names`` (duplicates are ignored), so the result does not
    depend on string hash randomisation.

    [!] Will change the original object.

    >>> transformer = group_fields('user', ['name', 'nick', 'gender'])
    >>> input_ = {'name': 'Ivan', 'age': 30, 'gender': 'm'}
    >>> output = transformer(input_)
    >>> output == {'age': 30, 'user': {'name': 'Ivan', 'gender': 'm'}}
    True
    >>> output is input_
    True
    >>> list(output['user'])
    ['name', 'gender']

    >>> transformer = group_fields('user', ['name', 'nick', 'gender', 'user'])
    >>> input_ = {'name': 'Ivan', 'gender': 'm', 'age': 30, 'user': 'iv4'}
    >>> output = transformer(input_)
    >>> output == {'age': 30, 'user': {'name': 'Ivan', 'gender': 'm', 'user': 'iv4'}}  # NoQA
    True
    >>> output is input_
    True
    """
    # Materialise the iterable once (it may be a one-shot iterator) and drop
    # duplicates while keeping the caller's order; dict keys preserve
    # insertion order.
    ordered_field_names = tuple(dict.fromkeys(field_names))

    def transformer(item: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # NoQA
        group = {}
        for field_name in ordered_field_names:
            if field_name in item:
                group[field_name] = item.pop(field_name)
        # Assigned last, so the group wins over any same-named field.
        item[group_name] = group
        return item

    return transformer
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment