Skip to content

Instantly share code, notes, and snippets.

@sansagara

sansagara/tags.py Secret

Last active Nov 16, 2020
Embed
What would you like to do?
Tag-based feature engineering
import operator as op
from typing import Any, Dict, List, Union

import pyspark.sql
from pyspark.sql import functions as f
def regex_like(inputs: str, values: Union[List[str], str], tag_name: str) -> pyspark.sql.Column:
    """
    Build a tag column that fires when the input column matches any given regex.

    Args:
        inputs: Name of the column to test.
        values: A single regex pattern, or a list of patterns; a row is tagged
            when at least one of them matches.
        tag_name: Tag emitted on a match; also used as the output column alias.

    Returns:
        A column expression evaluating to ``tag_name`` where the input column
        matches, and to the literal string ``"null"`` otherwise.
    """
    patterns = [values] if isinstance(values, str) else list(values)
    if patterns:
        # ``rlike`` takes one pattern string, so the alternatives are merged
        # into a single alternation. Each one is wrapped in a non-capturing
        # group so a top-level ``|`` inside a pattern stays scoped to it.
        combined = "|".join("(?:{})".format(pattern) for pattern in patterns)
        matches = f.col(inputs).rlike(combined)
    else:
        # Joining zero patterns yields the empty regex, which would match
        # every non-null value; with no patterns nothing should be tagged.
        matches = f.lit(False)
    return f.when(matches, tag_name).otherwise("null").alias(tag_name)
def is_in(inputs: str, values: List[str], tag_name: str) -> pyspark.sql.Column:
    """
    Build a tag column that fires when the input column's value is one of ``values``.

    Args:
        inputs: Name of the column to test.
        values: Accepted values for the column.
        tag_name: Tag emitted on a match; also used as the output column alias.

    Returns:
        A column expression evaluating to ``tag_name`` where the column value
        is contained in ``values``, and to the literal string ``"null"``
        otherwise.
    """
    is_member = f.col(inputs).isin(values)
    tag_or_placeholder = f.when(is_member, tag_name).otherwise("null")
    return tag_or_placeholder.alias(tag_name)
def greater_equal(inputs: str, values: Union[int, float], tag_name: str) -> pyspark.sql.Column:
    """
    Build a tag column that fires when the input column is at least ``values``.

    Args:
        inputs: Name of the column to compare.
        values: Inclusive lower bound the column is compared against.
        tag_name: Tag emitted on a match; also used as the output column alias.

    Returns:
        A column expression evaluating to ``tag_name`` where the column is
        greater than or equal to ``values``, and to the literal string
        ``"null"`` otherwise.
    """
    column = f.col(inputs)
    lower_bound = f.lit(values)
    reaches_bound = column >= lower_bound
    tag_or_placeholder = f.when(reaches_bound, tag_name).otherwise("null")
    return tag_or_placeholder.alias(tag_name)
def greater(inputs: str, values: Union[int, float], tag_name: str) -> pyspark.sql.Column:
    """
    Build a tag column that fires when the input column is strictly above ``values``.

    Args:
        inputs: Name of the column to compare.
        values: Exclusive lower bound the column is compared against.
        tag_name: Tag emitted on a match; also used as the output column alias.

    Returns:
        A column expression evaluating to ``tag_name`` where the column is
        strictly greater than ``values``, and to the literal string ``"null"``
        otherwise.
    """
    column = f.col(inputs)
    lower_bound = f.lit(values)
    exceeds_bound = column > lower_bound
    tag_or_placeholder = f.when(exceeds_bound, tag_name).otherwise("null")
    return tag_or_placeholder.alias(tag_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.