Skip to content

Instantly share code, notes, and snippets.

@winiar93
Last active January 10, 2023 15:50
Show Gist options
  • Save winiar93/e6b9d8878b8b224fe8a9ec08530beafa to your computer and use it in GitHub Desktop.
Save winiar93/e6b9d8878b8b224fe8a9ec08530beafa to your computer and use it in GitHub Desktop.
Utility functions that generate a dictionary with column name and corresponding varchar value.
import pandas as pd
import re
#example string
dtypes_correction = """
[sales] --5
,[id] --50
,[project_id]--50
,[another_column]--31
"""
def dtypes_updater(factor: int, dtypes_correction: str):
dtypes = {}
for d in dtypes_correction.split(","):
new_string = re.sub(r"[\([{})\]]", "", d)
d = new_string.split("--")
val = d[1].replace("\n", "")
dtypes[d[0].strip()] = f"varchar({int(val)* 2 })"
print(f"dtypes = {dtypes}")
def get_df_varchars(df: pd.DataFrame, excess:int=2):
"""Using maximum lengh of data in columns, function genreates varchar dtypes dictionary."""
max_length_object_cols = {col: df.loc[:, col].astype(str).apply(len).max() for col in df.columns}
for x in max_length_object_cols:
maximum = int(max_length_object_cols[x])
val = (2 * round(maximum/2)) * excess
if val < 10:
val = 10
max_length_object_cols[x] = f'varchar({val})'
return max_length_object_cols
@winiar93
Copy link
Author

dtypes utility

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment