Skip to content

Instantly share code, notes, and snippets.

@mrbungie
Last active July 10, 2017 21:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrbungie/d422ac8c39f5c8d1e7bba8b24a1f0f74 to your computer and use it in GitHub Desktop.
Save mrbungie/d422ac8c39f5c8d1e7bba8b24a1f0f74 to your computer and use it in GitHub Desktop.
discretizer_sql_sentence.py
def create_discretizer_sql(discr_df, table_name, other_cols=[]):
from bciutils.beta_v2.transform.sampling import na_filler
sql_string = "SELECT {other_columns}".format(other_columns=", ".join(other_cols))
if len(other_cols) > 0:
sql_string += ',\n'
columns = list(discr_df["feature_name"].unique())
for column in columns:
sql_string += "CASE "
col_df = discr_df[discr_df["feature_name"] == column]
categories = list(col_df["category"].fillna('NA').unique())
last_group = 'NULL'
if categories != ['NA']:
for group in col_df.to_dict(orient="records"):
sql_string += "WHEN {column} = '{category}' THEN {group_num} ".format(column=column, category=group["category"], group_num=group["grupo"])
else:
alone_numbers_groups = col_df[col_df["max"] == col_df["min"]].to_dict("records")
for group in alone_numbers_groups:
if group["min"] == na_filler:
sql_string += "WHEN {column} IS NULL THEN {group_num} ".format(column=column, group_num=group["grupo"])
else:
sql_string += "WHEN {column} = {number} THEN {group_num} ".format(column=column, number=group["max"], group_num=group["grupo"])
other_groups = col_df[col_df["max"] != col_df["min"]].sort_values("grupo").to_dict("records")
for group in other_groups:
if group["min"] == na_filler:
sql_string += "WHEN {column} IS NULL OR {column} <= {max} THEN {group_num} ".format(column=column, max=group["max"], min=group["min"], group_num=group["grupo"])
elif len(other_groups) == 1 or group["grupo"] != col_df[col_df["max"] != col_df["min"]]["grupo"].max():
sql_string += "WHEN {column} <= {max} THEN {group_num} ".format(column=column, max=group["max"], group_num=group["grupo"])
else:
last_group = group["grupo"]
sql_string += "ELSE {group_num} END AS {final_column_name},\n".format(group_num=last_group, final_column_name=column)
return sql_string[:-2] + "\nFROM {table_name};".format(table_name=table_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment