Last active
April 11, 2018 16:09
-
-
Save JoaoCarabetta/48c46239a54955c9c30b5461d08ec704 to your computer and use it in GitHub Desktop.
Split list values into rows in pandas, enforcing the output type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_data_frame_list(df,
                          target_column,
                          output_type=float):
    """Split list values in one column into separate rows, casting each value.

    Every row of ``df`` is inspected. When the value in ``target_column`` is a
    ``list``, one output row is produced per element; any other value yields
    exactly one output row. In both cases the value written to
    ``target_column`` is first passed through ``output_type``, and the values
    of the remaining columns are duplicated into every output row.

    Args:
        df: DataFrame to split.
        target_column: Label of the column containing the values to split.
        output_type: Callable applied to every value written to
            ``target_column`` (defaults to ``float``).

    Returns:
        A new DataFrame built from the expanded rows; the index of ``df`` is
        not preserved. A row whose target value is an empty list contributes
        no output rows. When no rows are produced at all, an empty DataFrame
        carrying the columns of ``df`` is returned.

    Raises:
        Any exception raised by ``output_type`` for a value it cannot convert
        is propagated to the caller.
    """
    expanded_rows = []

    # Iterate explicitly rather than calling ``df.apply`` for its side
    # effects: pandas does not promise exactly one callback invocation per
    # real row (for an empty frame it may probe the callback with a
    # placeholder row), which would inject phantom rows into the result.
    for _, row in df.iterrows():
        value = row[target_column]
        # Only genuine lists are split; every other value is kept as one row.
        items = value if isinstance(value, list) else [value]
        for item in items:
            new_row = row.to_dict()
            new_row[target_column] = output_type(item)
            expanded_rows.append(new_row)

    if not expanded_rows:
        # ``pd.DataFrame([])`` would lose the column labels; keep the schema.
        return pd.DataFrame(columns=df.columns)
    return pd.DataFrame(expanded_rows)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment