Last active
July 25, 2016 22:27
-
-
Save flolas/8facfe9c01492c3e92f2e00559eab2d1 to your computer and use it in GitHub Desktop.
Unpack a pandas Series of delimited strings into multiple pandas columns (unpacking cols)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def unpack_col(col_to_unpack, df_to_append=None, header='col', sep=',', na_value=''):
    """Split each string of a Series on ``sep`` into separate columns.

    Args:
        col_to_unpack: pandas Series whose non-missing entries are strings.
        df_to_append: optional DataFrame; when given, the unpacked columns
            are concatenated to its right (``axis=1``).
        header: prefix for the generated column names, which take the form
            ``<header>_<position>`` with positions starting at 0.
        sep: delimiter passed to ``str.split`` (treated literally).
        na_value: replacement for missing entries in the input and for the
            padding cells of rows that split into fewer pieces than the
            widest row. It must be a string, because filled entries are
            split like every other entry.

    Returns:
        A DataFrame holding only the unpacked columns, or ``df_to_append``
        with the unpacked columns appended when it is a DataFrame.
    """
    import pandas as pd

    filled = col_to_unpack.fillna(na_value)

    # Split on the caller-supplied separator; the delimiter used to be
    # hard-coded to ',' which silently ignored ``sep``.
    pieces = [value.split(sep) for value in filled]

    # Building the frame in one go pads short rows with missing values,
    # which are then replaced by ``na_value``. It also yields an empty
    # frame (rather than failing) for an empty input Series.
    unpacked_cols = pd.DataFrame(pieces, index=filled.index).fillna(na_value)

    # Name the columns <header>_<position>, one per split position.
    unpacked_cols.columns = [header + '_' + str(i) for i in unpacked_cols.columns]

    if isinstance(df_to_append, pd.DataFrame):
        # Return the given frame with the unpacked columns appended.
        return pd.concat([df_to_append, unpacked_cols], axis=1)
    # Return only the unpacked columns.
    return unpacked_cols
def unpack_col_normalized(col_to_unpack, df_to_append, sep=',', na_value=''):
    """Explode a delimited string column into one row per piece.

    Args:
        col_to_unpack: label of the column in ``df_to_append`` to split.
        df_to_append: DataFrame containing that column.
        sep: delimiter handed to ``Series.str.split``.
        na_value: accepted for signature compatibility with ``unpack_col``;
            currently unused, so missing entries stay missing in the result.

    Returns:
        ``df_to_append`` without the original column, joined on its index
        with the split pieces. A row whose value splits into k pieces
        appears k times, and the new column is named ``str(<column name>)``.
    """
    source = df_to_append[col_to_unpack]

    # One entry per piece, each keeping the index label of its source row.
    # ``explode`` avoids both the module-level ``pd`` name (which this
    # function never imported) and the positional ``axis`` arguments that
    # newer pandas versions reject.
    s = source.str.split(sep).explode()
    s.name = str(source.name)

    # Index-based join repeats the remaining columns for every piece.
    return df_to_append.drop(columns=[col_to_unpack]).join(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment