Skip to content

Instantly share code, notes, and snippets.

@flolas
Last active July 25, 2016 22:27
Show Gist options
  • Save flolas/8facfe9c01492c3e92f2e00559eab2d1 to your computer and use it in GitHub Desktop.
Save flolas/8facfe9c01492c3e92f2e00559eab2d1 to your computer and use it in GitHub Desktop.
Unpack a pandas Series into multiple pandas Series (unpacking columns)
def unpack_col(col_to_unpack, df_to_append=None, header='col', sep=',', na_value=''):
    """Split every string of a Series on ``sep`` into separate columns.

    Args:
        col_to_unpack: pandas Series of delimited strings. Missing values
            are replaced by ``na_value`` before splitting, so ``na_value``
            must be a string.
        df_to_append: optional DataFrame. When given, the unpacked columns
            are concatenated to its right (aligned on the index).
        header: prefix for the generated column names
            (``<header>_0``, ``<header>_1``, ...).
        sep: literal separator the strings are split on.
        na_value: filler used both for missing input values and for rows
            that produce fewer pieces than the widest row.

    Returns:
        A DataFrame holding only the unpacked columns, or ``df_to_append``
        with those columns appended when it is a DataFrame.
    """
    import pandas as pd

    filled = col_to_unpack.fillna(na_value)
    # Plain ``str.split`` keeps ``sep`` a literal separator. Rows with fewer
    # pieces are padded with missing values by the DataFrame constructor and
    # replaced with ``na_value`` right after.
    pieces = [value.split(sep) for value in filled]
    unpacked_cols = pd.DataFrame(pieces, index=filled.index).fillna(na_value)

    # One column per split position: header_0, header_1, ...
    unpacked_cols.columns = [header + '_' + str(i) for i in unpacked_cols.columns]

    if isinstance(df_to_append, pd.DataFrame):
        # Return the given frame with the unpacked columns appended.
        return pd.concat([df_to_append, unpacked_cols], axis=1)
    # Return only the unpacked columns.
    return unpacked_cols
def unpack_col_normalized(col_to_unpack, df_to_append, sep=',', na_value=''):
    """Expand a delimited string column into one row per piece (long form).

    Args:
        col_to_unpack: label of the column in ``df_to_append`` to split.
        df_to_append: DataFrame containing that column.
        sep: separator passed to ``Series.str.split``.
        na_value: currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        A DataFrame with the original column removed and re-joined on the
        index as one row per split piece. The other columns are repeated
        for every piece and the re-joined column is placed last. Rows whose
        value is missing stay as a single row with a missing value.
    """
    # One entry per split piece; ``explode`` repeats the original index
    # label for every piece so the join below can fan rows out.
    s = df_to_append[col_to_unpack].str.split(sep).explode()
    # The joined column is always named with the string form of the label.
    s.name = str(s.name)
    # ``axis`` must be given by keyword (here via ``columns=``) on pandas 2.0+.
    return df_to_append.drop(columns=col_to_unpack).join(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment