# karpanGit/pandas_ungroup.py

## pandas_ungroup.py
# pandas ungroup operation

import pandas as pd
# Create an example dataframe. df1.loc[3, 'b'] contains a duplicate value
# ('h') that is dropped by the ungroup operation below.
df1 = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a;b;c', 'g', 'j;w', 'h;j;h']})
print(df1)
# Expected output:
#    a      b
# 0  1  a;b;c
# 1  2      g
# 2  3    j;w
# 3  4  h;j;h

# Ungroup the concatenated cells (operations listed separately for clarity).
# Splitting each cell into a list and exploding it yields one value per row
# while repeating the original index label, so no wide intermediate frame,
# stack() or droplevel() is required.
res = df1['b'].str.split(';').explode()
# Missing cells carry no values to ungroup; drop them so they do not produce
# entries of their own (the join below leaves such rows as NaN).
res = res.dropna()
res = res.rename('b (new)')
# Keep only the first occurrence of each value within every original row.
res = res.groupby(level=0).unique().explode()
print(res)
# Expected output:
# 0    a
# 0    b
# 0    c
# 1    g
# 2    j
# 2    w
# 3    h
# 3    j
# Name: b (new), dtype: object

# Join back to the dataframe. The index of res repeats once per ungrouped
# value, so each original row is repeated accordingly.
df1 = df1.join(res)
print(df1)
# Expected output:
#    a      b b (new)
# 0  1  a;b;c       a
# 0  1  a;b;c       b
# 0  1  a;b;c       c
# 1  2      g       g
# 2  3    j;w       j
# 2  3    j;w       w
# 3  4  h;j;h       h
# 3  4  h;j;h       j
	# pandas ungroup operation

	import pandas as pd
	# create example dataframe df1.loc[3,'b'] contains duplicate values that will be dropped
	df1 = pd.DataFrame({'a':[1,2,3,4],'b':['a;b;c','g','j;w','h;j;h']})
	print(df1)
	'''
	a b
	0 1 a;b;c
	1 2 g
	2 3 j;w
	3 4 h;j;h
	'''

	# ungroup the concatenated cells (operations listed separately for clarity)
	res = df1['b'].str.split(';',expand=True).stack()
	res.index = res.index.droplevel(level=1)
	res.rename('b (new)', inplace=True)
	res = res.groupby(level=0).unique().explode()
	print(res)
	'''
	0 a
	0 b
	0 c
	1 g
	2 j
	2 w
	3 h
	3 j
	Name: b (new), dtype: object
	'''

	# join back to the dataframe
	df1 = df1.join(res)
	print(df1)
	'''
	a b b (new)
	0 1 a;b;c a
	0 1 a;b;c b
	0 1 a;b;c c
	1 2 g g
	2 3 j;w j
	2 3 j;w w
	3 4 h;j;h h
	3 4 h;j;h j
	'''