Skip to content

Instantly share code, notes, and snippets.

@yamyamyuo
Created October 15, 2018 11:32
Show Gist options
  • Save yamyamyuo/d895b7cc4a868ddf116b9d224963bd67 to your computer and use it in GitHub Desktop.
Save yamyamyuo/d895b7cc4a868ddf116b9d224963bd67 to your computer and use it in GitHub Desktop.
reorder dataframe column
"""
Reorder the columns of a pivot table by one level of its multi-level (MultiIndex) columns.
"""
# Demo: pivot a small DataFrame so that its columns form a two-level
# MultiIndex (active_days, province), then re-select those columns in
# descending ``active_days`` order.
#
# NOTE: the sample outputs shown in comments below were recorded from the
# original run (Python 2, older pandas). On newer versions the column order
# of ``df`` follows dict insertion order and the MultiIndex repr differs.
import pandas

data = {
    'active_days': [2, 6, 1, 2],
    'province': ['beijing', 'shanghai', 'shanghai', 'beijing'],
    '__timestamp': ['2018-09-17', '2018-09-24', '2018-08-20', '2018-09-17'],
    'sum(active_days_user)': [1033604, 256682, 3613940, 1],
}
df = pandas.DataFrame.from_dict(data=data)

# Single-argument print(...) behaves identically on Python 2 and Python 3,
# whereas the bare ``print x`` statement is a SyntaxError on Python 3.
print("original df:\n")
print(df)
# original df:
#
#   __timestamp  active_days  province  sum(active_days_user)
# 0  2018-09-17            2   beijing                1033604
# 1  2018-09-24            6  shanghai                 256682
# 2  2018-08-20            1  shanghai                3613940
# 3  2018-09-17            2   beijing                      1

# Rows 0 and 3 share (timestamp, active_days, province), so they are summed
# into a single cell (1033604 + 1); combinations with no data become 0.
# The string 'sum' is used instead of the builtin ``sum`` because newer pandas
# emits a FutureWarning when handed the builtin callable.
pt = df.pivot_table(
    index='__timestamp',
    columns=['active_days', 'province'],
    values='sum(active_days_user)',
    fill_value=0,
    aggfunc='sum',
)
print("\npivot table:\n")
print(pt)
# pivot table:
#
# active_days         1        2        6
# province     shanghai  beijing shanghai
# __timestamp
# 2018-08-20    3613940        0        0
# 2018-09-17          0  1033605        0
# 2018-09-24          0        0   256682

print("\ncolumns:\n")
print(pt.columns)
# columns:
#
# MultiIndex(levels=[[1, 2, 6], [u'beijing', u'shanghai']],
#            labels=[[0, 1, 2], [1, 0, 1]],
#            names=[u'active_days', u'province'])

# MultiIndex.sortlevel returns a tuple: (sorted index, indexer).
# Element 0 is the column MultiIndex ordered by descending active_days.
sorted_level = pt.columns.sortlevel(
    level=['active_days'],
    ascending=False,
    sort_remaining=True,
)

# Selecting with the sorted MultiIndex returns the same data with the
# columns in the new order.
sorted_pt = pt[sorted_level[0]]
print("\nsorted level:\n")
print(sorted_pt)
# sorted level:
#
# active_days         6        2        1
# province     shanghai  beijing shanghai
# __timestamp
# 2018-08-20          0        0  3613940
# 2018-09-17          0  1033605        0
# 2018-09-24     256682        0        0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment