Last active
February 15, 2019 16:44
-
-
Save adyork/696b091619c9c30b9b26a053fcedf78e to your computer and use it in GitHub Desktop.
Flatten column names (take multiple column names and flatten to one list)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
get_flat_header(group_results) | |
get_flat_header(group_results,combine=True) | |
* For use with pandas .groupby() and .agg() results | |
* For when you get back multilevel headers, and you just want one header list . | |
* Returns list of column names. | |
if combine=False (default), then it returns the column name as the last name in the list for that column. | |
e.g. | |
.agg({ | |
'orig colname':{ | |
'my name for col':'sum', | |
'num_rows': 'count' | |
} | |
becomes | |
('my name for col','num_rows') | |
if combine=True then it combines all names in list with an underscore. This is good if you didn't provide a name for the | |
for the agg operation. | |
e.g. | |
.agg('orig colname':['sum','count']) | |
becomes column names | |
('orig colname_sum','orig colname_count') | |
''' | |
def get_flat_header(group_results,combine=False): | |
header_array = group_results.columns.ravel() | |
#build new list | |
flat_header = list() | |
for colnames in list(header_array): | |
colnames = list(filter(None, colnames)) #remove blank ones | |
if combine: # if you didn't provide a name for the agg, this is best. | |
colname = '_'.join(colnames) | |
else: #use last name in list (if you provided a name for the agg, this is what you want.) | |
colname = colnames[-1] # take last element | |
flat_header.append(colname) | |
return flat_header | |
''' | |
Example 1: if you didn't provide names in agg() the use combine=True | |
group_results = df.groupby(['Group ID'],as_index=False).agg({'n instances':['sum','first','count']}) | |
list(group_results.columns.ravel()) | |
# [('Group ID', ''), ('n instances', 'sum'), ('n instances', 'first'), ('n instances', 'count')] | |
group_results.columns = get_flat_header(group_results,combine=True) | |
list(group_results.columns.ravel()) | |
# ['Group ID', 'n instances_sum', 'n instances_first', 'n instances_count'] | |
Example 2: If you provided column names in agg() | |
group_results = df.groupby(['Group ID'],as_index=False)\ | |
.agg({ | |
'n instances':{ | |
'my name for col':'sum', | |
'num_rows': 'count' | |
}, | |
'peptides including PTMs':{ | |
'num_peps_PTMs': 'nunique', | |
'peps_and_PTMs':lambda x : ';'.join(list(x)) | |
} | |
}) | |
# e.g. you have: | |
list(group_results.columns.ravel()) | |
# [('Group ID', ''), ('n instances', 'my name for col'), ('n instances', 'num_rows'), ('peptides including PTMs', 'num_peps_PTMs'), ('peptides including PTMs', 'peps_and_PTMs')] | |
group_results.columns = get_flat_header(group_results) | |
list(group_results.columns.ravel()) | |
# ['Group ID', 'my name for col', 'num_rows','num_peps_PTMs','peps_and_PTMs'] | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment