Last active
July 25, 2018 15:48
-
-
Save dkapitan/fa8566265d77c726f4f4ba103b4e817e to your computer and use it in GitHub Desktop.
Pandas tips & tricks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Broadcast a per-group total back onto every row with groupby().transform().
import pandas as pd  # the snippet references `pd`; import it so it runs standalone

df = pd.DataFrame({
    'Date': [
        '2015-05-08', '2015-05-07', '2015-05-06', '2015-05-05',
        '2015-05-08', '2015-05-07', '2015-05-06', '2015-05-05',
    ],
    'Sym': ['aapl', 'aapl', 'aapl', 'aapl', 'aaww', 'aaww', 'aaww', 'aaww'],
    'Data2': [11, 8, 10, 15, 110, 60, 100, 40],
    'Data3': [5, 8, 6, 1, 50, 100, 60, 120],
})
# transform('sum') returns one value per original row (aligned on df's index),
# so the per-Date total of Data3 can be assigned straight back as a new column.
df['Data4'] = df.groupby('Date')['Data3'].transform('sum')
df  # bare expression: shows the frame when run as the last line of a notebook cell
# Output as originally captured. NOTE(review): the column order below looks
# like it came from an older pandas that sorted dict keys; recent versions are
# expected to keep insertion order (Date, Sym, Data2, Data3, Data4) -- confirm.
# Out[74]:
#    Data2  Data3        Date   Sym  Data4
# 0     11      5  2015-05-08  aapl     55
# 1      8      8  2015-05-07  aapl    108
# 2     10      6  2015-05-06  aapl     66
# 3     15      1  2015-05-05  aapl    121
# 4    110     50  2015-05-08  aaww     55
# 5     60    100  2015-05-07  aaww    108
# 6    100     60  2015-05-06  aaww     66
# 7     40    120  2015-05-05  aaww    121
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# How to apply a list of functions to a pd.DataFrame
# https://stackoverflow.com/questions/48767067/how-to-pass-list-of-custom-functions-to-pandas-dataframe-aggregate
import pandas as pd
import numpy as np
from scipy.stats import trim_mean

# 100 daily rows of random normal data in three columns.
df = pd.DataFrame(
    np.random.randn(100, 3),
    columns=['A', 'B', 'C'],
    index=pd.date_range('1/1/2000', periods=100),
)
# this works as expected; string names are used instead of np.sum / np.mean
# because passing NumPy callables to agg is deprecated in recent pandas
# (the resulting row labels, 'sum' and 'mean', are the same)
print(df.agg(['sum', 'mean']))
# now with a different function, works also
print(df.agg(lambda x: trim_mean(x, 0.2)))
# apply also works
print(df.apply(lambda x: trim_mean(x, 0.2)))
# now with a list of lambdas: doesn't work (errors as reported in the linked
# question; kept commented out so the script runs end to end)
# df.apply([lambda x: trim_mean(x, 0.2)])  # tuple index out of range
# df.agg([lambda x: trim_mean(x, 0.2)])  # tuple index out of range
# solution: wrap the list of functions in a single lambda that returns a
# labelled Series per column; the result has one row per label in `c`
c = ['trim_mean', 'mean', 'sum']
summary = df.agg(
    lambda x: pd.Series([trim_mean(x, 0.2), np.mean(x), np.sum(x)], index=c)
)
print(summary)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment