In :
# Sample data used by the pivot-table examples: one tuple per row, in the
# column order A, B, C, D.
df = pd.DataFrame(
    [
        ("foo", "one", "small", 1),
        ("foo", "one", "large", 2),
        ("foo", "one", "large", 2),
        ("foo", "two", "small", 3),
        ("foo", "two", "small", 3),
        ("bar", "one", "large", 4),
        ("bar", "one", "small", 5),
        ("bar", "two", "small", 6),
        ("bar", "two", "large", 7),
    ],
    columns=["A", "B", "C", "D"],
)
In :
# Show the sample DataFrame so its rows can be checked against the pivots below.
df
Out :
A B C D
0 foo one small 1
1 foo one large 2
2 foo one large 2
3 foo two small 3
4 foo two small 3
5 bar one large 4
6 bar one small 5
7 bar two small 6
8 bar two large 7
In :
# Count the non-null C entries for every (A, B) combination; combinations
# without any rows are filled with 0.
df.pivot_table(values='C', index='A', columns='B', aggfunc='count', fill_value=0)
Out :
B one two
A
bar 2 2
foo 3 2
In :
# Same table as the previous one, but each cell is expressed as a percentage
# of the total number of rows in df; margins=True adds the "All" row/column.
df.pivot_table(
    values='C',
    index='A',
    columns='B',
    aggfunc=lambda group: group.count() * 100 / len(df),
    fill_value=0,
    margins=True,
)
Out :
B one two All
A
bar 22.222222 22.222222 44.444444
foo 33.333333 22.222222 55.555556
All 55.555556 44.444444 100.000000
In :
# Sum of D for every (A, B) combination; combinations without any rows are
# filled with 0.
df.pivot_table(values='D', index='A', columns='B', aggfunc='sum', fill_value=0)
Out :
B one two
A
bar 9 13
foo 5 6
Created
November 28, 2018 20:00
-
-
Save sanchezg/404b8161a9674dc45fd5178fa71adc1e to your computer and use it in GitHub Desktop.
pandas cheatsheet and common use cases
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment