Skip to content

Instantly share code, notes, and snippets.

@chendaniely
Last active January 15, 2022 19:53
Show Gist options
  • Save chendaniely/93845857e671da9666ee80bd0eb7002e to your computer and use it in GitHub Desktop.
Save chendaniely/93845857e671da9666ee80bd0eb7002e to your computer and use it in GitHub Desktop.
Pandas assign inplace example
# tl;dr: you can't assign in place because inplace returns None
import pandas as pd
# Build the demo frame explicitly. `pd.util.testing.makeMixedDataFrame()` was
# deprecated in pandas 1.0 and removed in pandas 2.0, so spelling the data out
# keeps the example runnable on current pandas.
dat = pd.DataFrame(
    {
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        # business days only, so the weekend of 2009-01-03/04 is skipped
        "D": pd.bdate_range("2009-01-01", periods=5),
    }
)
print(dat)
# A B C D
# 0 0.0 0.0 foo1 2009-01-01
# 1 1.0 1.0 foo2 2009-01-02
# 2 2.0 0.0 foo3 2009-01-05
# 3 3.0 1.0 foo4 2009-01-06
# 4 4.0 0.0 foo5 2009-01-07
# individual assignment with [ ]
# every step is its own statement, so each one can use the columns that the
# previous statement created
dat1 = dat.copy()
dat1["new_col_1"] = dat1["A"].add(dat1["B"])  ## create new column
dat1["new_col_2"] = dat1["new_col_1"].mul(10)  ## use new column
dat1 = dat1.loc[dat1["new_col_1"].ge(2)]  ## filter on existing column
print(dat1)
# A B C D new_col_1 new_col_2
# 1 1.0 1.0 foo2 2009-01-02 2.0 20.0
# 2 2.0 0.0 foo3 2009-01-05 2.0 20.0
# 3 3.0 1.0 foo4 2009-01-06 4.0 40.0
# 4 4.0 0.0 foo5 2009-01-07 4.0 40.0
# binding the result of .assign() back to the name works just fine
dat2 = dat.copy().assign(
    a=3,
    ## a column created in the same call is only reachable through a callable
    b=lambda frame: frame["a"].mul(10),
)
print(dat2)
# A B C D a b
# 0 0.0 0.0 foo1 2009-01-01 3 30
# 1 1.0 1.0 foo2 2009-01-02 3 30
# 2 2.0 0.0 foo3 2009-01-05 3 30
# 3 3.0 1.0 foo4 2009-01-06 3 30
# 4 4.0 0.0 foo5 2009-01-07 3 30
# case for "inplace"
# Chaining .loc straight after .assign() fails: the name `dat2` inside the
# brackets is still bound to the plain copy made on the line below (the name is
# only rebound after the whole right-hand side has been evaluated), and that
# copy has no "new_col_1" column.
dat2 = dat.copy()
try:
    dat2 = dat2.assign(
        new_col_1=lambda x: x["A"] + x["B"],
        new_col_2=lambda x: x["new_col_1"] * 10,
    ).loc[dat2["new_col_1"] >= 2]
except KeyError as err:
    # The failure is the point of this example; report it instead of letting
    # it abort the script so the remaining examples still run.
    print(f"KeyError: {err}")  ## from the .loc call
# KeyError: 'new_col_1'
# you would need to re-write the above as such:
# first bind the assigned frame to the name, then filter in a second statement
# so that `dat2` inside .loc already carries the new columns
dat2 = dat.copy().assign(
    new_col_1=lambda frame: frame["A"] + frame["B"],
    new_col_2=lambda frame: frame["new_col_1"] * 10,
)
dat2 = dat2.loc[dat2["new_col_1"].ge(2)]
print(dat2)
# A B C D new_col_1 new_col_2
# 1 1.0 1.0 foo2 2009-01-02 2.0 20.0
# 2 2.0 0.0 foo3 2009-01-05 2.0 20.0
# 3 3.0 1.0 foo4 2009-01-06 4.0 40.0
# 4 4.0 0.0 foo5 2009-01-07 4.0 40.0
# what it would look like with an "inplace_" parameter
# "inplace_" instead of "inplace" just in case there's a column named "inplace"
# this wouldn't work because normally inplace returns None
# NOTE: DataFrame.assign has no such parameter; every keyword it receives
# becomes a column, so `inplace_=True` just adds a column named "inplace_".
# The .loc lookup still sees the un-assigned `dat2`, so this raises KeyError.
dat2 = dat.copy()
try:
    dat2 = (
        dat2
        .assign(
            new_col_1=lambda x: x["A"] + x["B"],
            new_col_2=lambda x: x["new_col_1"] * 10,
            inplace_=True,
        )
        .loc[dat2["new_col_1"] >= 2]
    )
except KeyError as err:
    # expected with real pandas; report it rather than aborting the script
    print(f"KeyError: {err}")
else:
    print(dat2)
# would expect the output to be the same as dat1 above:
# A B C D new_col_1 new_col_2
# 1 1.0 1.0 foo2 2009-01-02 2.0 20.0
# 2 2.0 0.0 foo3 2009-01-05 2.0 20.0
# 3 3.0 1.0 foo4 2009-01-06 4.0 40.0
# 4 4.0 0.0 foo5 2009-01-07 4.0 40.0
# Other methods that don't require a direct reference to the frame would work
# below I show `.drop()`
dat3 = (
    dat.copy()
    .assign(
        new_col_1=lambda frame: frame["A"] + frame["B"],
        new_col_2=lambda frame: frame["new_col_1"] * 10,
    )
    .drop(["A", "B", "C"], axis="columns")
)
print(dat3)
# D new_col_1 new_col_2
# 0 2009-01-01 0.0 0.0
# 1 2009-01-02 2.0 20.0
# 2 2009-01-05 2.0 20.0
# 3 2009-01-06 4.0 40.0
# 4 2009-01-07 4.0 40.0
# .loc will still fail in the example, even if you put inplace within the drop call
# this is because inplace returns None
# instead you would fix all of this by using query: the filter is given as a
# string, so the chain never has to name the intermediate frame
dat3 = (
    dat.copy()
    .assign(
        new_col_1=lambda frame: frame["A"] + frame["B"],
        new_col_2=lambda frame: frame["new_col_1"] * 10,
    )
    .drop(["A", "B", "C"], axis="columns")
    .query("new_col_1 >= 2")
)
print(dat3)
# D new_col_1 new_col_2
# 1 2009-01-02 2.0 20.0
# 2 2009-01-05 2.0 20.0
# 3 2009-01-06 4.0 40.0
# 4 2009-01-07 4.0 40.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment