Skip to content

Instantly share code, notes, and snippets.

>>> import pandas as pd
>>> import numpy as np
>>> series = pd.Series([12,32,54,2, np.nan, "a string", 6])
>>> series
0 12
1 32
2 54
3 2
4 NaN
5 a string
6 6
dtype: object
>>> series = pd.Series([1,2,np.nan, 4])
>>> series
0 1.0
1 2.0
2 NaN
3 4.0
dtype: float64
>>> d = {
...     "stats": pd.Series(np.arange(10,15,1.0)),
...     "year": pd.Series(["2012","2007","2012","2003"]),
...     "intake": pd.Series(["SUMMER","WINTER","WINTER","SUMMER"]),
... }
>>> df = pd.DataFrame(d)
>>> df
>>> import pandas as pd
>>> nrows, ncols = 100000, 100
>>> rng = np.random.RandomState(42)
>>> df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols))
...                       for i in range(4))
>>> %timeit df1 + df2 + df3 + df4
10 loops, best of 3: 103.1 ms per loop
>>> %timeit pd.eval('df1 + df2 + df3 + df4')
10 loops, best of 3: 53.6 ms per loop
>>> np.allclose(df1 + df2 + df3 + df4, pd.eval('df1 + df2 + df3 + df4'))
True
>>> df = pd.read_csv('companies.csv')
>>> df.head()
>>> %timeit df.query("vertical == 'B2B' and year == 2009")
875 µs ± 24.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
>>> %timeit df[(df['vertical'] == 'B2B') & (df['year'] == 2009)]
1.69 ms ± 57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)