-
-
Save dutc/a64d962f663c5eac203349f24690f60b to your computer and use it in GitHub Desktop.
`pandas` problem of the day: analysis (with `numpy.meshgrid`‽)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hard-drive list prices ($) keyed by capacity (TB).
from pandas import Series

# Index: drive capacity in TB; values: list price in dollars.
s = Series({
    4: 89.00,
    6: 109.99,
    8: 149.14,
    10: 218.99,
    12: 239.09,
    14: 279.99,
    16: 329.99,
    18: 409.99,
}, name='prices')

print(s.round(2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cross-price table built with numpy.meshgrid: for every (purchase, compare)
# pair of drive sizes, how much the `compare` drive would cost at the
# `purchase` drive's $/TB rate, minus the `compare` drive's list price.
from pandas import Series, DataFrame
from numpy import array, meshgrid, multiply

# Index: drive capacity in TB; values: list price in dollars.
s = Series({
    4: 89.00,
    6: 109.99,
    8: 149.14,
    10: 218.99,
    12: 239.09,
    14: 279.99,
    16: 329.99,
    18: 409.99,
}, name='prices')

# meshgrid(x, y) returns grids of shape (len(y), len(x)): `rates` varies
# along columns and `capacities` along rows.
rates, capacities = meshgrid(s / s.index, s.index)

# After the transpose, cell [i, j] is rate[i] * capacity[j]. Subtracting `s`
# aligns on the columns, so each column is offset by that drive's list price
# and the diagonal is zero.
df = DataFrame(
    multiply(rates, capacities).T,
    index=s.index, columns=s.index,
) - s

# Only drives of at least `minsize` TB are of interest; select rows and
# columns in a single .loc call instead of chained indexing.
minsize = 12
res = df.loc[df.index >= minsize, df.columns >= minsize]

print(res.round(2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unit prices: dollars per terabyte and terabytes per dollar for each drive.
from pandas import Series

# Index: drive capacity in TB; values: list price in dollars.
s = Series({
    4: 89.00,
    6: 109.99,
    8: 149.14,
    10: 218.99,
    12: 239.09,
    14: 279.99,
    16: 329.99,
    18: 409.99,
}, name='prices')

# Dividing by the index gives the price of one TB on each drive; the inverse
# gives how many TB one dollar buys. `rename` labels each result without
# touching `s`.
per_terabyte = (s / s.index).rename('$/TB')
per_dollar = (s.index / s).rename('TB/$')

print(per_terabyte.round(2))
print(per_dollar.round(4))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Over/under-payment tables: for every (purchase, compare) pair of drive
# sizes, what the `compare` drive would cost at the `purchase` drive's $/TB
# rate, versus the `compare` drive's list price -- in dollars and as a
# fraction of that list price.
from pandas import Series, DataFrame
from numpy import diag, isclose

# Index: drive capacity in TB; values: list price in dollars.
s = Series({
    4: 89.00,
    6: 109.99,
    8: 149.14,
    10: 218.99,
    12: 239.09,
    14: 279.99,
    16: 329.99,
    18: 409.99,
}, name='prices')

per_terabyte = (s / s.index).rename('$/TB')

# One column per purchase size, then transpose so rows are the purchase size
# and columns the compare size. `Series.items()` replaces `iteritems()`,
# which was removed in pandas 2.0.
abs_df = DataFrame({
    idx: s.index * per_tb - s
    for idx, per_tb in per_terabyte.items()
}).T  # over/under-payment in absolute-$ vs list-price

# Division aligns `s` on the columns, i.e. on the compare drive's list price.
pct_df = abs_df / s  # over/under-payment in percentage-$ vs list-price

# Sanity check: a drive priced at its own rate costs exactly its list price.
assert isclose(diag(abs_df), 0).all() and isclose(diag(pct_df), 0).all()

print(abs_df.round(2))
print((100 * pct_df).round(0))

# narrowed our actual options: only drives of at least `minsize` TB
minsize = 12
res = abs_df.loc[abs_df.index >= minsize, abs_df.columns >= minsize]
print(res.round(2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The same cross-price table, built by hand with numpy.tile / numpy.repeat
# instead of numpy.meshgrid.
from pandas import Series, DataFrame
from numpy import array, tile, repeat

# Index: drive capacity in TB; values: list price in dollars.
s = Series({
    4: 89.00,
    6: 109.99,
    8: 149.14,
    10: 218.99,
    12: 239.09,
    14: 279.99,
    16: 329.99,
    18: 409.99,
}, name='prices')

sz = s.index.size

# tile repeats the whole rate vector `sz` times, so every row of the grid is
# the full list of $/TB rates; repeat duplicates each size `sz` times, so
# row i of the grid is size i throughout.
rate_grid = tile((s / s.index).to_numpy(), sz).reshape(sz, sz)
size_grid = repeat(s.index.to_numpy(), sz).reshape(sz, sz)

# After the transpose, cell [i, j] is rate[i] * size[j]; subtracting `s`
# aligns on the columns and removes each compare drive's list price.
df = DataFrame(
    (rate_grid * size_grid).T,
    index=s.index, columns=s.index,
) - s

print(df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cross-price table as a labelled xarray.DataArray, built with
# numpy.tile / numpy.repeat.
from xarray import DataArray
from numpy import array, tile, repeat

prices = array([89.00, 109.99, 149.14, 218.99, 239.09, 279.99, 329.99, 409.99])
sizes = array([4, 6, 8, 10, 12, 14, 16, 18])

per_tb = prices / sizes
count = prices.size

# Every row of rate_grid is the full list of per-TB rates; row i of size_grid
# is sizes[i] throughout.
rate_grid = tile(per_tb, count).reshape(count, count)
size_grid = repeat(sizes, count).reshape(count, count)

# After the transpose, cell [i, j] is per_tb[i] * sizes[j]; broadcasting
# `- prices` subtracts prices[j] from every entry of column j.
da = DataArray(
    (rate_grid * size_grid).T - prices,
    dims=['purchase', 'compare'],
    coords={
        'purchase': sizes,
        'compare': sizes,
    },
)

# Restrict both dimensions to the drives larger than 10.
sz = sizes[sizes > 10]
res = da.sel(purchase=sz, compare=sz)

print(res.round(2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrates the difference between numpy.tile and numpy.repeat.
from numpy import array, tile, repeat

xs = array([1, 2, 3])
ys = array([4, 5, 6])

# tile repeats the whole array end to end: 1 2 3 1 2 3 1 2 3
print(tile(xs, ys.size))
# repeat duplicates each element in place: 4 4 4 5 5 5 6 6 6
print(repeat(ys, xs.size))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shows that reshaped tile / repeat outputs are exactly the two grids that
# numpy.meshgrid returns.
from numpy import array, tile, repeat, meshgrid

xs = array([1, 2, 3])
ys = array([4, 5, 6])

# Hand-built grids of shape (len(ys), len(xs)): `a` varies along columns,
# `b` varies along rows.
a = tile(xs, ys.size).reshape(ys.size, xs.size)
b = repeat(ys, xs.size).reshape(ys.size, xs.size)

# The same pair of grids from meshgrid.
c, d = meshgrid(xs, ys)

print(a, b, c, d, sep='\n')

# The grids, and therefore their element-wise products, must agree.
assert (a == c).all() and (b == d).all() and ((a * b) == (c * d)).all()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cross-price table as a labelled xarray.DataArray, built with
# numpy.meshgrid.
from xarray import DataArray
from numpy import array, meshgrid, multiply

prices = array([89.00, 109.99, 149.14, 218.99, 239.09, 279.99, 329.99, 409.99])
sizes = array([4, 6, 8, 10, 12, 14, 16, 18])

# meshgrid(x, y) returns grids of shape (len(y), len(x)): `rates` varies
# along columns and `capacities` along rows.
rates, capacities = meshgrid(prices / sizes, sizes)

# After the transpose, cell [i, j] is rate[i] * sizes[j]; broadcasting
# `- prices` subtracts prices[j] from every entry of column j.
da = DataArray(
    multiply(rates, capacities).T - prices,
    dims=['purchase', 'compare'],
    coords={
        'purchase': sizes,
        'compare': sizes,
    },
)

# Restrict both dimensions to the drives larger than 10.
sz = sizes[sizes > 10]
res = da.sel(purchase=sz, compare=sz)

print(res.round(2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tweeted about here:
`pandas` problem of the day: suppose we need to buy a hard drive to store our burgeoning collection of Arch Linux ISOs (among other things), and we want to find the best “deal.” How do we structure this analysis as fluently as possible?