Skip to content

Instantly share code, notes, and snippets.

@dutc
Created March 3, 2021 02:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dutc/a64d962f663c5eac203349f24690f60b to your computer and use it in GitHub Desktop.
Save dutc/a64d962f663c5eac203349f24690f60b to your computer and use it in GitHub Desktop.
`pandas` problem of the day: analysis (with `numpy.meshgrid`‽)
from pandas import Series
# Price list: the index holds the drive sizes, the values hold the list prices.
sizes = [4, 6, 8, 10, 12, 14, 16, 18]
list_prices = [89.00, 109.99, 149.14, 218.99, 239.09, 279.99, 329.99, 409.99]
s = Series(list_prices, index=sizes, name='prices')

rounded = s.round(2)
print(rounded)
from pandas import Series, DataFrame
from numpy import array, meshgrid, multiply
# List prices keyed by drive size.
s = Series(
    {
        4: 89.00,
        6: 109.99,
        8: 149.14,
        10: 218.99,
        12: 239.09,
        14: 279.99,
        16: 329.99,
        18: 409.99,
    },
    name='prices',
)

# meshgrid(x, y) yields two (len(y), len(x)) grids; after the transpose,
# entry [i, j] is (price_i / size_i) * size_j.
rate_grid, size_grid = meshgrid(s / s.index, s.index)
hypothetical = multiply(rate_grid, size_grid).T

# Subtracting `s` aligns on the column labels, so each column is compared
# against that column's own list price.
df = DataFrame(hypothetical, index=s.index, columns=s.index) - s

# Keep only rows and columns whose size label is at least `minsize`.
minsize = 12
res = df.loc[minsize:, df.columns >= minsize]
print(res.round(2))
from pandas import Series
# List prices keyed by drive size (TB).
s = Series(
    {
        4: 89.00,
        6: 109.99,
        8: 149.14,
        10: 218.99,
        12: 239.09,
        14: 279.99,
        16: 329.99,
        18: 409.99,
    },
    name='prices',
)

# Unit price and its reciprocal, each labelled with its unit.
per_terabyte = (s / s.index).rename('$/TB')
per_dollar = (s.index / s).rename('TB/$')

print(per_terabyte.round(2))
print(per_dollar.round(4))
from pandas import Series, DataFrame
from numpy import diag, isclose
# List prices ($) keyed by drive size (TB).
s = Series({
    4: 89.00,
    6: 109.99,
    8: 149.14,
    10: 218.99,
    12: 239.09,
    14: 279.99,
    16: 329.99,
    18: 409.99,
}, name='prices')

# Unit price of every drive.
per_terabyte = s / s.index
per_terabyte.name = '$/TB'

# Row `idx` applies that drive's $/TB rate to every size and subtracts the
# list price of each size: entry [i, j] = size_j * rate_i - price_j.
# `Series.items()` replaces `iteritems()`, which was removed in pandas 2.0.
abs_df = DataFrame({
    idx: s.index * per_tb - s
    for idx, per_tb in per_terabyte.items()
}).T  # over/under-payment in absolute-$ vs list-price
pct_df = abs_df / s  # over/under-payment in percentage-$ vs list-price

# Sanity check: a drive compared against itself must show no difference.
assert isclose(diag(abs_df), 0).all() and isclose(diag(pct_df), 0).all()

print(abs_df.round(2))
print((100 * pct_df).round(0))

# narrowed our actual options…
# One `.loc` call selects rows and columns together (no chained indexing).
minsize = 12
res = abs_df.loc[minsize:, abs_df.columns >= minsize]
print(res.round(2))
from pandas import Series, DataFrame
from numpy import array, tile, repeat
# List prices keyed by drive size.
s = Series(
    {
        4: 89.00,
        6: 109.99,
        8: 149.14,
        10: 218.99,
        12: 239.09,
        14: 279.99,
        16: 329.99,
        18: 409.99,
    },
    name='prices',
)

sz = s.index.size

# Every row of `rate_rows` is the full vector of price/size ratios;
# every row of `size_rows` is a single size repeated `sz` times.
rate_rows = tile((s / s.index).values, sz).reshape(sz, sz)
size_rows = repeat(s.index.values, sz).reshape(sz, sz)

# After the transpose, entry [i, j] is ratio_i * size_j; subtracting `s`
# aligns on the column labels and removes price_j from column j.
df = DataFrame((rate_rows * size_rows).T, index=s.index, columns=s.index) - s
print(df)
from xarray import DataArray
from numpy import array, tile, repeat
prices = array([89.00, 109.99, 149.14, 218.99, 239.09, 279.99, 329.99, 409.99])
sizes = array([4, 6, 8, 10, 12, 14, 16, 18])

sz = prices.size
per_tb = prices / sizes

# Every row of `rate_rows` is the full per-size ratio vector;
# every row of `size_rows` is a single size repeated `sz` times.
rate_rows = tile(per_tb, sz).reshape(sz, sz)
size_rows = repeat(sizes, sz).reshape(sz, sz)

# After the transpose, entry [i, j] is per_tb[i] * sizes[j]; subtracting
# `prices` broadcasts along the last ('compare') axis.
da = DataArray(
    (rate_rows * size_rows).T - prices,
    dims=['purchase', 'compare'],
    coords={'purchase': sizes, 'compare': sizes},
)

# `sz` is deliberately rebound here, as in the original: it now holds the
# size labels greater than 10, used to select along both dimensions.
sz = sizes[sizes > 10]
res = da.sel(purchase=sz, compare=sz)
print(res.round(2))
from numpy import array, tile, repeat
xs = array([1, 2, 3])
ys = array([4, 5, 6])

# tile repeats the whole array end to end; repeat repeats each element in place.
tiled_xs = tile(xs, ys.size)
repeated_ys = repeat(ys, xs.size)
print(tiled_xs)
print(repeated_ys)
from numpy import array, tile, repeat, meshgrid
xs = array([1, 2, 3])
ys = array([4, 5, 6])

# Hand-rolled grids: one row per element of ys, one column per element of xs.
grid_shape = (ys.size, xs.size)
a = tile(xs, ys.size).reshape(grid_shape)
b = repeat(ys, xs.size).reshape(grid_shape)

# The same two grids straight from meshgrid.
c, d = meshgrid(xs, ys)

for grid in (a, b, c, d):
    print(grid)

# tile/repeat + reshape reproduces meshgrid, and so do the products.
assert (a == c).all()
assert (b == d).all()
assert ((a * b) == (c * d)).all()
from xarray import DataArray
from numpy import array, meshgrid, multiply
prices = array([89.00, 109.99, 149.14, 218.99, 239.09, 279.99, 329.99, 409.99])
sizes = array([4, 6, 8, 10, 12, 14, 16, 18])

# meshgrid(x, y) yields two (len(y), len(x)) grids; after the transpose,
# entry [i, j] is (prices[i] / sizes[i]) * sizes[j].
rate_grid, size_grid = meshgrid(prices / sizes, sizes)

# Subtracting `prices` broadcasts along the last ('compare') axis.
da = DataArray(
    multiply(rate_grid, size_grid).T - prices,
    dims=['purchase', 'compare'],
    coords={'purchase': sizes, 'compare': sizes},
)

# Select the sizes greater than 10 along both dimensions.
sz = sizes[sizes > 10]
res = da.sel(purchase=sz, compare=sz)
print(res.round(2))