Skip to content

Instantly share code, notes, and snippets.

@alanbernstein
Created April 27, 2021 18:21
Show Gist options
  • Save alanbernstein/b7c74fb87264fdfb8d6f3dd3dc1c99de to your computer and use it in GitHub Desktop.
Save alanbernstein/b7c74fb87264fdfb8d6f3dd3dc1c99de to your computer and use it in GitHub Desktop.
Are Texas-sized crunchberries really 3x as big?
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox
# Raw measurements: one [min, max] diameter pair per sampled crunchberry,
# ten samples per variety. The values are later plotted on a
# 'diameter (mm)' axis.
normal_samples_min_max_diam = [
    [12, 14], [13, 15], [12, 15], [13, 16], [12, 14],
    [12, 15], [12, 15], [12, 14], [11, 15], [10, 14],
]
texas_samples_min_max_diam = [
    [15, 18], [15, 18], [16, 20], [14, 19], [15, 19],
    [15, 19], [14, 19], [14, 19], [15, 19], [13, 17],
]

# Short aliases used by the rest of the script (1 = normal, 2 = Texas sized).
d1, d2 = normal_samples_min_max_diam, texas_samples_min_max_diam
def make_hist_xy(ds):
    """Build a coverage histogram from (min, max) integer ranges.

    Every pair ``(mn, mx)`` in *ds* adds one count to each integer value in
    the inclusive range ``mn..mx``. A pair with ``mn > mx`` covers nothing
    and therefore contributes no counts.

    Args:
        ds: Iterable of two-item ``(mn, mx)`` pairs of integers.

    Returns:
        A tuple ``(x, y)`` of 1-D integer NumPy arrays of equal length:
        ``x`` holds the covered values in ascending order and ``y`` holds
        how many ranges cover each value. Both are empty for empty input.
    """
    hist = defaultdict(int)
    for mn, mx in ds:
        for d in range(mn, mx + 1):  # +1 makes the upper bound inclusive
            hist[d] += 1

    x = sorted(hist)
    y = [hist[value] for value in x]
    # An explicit integer dtype keeps empty results integer-typed; NumPy
    # would otherwise default an empty list to float64.
    return np.array(x, dtype=int), np.array(y, dtype=int)
# Coverage histograms for both varieties (x = diameter value, y = count).
x1, y1 = make_hist_xy(d1)
x2, y2 = make_hist_xy(d2)

# Scale the counts so each histogram sums to 1.
y1_norm = y1 / y1.sum()
y2_norm = y2 / y2.sum()

# Mean diameter of each variety: the diameters weighted by the normalised
# histogram.
mean1 = (x1 * y1_norm).sum()
mean2 = (x2 * y2_norm).sum()

# How much bigger the Texas-sized mean is, linearly and as the cube of the
# diameters (the quantity the plot compares against the "3x" claim).
diameter_ratio = mean2 / mean1
volume_ratio = mean2 ** 3 / mean1 ** 3

# Debug aid: print(mean1, mean2) and print(volume_ratio) to inspect values.
# Original note: cbrt(1.98) * mean1 = mean2
# Draw both normalised diameter distributions as overlaid, semi-transparent
# bars, mark each distribution's mean, and annotate the plot with the
# diameter and volume ratios.
plt.bar(x1, y1_norm, alpha=.5, facecolor='r', label='crunchberries')
plt.bar(x2, y2_norm, alpha=.5, facecolor='b', label='crunchberries (Texas sized)')

# Height of each distribution at its mean, linearly interpolated between the
# neighbouring bars. This replaces a hard-coded index ([3]) that only lined
# up with the mean for one particular data set.
mean1_height = np.interp(mean1, x1, y1_norm)
mean2_height = np.interp(mean2, x2, y2_norm)
plt.plot(mean1, mean1_height, 'ro')
plt.plot(mean2, mean2_height, 'bo')

# NOTE(review): these images are loaded but never drawn; they were meant for
# an imshow overlay that is currently disabled. The loads are kept so the
# script's file requirements are unchanged, but they raise if the PNG files
# are missing.
img_blue = mpimg.imread('crunchberries/crunchberry-blue.png')
img_red = mpimg.imread('crunchberries/crunchberry-red-scaled.png')

# Label the normal-sized mean just above its marker.
plt.text(mean1, mean1_height + .01, ' ⌀ %4.2f mm' % mean1)

# Label the Texas-sized mean, expressed as the normal mean times the diameter
# ratio and, equivalently, times the cube root of the volume ratio.
texas_label = '⌀ %4.2f mm' % mean2
texas_label += '\n = %4.2f mm * %4.2f' % (mean1, diameter_ratio)
texas_label += '\n = %4.2f mm * ∛%4.2f' % (mean1, volume_ratio)
texas_label2 = '%4.2f < 3.0 — NO' % (volume_ratio)
plt.text(mean2 + 1, mean2_height + .01, texas_label)
plt.text(mean2 + 1, mean2_height - .01, texas_label2, fontweight='bold')

plt.ylim([0, .30])
plt.xlabel('diameter (mm)')
plt.ylabel('distribution')
title = 'Are Texas sized crunchberries really 3x bigger?'
plt.title(title)
plt.legend()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment