Skip to content

Instantly share code, notes, and snippets.

@nebuta
Created April 13, 2019 16:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nebuta/fed447861b2e40ee8fcb122ff88b313a to your computer and use it in GitHub Desktop.
Save nebuta/fed447861b2e40ee8fcb122ff88b313a to your computer and use it in GitHub Desktop.
import csv
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import rc
font = {'family':'IPAexGothic'}
rc('font', **font)
# https://stackoverflow.com/questions/39239087/run-a-chi-square-test-with-observation-and-expectation-counts-and-get-confidence
def diffprop(obs):
"""
`obs` must be a 2x2 numpy array.
Returns:
delta
The difference in proportions
ci
The Wald 95% confidence interval for delta
corrected_ci
Yates continuity correction for the 95% confidence interval of delta.
"""
n1, n2 = obs.sum(axis=1)
prop1 = obs[0,0] / n1
prop2 = obs[1,0] / n2
delta = prop1 - prop2
# Wald 95% confidence interval for delta
se = np.sqrt(prop1*(1 - prop1)/n1 + prop2*(1 - prop2)/n2)
ci = (delta - 1.96*se, delta + 1.96*se)
# Yates continuity correction for confidence interval of delta
correction = 0.5*(1/n1 + 1/n2)
corrected_ci = (ci[0] - correction, ci[1] + correction)
return delta, ci, corrected_ci
def one_row(r,j):
# mt: male total, fp: female pass, ff: female fail, etc.
v = [int(s.replace(',','').replace('-','0')) for s in r[j:j+6]]
return {'mt': v[0],'ft': v[1], 'mp': v[3], 'fp': v[4], 'mf': v[0] - v[3], 'ff': v[1] - v[4]}
def get_of_year(v):
return np.array([[v['mp'],v['mf']],[v['fp'],v['ff']]])
def get_confidence_interval(v):
m_bottom, m_up = stats.binom.interval(alpha=0.95, n=v['mt'], p=v['mp']/v['mt'], loc=0)
f_bottom, f_up = stats.binom.interval(alpha=0.95, n=v['ft'], p=v['fp']/v['ft'], loc=0)
return np.array(
[[v['mp']/v['mt'],m_bottom/v['mt'],m_up/v['mt']],
[v['fp']/v['ft'],f_bottom/v['ft'],f_up/v['ft']]])
values = {}
with open('data.csv') as f:
reader = csv.reader(f)
for _ in range(3):
next(reader)
for row in reader:
if row[3] == '合計':
vss = [one_row(row,loc) for i,loc in enumerate([4,12,20,28,36,44])]
vs_total = {'mt': sum([v['mt'] for v in vss]),'ft': sum([v['ft'] for v in vss]),
'mp': sum([v['mp'] for v in vss]),'fp': sum([v['fp'] for v in vss]),
'mf': sum([v['mf'] for v in vss]),'ff': sum([v['ff'] for v in vss])}
vss.append(vs_total)
values[row[2]] = vss
count = 1
figcount = 1
plt.subplots_adjust(hspace=0.7)
delta_cis = []
names = []
# plt.figure(figsize=(15,12))
for name,vss in values.items():
for vs,year in zip(vss,[2018,2017,2016,2015,2014,2013,'Total']):
if year != 'Total':
continue
try:
if count > 12:
plt.savefig('fig %d.pdf' % figcount)
plt.clf()
figcount += 1
# plt.show()
# plt.figure(figsize=(15,12))
plt.subplots_adjust(hspace=0.7)
count = 1
obs = get_of_year(vs)
chi2, p, dof, ex = stats.chi2_contingency(obs,correction=False)
print(year,name,'%.3f' % p)
print(obs)
m_ci,f_ci = get_confidence_interval(vs)
delta,delta_ci,_ = diffprop(obs)
delta_cis.append(delta_ci)
names.append(name)
print(delta,delta_ci)
ax = plt.subplot(3,4,count)
if count % 4 != 1:
ax.get_yaxis().set_visible(False)
plt.bar([0,1],[m_ci[0],f_ci[0]],tick_label=['男','女'],alpha=0.4)
plt.errorbar([0,1],[m_ci[0],f_ci[0]], yerr=[m_ci[0]-m_ci[1],f_ci[0]-f_ci[1]],fmt='.',markersize=0)
plt.title(name,fontsize=10)
plt.ylim([0,0.5])
count += 1
except Exception as e:
print('error',e)
plt.savefig('fig %d.pdf' % figcount)
plt.clf()
figcount += 1
# plt.show()
plt.close()
# plt.figure()
fig, ax = plt.subplots(figsize=(7,14))
ys = [(ds[0]+ds[1])/2 for ds in delta_cis]
yerr = [(ds[1]-ds[0])/2 for ds in delta_cis]
colors = []
for y,n,e in zip(ys,range(len(names)),yerr):
c = (0,0,0)
if y - e > 0:
c = (0,0,1)
elif y + e < 0:
c = (1,0,0)
colors.append(c)
plt.errorbar(y,n,xerr=e,fmt='.',c=c,elinewidth=4,alpha=0.4)
ax.yaxis.set_ticks(np.arange(len(names)))
plt.plot([0,0],[-1,len(names)],ls='--',c=(0.3,0.3,0.3))
plt.ylim([-1,len(names)])
plt.xlim([-0.15,0.15])
plt.tick_params(labeltop=True,top=True)
ax.set_yticklabels(names,fontsize=6)
for ytick, c in zip(ax.get_yticklabels(),colors):
ytick.set_color(c)
ax.invert_yaxis()
ax.set_aspect(aspect=0.01)
plt.savefig('all_difference.pdf')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment