Skip to content

Instantly share code, notes, and snippets.

@sureshgorakala
Created December 10, 2015 00:29
Show Gist options
  • Save sureshgorakala/2e154edf53c4616f6955 to your computer and use it in GitHub Desktop.
Save sureshgorakala/2e154edf53c4616f6955 to your computer and use it in GitHub Desktop.
chi-square of Independence
import pandas
import numpy
import scipy.stats
import seaborn
import matplotlib.pyplot as plt
# any additional libraries would be imported here
# Load the course data extract. low_memory=False turns off pandas' chunked
# parsing of the file.
data = pandas.read_csv(
    'C:\\Suresh\\Blog Posts\\datasets\\nesarc_pds1134\\SPLITDATA\\CourseData.csv',
    low_memory=False,
)
# Report the dimensions: number of observations (rows), number of variables
# (columns), then the (rows, columns) tuple itself.
row_count, column_count = data.shape
print(row_count)
print(column_count)
print(data.shape)
# Coerce every variable used in the analysis to a numeric dtype, in place on
# `data`. errors='coerce' turns entries that cannot be parsed into NaN.
numeric_columns = [
    'AGE', 'SEX', 'DYSDX12',
    'S4CQ18A', 'S4CQ18B', 'S4CQ18C',
    'S4CQ19A', 'S4CQ19B', 'S4CQ19C',
    'S2AQ8A', 'S2AQ8B',
]
for column_name in numeric_columns:
    data[column_name] = pandas.to_numeric(data[column_name], errors='coerce')
#DYSLIFE
# Subset of young adults aged 18 to 25 (inclusive). Only AGE is filtered here;
# no drinking or dysthymia condition is applied at this step.
# .copy() gives sub1 its own data, so the column assignments below modify sub1
# itself instead of a slice of `data` (avoids pandas' SettingWithCopyWarning).
sub1 = data[(data["AGE"] >= 18) & (data["AGE"] <= 25)].copy()
print(sub1.shape)
#SETTING MISSING DATA
# Replace the listed code with NaN in each column: 9 for the S4CQ18*/S4CQ19*
# items, 99 for the S2AQ8* items.
# NOTE(review): presumably these are the survey's "unknown" codes - confirm
# against the codebook.
missing_codes = {
    'S4CQ18A': 9,
    'S4CQ18B': 9,
    'S4CQ18C': 9,
    'S4CQ19A': 9,
    'S4CQ19B': 9,
    'S4CQ19C': 9,
    'S2AQ8A': 99,
    'S2AQ8B': 99,
}
for column_name, missing_code in missing_codes.items():
    sub1[column_name] = sub1[column_name].replace(missing_code, numpy.nan)
# The result of head() is discarded when this runs as a plain script; it only
# shows something in an interactive session.
sub1.head(10)
# Recode the S2AQ8A category codes 1-10 into a new variable, USFREQYR, holding
# a per-year frequency value for each code. Codes without an entry (including
# the NaN values set above) map to NaN.
days_per_year = (365, 300, 162, 96, 48, 30, 12, 9, 4.5, 1.5)
recode1 = dict(zip(range(1, 11), days_per_year))
sub1['USFREQYR'] = sub1['S2AQ8A'].map(recode1)
# Contingency tables of observed counts: each S4CQ18*/S4CQ19* item (rows)
# against DYSDX12 (columns). Every table is printed as soon as it is built.
observed_tables = []
for item_name in ('S4CQ18A', 'S4CQ18B', 'S4CQ18C',
                  'S4CQ19A', 'S4CQ19B', 'S4CQ19C'):
    observed = pandas.crosstab(sub1[item_name], sub1['DYSDX12'])
    print(observed)
    observed_tables.append(observed)
# Keep the individual names that the later sections refer to.
ct1, ct2, ct3, ct4, ct5, ct6 = observed_tables
# Column proportions for the first three tables: every cell divided by the
# total of its DYSDX12 column (fractions, not multiplied by 100).
colpct = ct1 / ct1.sum(axis=0)
print(colpct)
colpct2 = ct2 / ct2.sum(axis=0)
print(colpct2)
colsum = ct3.sum(axis=0)
colpct3 = ct3 / colsum
print(colpct3)
# Chi-square test of independence on each of the six observed-count tables.
# The printed result holds the statistic, the p value, the degrees of freedom
# and the array of expected counts.
chi_square_results = []
for observed in (ct1, ct2, ct3, ct4, ct5, ct6):
    print('chi-square value, p value, expected counts')
    outcome = scipy.stats.chi2_contingency(observed)
    print(outcome)
    chi_square_results.append(outcome)
# Keep one name per test, matching the table numbering.
cs1, cs2, cs3, cs4, cs5, cs6 = chi_square_results
# Observed counts of DYSDX12 (rows) by USFREQYR level (columns).
ct = pandas.crosstab(sub1['DYSDX12'], sub1['USFREQYR'])
print(ct)
# Share of each USFREQYR column that falls in each DYSDX12 row.
colsum = ct.sum(axis=0)
colpctx = ct.div(colsum, axis='columns')
print(colpctx)
# Omnibus chi-square test across all USFREQYR levels at once.
print('chi-square value, p value, expected counts')
cs = scipy.stats.chi2_contingency(ct)
print(cs)
# Bar chart of the mean of DYSDX12 at each USFREQYR level; with a 0/1 coded
# DYSDX12 that mean is the proportion coded 1.
# USFREQYR is left as the numeric column produced by the recode: the previous
# category -> numeric round trip ended with the dtype it started with, so it
# has been dropped.
# seaborn renamed factorplot to catplot and later removed the old name, so
# prefer catplot and fall back to factorplot only on old installations.
_category_plot = getattr(seaborn, 'catplot', None) or seaborn.factorplot
_category_plot(x="USFREQYR", y="DYSDX12", data=sub1, kind="bar", ci=None)
plt.xlabel('Days drank per year')
plt.ylabel('Proportion Dysthemia')
# Post hoc pairwise comparisons: for every pair of USFREQYR levels, keep only
# the rows at those two levels and test DYSDX12 against them.
#
# Each entry is one comparison; the order matches the COMP1v21..COMP1v65
# numbering, so comparison ids are assigned by position starting at 21.
# The (4.5, 300) pair previously recoded 4.5 to 5.5, which mislabelled that
# group in the COMP1v34 table; every level now maps to itself.
#
# NOTE(review): the p values printed below are unadjusted. With 45 comparisons
# a Bonferroni-adjusted threshold would be 0.05 / 45 (about 0.0011).
posthoc_pairs = [
    (1.5, 12), (1.5, 30), (1.5, 48), (1.5, 96),
    (1.5, 162), (1.5, 300), (1.5, 365),
    (4.5, 9), (4.5, 12), (4.5, 30), (4.5, 48),
    (4.5, 96), (4.5, 162), (4.5, 300), (4.5, 365),
    (9, 12), (9, 30), (9, 48), (9, 96),
    (9, 162), (9, 300), (9, 365),
    (12, 30), (12, 48), (12, 96),
    (12, 162), (12, 300), (12, 365),
    (30, 48), (30, 96), (30, 162), (30, 300), (30, 365),
    (96, 162), (96, 300), (96, 365),
    (162, 300), (300, 365), (162, 365),
    (1.5, 4.5), (1.5, 9),
    (48, 365), (48, 300), (48, 162), (48, 96),
]

# Results are kept by comparison column name ('COMP1v21', ...), in case they
# are needed after the loop.
posthoc_tables = {}
posthoc_results = {}

for comparison_id, (first_level, second_level) in enumerate(posthoc_pairs, start=21):
    column_name = 'COMP1v%d' % comparison_id
    # Rows at either level keep their value; all other rows map to NaN and
    # therefore drop out of the crosstab.
    recode2 = {first_level: first_level, second_level: second_level}
    sub1[column_name] = sub1['USFREQYR'].map(recode2)
    # contingency table of observed counts
    pair_table = pandas.crosstab(sub1['DYSDX12'], sub1[column_name])
    print(pair_table)
    # column proportions: each cell divided by its column total
    colsum = pair_table.sum(axis=0)
    print(pair_table / colsum)
    print('chi-square value, p value, expected counts')
    pair_result = scipy.stats.chi2_contingency(pair_table)
    print(pair_result)
    posthoc_tables[column_name] = pair_table
    posthoc_results[column_name] = pair_result
'''
43093
25
(43093, 25)
(5838, 25)
DYSDX12 0 1
S4CQ18A
1 19 26
2 83 94
DYSDX12 0 1
S4CQ18B
1 3 16
2 99 104
DYSDX12 0 1
S4CQ18C
1 19 24
2 83 96
DYSDX12 0 1
S4CQ19A
1 18 21
2 84 99
DYSDX12 0 1
S4CQ19B
1 2 9
2 95 106
DYSDX12 0 1
S4CQ19C
1 13 16
2 84 99
DYSDX12 0 1
S4CQ18A
1 0.186275 0.216667
2 0.813725 0.783333
DYSDX12 0 1
S4CQ18B
1 0.029412 0.133333
2 0.970588 0.866667
DYSDX12 0 1
S4CQ18C
1 0.186275 0.2
2 0.813725 0.8
chi-square value, p value, expected counts
(0.15511964107677009, 0.6936900824150003, 1, array([[ 20.67567568, 24.32432432],
[ 81.32432432, 95.67567568]]))
chi-square value, p value, expected counts
(6.3384979944790985, 0.011814488426313271, 1, array([[ 8.72972973, 10.27027027],
[ 93.27027027, 109.72972973]]))
chi-square value, p value, expected counts
(0.0076559622159894907, 0.93027539253689573, 1, array([[ 19.75675676, 23.24324324],
[ 82.24324324, 96.75675676]]))
chi-square value, p value, expected counts
(0.021979699824444317, 0.8821411156218304, 1, array([[ 17.91891892, 21.08108108],
[ 84.08108108, 98.91891892]]))
chi-square value, p value, expected counts
(2.4787122616005983, 0.11539669868109208, 1, array([[ 5.03301887, 5.96698113],
[ 91.96698113, 109.03301887]]))
chi-square value, p value, expected counts
(0.0085982359504715913, 0.92612070603106611, 1, array([[ 13.26886792, 15.73113208],
[ 83.73113208, 99.26886792]]))
USFREQYR 1.5 4.5 9.0 12.0 30.0 48.0 96.0 162.0 300.0 365.0
DYSDX12
0 536 468 258 427 649 520 463 350 121 128
1 14 10 13 9 11 8 12 10 7 6
USFREQYR 1.5 4.5 9.0 12.0 30.0 48.0 96.0 \
DYSDX12
0 0.974545 0.979079 0.95203 0.979358 0.983333 0.984848 0.974737
1 0.025455 0.020921 0.04797 0.020642 0.016667 0.015152 0.025263
USFREQYR 162.0 300.0 365.0
DYSDX12
0 0.972222 0.945312 0.955224
1 0.027778 0.054688 0.044776
chi-square value, p value, expected counts
(17.494163340774072, 0.041516886183372334, 9, array([[ 536.31840796, 466.10945274, 264.25870647, 425.15422886,
643.58208955, 514.86567164, 463.1840796 , 351.04477612,
124.8159204 , 130.66666667],
[ 13.68159204, 11.89054726, 6.74129353, 10.84577114,
16.41791045, 13.13432836, 11.8159204 , 8.95522388,
3.1840796 , 3.33333333]]))
COMP1v21 1.5 12.0
DYSDX12
0 536 427
1 14 9
COMP1v21 1.5 12.0
DYSDX12
0 0.974545 0.979358
1 0.025455 0.020642
chi-square value, p value, expected counts
(0.081110397918773974, 0.77579823589352459, 1, array([[ 537.1703854, 425.8296146],
[ 12.8296146, 10.1703854]]))
COMP1v22 1.5 30.0
DYSDX12
0 536 649
1 14 11
COMP1v22 1.5 30.0
DYSDX12
0 0.974545 0.983333
1 0.025455 0.016667
chi-square value, p value, expected counts
(0.75186779184247543, 0.38588552038873458, 1, array([[ 538.63636364, 646.36363636],
[ 11.36363636, 13.63636364]]))
COMP1v23 1.5 48.0
DYSDX12
0 536 520
1 14 8
COMP1v23 1.5 48.0
DYSDX12
0 0.974545 0.984848
1 0.025455 0.015152
chi-square value, p value, expected counts
(0.96145872790404086, 0.32681962580316493, 1, array([[ 538.7755102, 517.2244898],
[ 11.2244898, 10.7755102]]))
COMP1v24 1.5 96.0
DYSDX12
0 536 463
1 14 12
COMP1v24 1.5 96.0
DYSDX12
0 0.974545 0.974737
1 0.025455 0.025263
chi-square value, p value, expected counts
(0.032311134284818535, 0.85734649774401372, 1, array([[ 536.04878049, 462.95121951],
[ 13.95121951, 12.04878049]]))
COMP1v25 1.5 162.0
DYSDX12
0 536 350
1 14 10
COMP1v25 1.5 162.0
DYSDX12
0 0.974545 0.972222
1 0.025455 0.027778
chi-square value, p value, expected counts
(5.4034513684625857e-06, 0.99814529307319433, 1, array([[ 535.49450549, 350.50549451],
[ 14.50549451, 9.49450549]]))
COMP1v26 1.5 300.0
DYSDX12
0 536 121
1 14 7
COMP1v26 1.5 300.0
DYSDX12
0 0.974545 0.945312
1 0.025455 0.054688
chi-square value, p value, expected counts
(2.0626432685465224, 0.15094813001296287, 1, array([[ 532.96460177, 124.03539823],
[ 17.03539823, 3.96460177]]))
COMP1v27 1.5 365.0
DYSDX12
0 536 128
1 14 6
COMP1v27 1.5 365.0
DYSDX12
0 0.974545 0.955224
1 0.025455 0.044776
chi-square value, p value, expected counts
(0.81817284334079832, 0.36571492563005281, 1, array([[ 533.91812865, 130.08187135],
[ 16.08187135, 3.91812865]]))
COMP1v28 4.5 9.0
DYSDX12
0 468 258
1 10 13
COMP1v28 4.5 9.0
DYSDX12
0 0.979079 0.95203
1 0.020921 0.04797
chi-square value, p value, expected counts
(3.3913302621065133, 0.065540052123220491, 1, array([[ 463.32176235, 262.67823765],
[ 14.67823765, 8.32176235]]))
COMP1v29 4.5 12.0
DYSDX12
0 468 427
1 10 9
COMP1v29 4.5 12.0
DYSDX12
0 0.979079 0.979358
1 0.020921 0.020642
chi-square value, p value, expected counts
(0.041058300124229889, 0.83942542943834042, 1, array([[ 468.06345733, 426.93654267],
[ 9.93654267, 9.06345733]]))
COMP1v30 4.5 30.0
DYSDX12
0 468 649
1 10 11
COMP1v30 4.5 30.0
DYSDX12
0 0.979079 0.983333
1 0.020921 0.016667
chi-square value, p value, expected counts
(0.091887608780024402, 0.76179102883057059, 1, array([[ 469.17926186, 647.82073814],
[ 8.82073814, 12.17926186]]))
COMP1v31 4.5 48.0
DYSDX12
0 468 520
1 10 8
COMP1v31 4.5 48.0
DYSDX12
0 0.979079 0.984848
1 0.020921 0.015152
chi-square value, p value, expected counts
(0.20356003325923222, 0.65186250579499883, 1, array([[ 469.4473161, 518.5526839],
[ 8.5526839, 9.4473161]]))
COMP1v32 4.5 96.0
DYSDX12
0 468 463
1 10 12
COMP1v32 4.5 96.0
DYSDX12
0 0.979079 0.974737
1 0.020921 0.025263
chi-square value, p value, expected counts
(0.053197032823638704, 0.81759074518497599, 1, array([[ 466.96537251, 464.03462749],
[ 11.03462749, 10.96537251]]))
COMP1v33 4.5 162.0
DYSDX12
0 468 350
1 10 10
COMP1v33 4.5 162.0
DYSDX12
0 0.979079 0.972222
1 0.020921 0.027778
chi-square value, p value, expected counts
(0.17238506440400136, 0.67800076631344419, 1, array([[ 466.59188544, 351.40811456],
[ 11.40811456, 8.59188544]]))
COMP1v34 5.5 300.0
DYSDX12
0 468 121
1 10 7
COMP1v34 5.5 300.0
DYSDX12
0 0.979079 0.945312
1 0.020921 0.054688
chi-square value, p value, expected counts
(3.074504735762964, 0.079528813603024121, 1, array([[ 464.59075908, 124.40924092],
[ 13.40924092, 3.59075908]]))
COMP1v35 4.5 365.0
DYSDX12
0 468 128
1 10 6
COMP1v35 4.5 365.0
DYSDX12
0 0.979079 0.955224
1 0.020921 0.044776
chi-square value, p value, expected counts
(1.4962180738636439, 0.22125419123797496, 1, array([[ 465.50326797, 130.49673203],
[ 12.49673203, 3.50326797]]))
COMP1v36 9 12
DYSDX12
0 258 427
1 13 9
COMP1v36 9 12
DYSDX12
0 0.95203 0.979358
1 0.04797 0.020642
chi-square value, p value, expected counts
(3.2830471896409597, 0.069998852719869734, 1, array([[ 262.56718529, 422.43281471],
[ 8.43281471, 13.56718529]]))
COMP1v37 9 30
DYSDX12
0 258 649
1 13 11
COMP1v37 9 30
DYSDX12
0 0.95203 0.983333
1 0.04797 0.016667
chi-square value, p value, expected counts
(6.3015162206896074, 0.012063474767743424, 1, array([[ 264.01396348, 642.98603652],
[ 6.98603652, 17.01396348]]))
COMP1v38 9 48
DYSDX12
0 258 520
1 13 8
COMP1v38 9 48
DYSDX12
0 0.95203 0.984848
1 0.04797 0.015152
chi-square value, p value, expected counts
(6.3092001713192047, 0.012011301386107615, 1, array([[ 263.87734668, 514.12265332],
[ 7.12265332, 13.87734668]]))
COMP1v39 9 96
DYSDX12
0 258 463
1 13 12
COMP1v39 9 96
DYSDX12
0 0.95203 0.974737
1 0.04797 0.025263
chi-square value, p value, expected counts
(2.0906484676205248, 0.14820315104250653, 1, array([[ 261.91823056, 459.08176944],
[ 9.08176944, 15.91823056]]))
COMP1v40 9 162
DYSDX12
0 258 350
1 13 10
COMP1v40 9 162
DYSDX12
0 0.95203 0.972222
1 0.04797 0.027778
chi-square value, p value, expected counts
(1.2660757639067748, 0.26050415686771855, 1, array([[ 261.12202853, 346.87797147],
[ 9.87797147, 13.12202853]]))
COMP1v41 9 300
DYSDX12
0 258 121
1 13 7
COMP1v41 9 300
DYSDX12
0 0.95203 0.945312
1 0.04797 0.054688
chi-square value, p value, expected counts
(0.0017029976301370822, 0.96708272844891252, 1, array([[ 257.4160401, 121.5839599],
[ 13.5839599, 6.4160401]]))
COMP1v42 9 365
DYSDX12
0 258 128
1 13 6
COMP1v42 9 365
DYSDX12
0 0.95203 0.955224
1 0.04797 0.044776
chi-square value, p value, expected counts
(0.011378165416546245, 0.91505198627505746, 1, array([[ 258.28641975, 127.71358025],
[ 12.71358025, 6.28641975]]))
COMP1v43 12 30
DYSDX12
0 427 649
1 9 11
COMP1v43 12 30
DYSDX12
0 0.979358 0.983333
1 0.020642 0.016667
chi-square value, p value, expected counts
(0.062868106734959933, 0.8020188962236966, 1, array([[ 428.04379562, 647.95620438],
[ 7.95620438, 12.04379562]]))
COMP1v44 12 48
DYSDX12
0 427 520
1 9 8
COMP1v44 12 48
DYSDX12
0 0.979358 0.984848
1 0.020642 0.015152
chi-square value, p value, expected counts
(0.1590637539965693, 0.69001996225727391, 1, array([[ 428.31120332, 518.68879668],
[ 7.68879668, 9.31120332]]))
COMP1v45 12 96
DYSDX12
0 427 463
1 9 12
COMP1v45 12 96
DYSDX12
0 0.979358 0.974737
1 0.020642 0.025263
chi-square value, p value, expected counts
(0.059193068164174065, 0.80777590458431159, 1, array([[ 425.94950604, 464.05049396],
[ 10.05049396, 10.94950604]]))
COMP1v46 12 162
DYSDX12
0 427 350
1 9 10
COMP1v46 12 162
DYSDX12
0 0.979358 0.972222
1 0.020642 0.027778
chi-square value, p value, expected counts
(0.17907042035482401, 0.67217332657531625, 1, array([[ 425.59296482, 351.40703518],
[ 10.40703518, 8.59296482]]))
COMP1v47 12 300
DYSDX12
0 427 121
1 9 7
COMP1v47 12 300
DYSDX12
0 0.979358 0.945312
1 0.020642 0.054688
chi-square value, p value, expected counts
(3.0174434413819222, 0.08237322806184387, 1, array([[ 423.63120567, 124.36879433],
[ 12.36879433, 3.63120567]]))
COMP1v48 12 365
DYSDX12
0 427 128
1 9 6
COMP1v48 12 365
DYSDX12
0 0.979358 0.955224
1 0.020642 0.044776
chi-square value, p value, expected counts
(1.4832158942456082, 0.22327204289175773, 1, array([[ 424.52631579, 130.47368421],
[ 11.47368421, 3.52631579]]))
COMP1v49 30 48
DYSDX12
0 649 520
1 11 8
COMP1v49 30 48
DYSDX12
0 0.983333 0.984848
1 0.016667 0.015152
chi-square value, p value, expected counts
(0.00066858763675655254, 0.97937134843065576, 1, array([[ 649.44444444, 519.55555556],
[ 10.55555556, 8.44444444]]))
COMP1v50 30 96
DYSDX12
0 649 463
1 11 12
COMP1v50 30 96
DYSDX12
0 0.983333 0.974737
1 0.016667 0.025263
chi-square value, p value, expected counts
(0.64071491012931314, 0.42345203638584983, 1, array([[ 646.62555066, 465.37444934],
[ 13.37444934, 9.62555066]]))
COMP1v51 30 162
DYSDX12
0 649 350
1 11 10
COMP1v51 30 162
DYSDX12
0 0.983333 0.972222
1 0.016667 0.027778
chi-square value, p value, expected counts
(0.92838509505176137, 0.3352829089888848, 1, array([[ 646.41176471, 352.58823529],
[ 13.58823529, 7.41176471]]))
COMP1v52 30 300
DYSDX12
0 649 121
1 11 7
COMP1v52 30 300
DYSDX12
0 0.983333 0.945312
1 0.016667 0.054688
chi-square value, p value, expected counts
(5.3443028362630649, 0.020790095513174879, 1, array([[ 644.92385787, 125.07614213],
[ 15.07614213, 2.92385787]]))
COMP1v53 30 365
DYSDX12
0 649 128
1 11 6
COMP1v53 30 365
DYSDX12
0 0.983333 0.955224
1 0.016667 0.044776
chi-square value, p value, expected counts
(2.9660516422149432, 0.085029151995431748, 1, array([[ 645.86901763, 131.13098237],
[ 14.13098237, 2.86901763]]))
COMP1v54 96 162
DYSDX12
0 463 350
1 12 10
COMP1v54 96 162
DYSDX12
0 0.974737 0.972222
1 0.025263 0.027778
chi-square value, p value, expected counts
(4.2657673386938429e-05, 0.99478882621881659, 1, array([[ 462.48502994, 350.51497006],
[ 12.51497006, 9.48502994]]))
COMP1v55 96 300
DYSDX12
0 463 121
1 12 7
COMP1v55 96 300
DYSDX12
0 0.974737 0.945312
1 0.025263 0.054688
chi-square value, p value, expected counts
(1.9777037344390345, 0.1596325227118105, 1, array([[ 460.0331675, 123.9668325],
[ 14.9668325, 4.0331675]]))
COMP1v56 96 365
DYSDX12
0 463 128
1 12 6
COMP1v56 96 365
DYSDX12
0 0.974737 0.955224
1 0.025263 0.044776
chi-square value, p value, expected counts
(0.79049923040421732, 0.37394921188019159, 1, array([[ 460.96059113, 130.03940887],
[ 14.03940887, 3.96059113]]))
COMP1v57 162 300
DYSDX12
0 350 121
1 10 7
COMP1v57 162 300
DYSDX12
0 0.972222 0.945312
1 0.027778 0.054688
chi-square value, p value, expected counts
(1.3120675658798562, 0.25202065226590509, 1, array([[ 347.45901639, 123.54098361],
[ 12.54098361, 4.45901639]]))
COMP1v58 300 365
DYSDX12
0 121 128
1 7 6
COMP1v58 300 365
DYSDX12
0 0.945312 0.955224
1 0.054688 0.044776
chi-square value, p value, expected counts
(0.0071774973026434005, 0.93248390673692383, 1, array([[ 121.64885496, 127.35114504],
[ 6.35114504, 6.64885496]]))
COMP1v59 162 365
DYSDX12
0 350 128
1 10 6
COMP1v59 162 365
DYSDX12
0 0.972222 0.955224
1 0.027778 0.044776
chi-square value, p value, expected counts
(0.4396236163585836, 0.5073041756642318, 1, array([[ 348.34008097, 129.65991903],
[ 11.65991903, 4.34008097]]))
COMP1v60 1.5 4.5
DYSDX12
0 536 468
1 14 10
COMP1v60 1.5 4.5
DYSDX12
0 0.974545 0.979079
1 0.025455 0.020921
chi-square value, p value, expected counts
(0.074596244159914046, 0.78475856723511073, 1, array([[ 537.15953307, 466.84046693],
[ 12.84046693, 11.15953307]]))
COMP1v61 1.5 9.0
DYSDX12
0 536 258
1 14 13
COMP1v61 1.5 9.0
DYSDX12
0 0.974545 0.95203
1 0.025455 0.04797
chi-square value, p value, expected counts
(2.2291791087153672, 0.13542578166343269, 1, array([[ 531.91230207, 262.08769793],
[ 18.08769793, 8.91230207]]))
COMP1v62 48 365
DYSDX12
0 520 128
1 8 6
COMP1v62 48 365
DYSDX12
0 0.984848 0.955224
1 0.015152 0.044776
chi-square value, p value, expected counts
(3.2129624239064007, 0.073057097904547216, 1, array([[ 516.83383686, 131.16616314],
[ 11.16616314, 2.83383686]]))
COMP1v63 48 300
DYSDX12
0 520 121
1 8 7
COMP1v63 48 300
DYSDX12
0 0.984848 0.945312
1 0.015152 0.054688
chi-square value, p value, expected counts
(5.5465875604721164, 0.01851675302058986, 1, array([[ 515.92682927, 125.07317073],
[ 12.07317073, 2.92682927]]))
COMP1v64 48 162
DYSDX12
0 520 350
1 8 10
COMP1v64 48 162
DYSDX12
0 0.984848 0.972222
1 0.015152 0.027778
chi-square value, p value, expected counts
(1.1413595727388834, 0.28536522618096383, 1, array([[ 517.2972973, 352.7027027],
[ 10.7027027, 7.2972973]]))
COMP1v65 48 96
DYSDX12
0 520 463
1 8 12
COMP1v65 48 96
DYSDX12
0 0.984848 0.974737
1 0.015152 0.025263
chi-square value, p value, expected counts
(0.84198678587624431, 0.35882916664842746, 1, array([[ 517.47158524, 465.52841476],
[ 10.52841476, 9.47158524]]))
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment