Skip to content

Instantly share code, notes, and snippets.

@allatambov
Last active June 27, 2018 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allatambov/9452e5b60b890390fa12c1d3f8f93036 to your computer and use it in GitHub Desktop.
Save allatambov/9452e5b60b890390fa12c1d3f8f93036 to your computer and use it in GitHub Desktop.
# загрузить stats из scipy
# pandas - для базы данных
import scipy.stats as st
import pandas as pd
# Load the data set.
df = pd.read_csv('swiss.csv')
# Split Infant.Mortality into two independent samples by whether the
# Catholic column is above 50 or not.
catholic_majority = df.Catholic > 50
sample1 = df[catholic_majority]["Infant.Mortality"]
sample2 = df[df.Catholic <= 50]["Infant.Mortality"]
# Two-sample t-test: first with the default pooled-variance assumption,
# then Welch's variant, which does not assume equal variances.
st.ttest_ind(sample1, sample2)
st.ttest_ind(sample1, sample2, equal_var=False)
# Rank-based tests for two independent samples.
# st.wilcoxon is the *paired* signed-rank test and requires samples of equal
# length, so the Wilcoxon rank-sum test (st.ranksums) is used here instead.
st.ranksums(sample1, sample2)
st.mannwhitneyu(sample1, sample2)
# A second data set, used for ANOVA and the Kruskal-Wallis test.
dat = pd.read_csv('chickwts.csv')
# Exercise: split the data into groups by the `feed` variable using groupby
# and store each group's `weight` column in a dict keyed by feed name.
wgt = {}
for name, d in dat.groupby('feed'):
    wgt[name] = d.weight
# Both tests take the samples as separate positional arguments, so collect
# the per-feed samples once and unpack them into each call.
feeds = ('casein', 'horsebean', 'linseed', 'meatmeal', 'soybean', 'sunflower')
samples = [wgt[feed] for feed in feeds]
# One-way ANOVA
st.f_oneway(*samples)
# Kruskal-Wallis: called once, directly on the samples (nesting one kruskal
# call inside another would hand the outer call a single result object).
st.kruskal(*samples)
# Simple linear regression with Agriculture as x and Catholic as y.
fit = st.linregress(df.Agriculture, df.Catholic)
slope, intercept, rvalue, pvalue, stderr = fit
# Collect the estimates into a one-row table whose columns are ordered
# intercept, slope, stderr, pvalue.
d = dict(slope=slope, intercept=intercept, pvalue=pvalue, stderr=stderr)
res = pd.DataFrame.from_dict(d, orient='index')
cols = ['intercept', 'slope', 'stderr', 'pvalue']
new = res.T[cols]
def star(D):
    """Return the significance-star marker for the first p-value in ``D``.

    Parameters
    ----------
    D : object exposing a ``pvalue`` attribute
        Typically a pandas DataFrame with a ``pvalue`` column; only the
        first entry is inspected.

    Returns
    -------
    str
        ``'***'`` if p <= 0.001, ``'**'`` if 0.001 < p <= 0.01,
        ``'*'`` if 0.01 < p <= 0.05, and ``''`` otherwise (including NaN).
    """
    pvalues = D.pvalue
    # Take the first entry by position so a non-default index label does
    # not break the lookup; fall back to plain indexing for list-likes.
    p = pvalues.iloc[0] if hasattr(pvalues, 'iloc') else pvalues[0]
    # Thresholds are checked from strictest to loosest, so each branch
    # implicitly excludes the ranges handled above it.
    if p <= 0.001:
        return '***'
    if p <= 0.01:
        return '**'
    if p <= 0.05:
        return '*'
    return ''
def add_star(x, D=new):
    """Format ``x`` to two decimals and append significance stars.

    Parameters
    ----------
    x : float
        The coefficient value to format.
    D : table passed to ``star``
        Source of the p-value that decides the stars. The default is the
        ``new`` table as bound when this function is defined.

    Returns
    -------
    str
        ``x`` rendered with two decimal places followed by the marker
        returned by ``star(D)``.
    """
    return '{:.2f}'.format(x) + star(D)


# Build the printable coefficient column. The function must be fully defined
# before it is applied; otherwise the column is filled with None.
new['coef'] = new['slope'].apply(add_star)
# Select the intercept, coef and pvalue columns in the required order
# and export the resulting table to TeX.
report_columns = ['intercept', 'coef', 'pvalue']
final = new[report_columns]
final.to_latex()
# Print the documentation for to_latex.
help(final.to_latex)
# sklearn: linear regression with Agriculture as the target and
# Catholic and Infant.Mortality as predictors.
import sklearn.linear_model as lm
model = lm.LinearRegression()
X = df[['Catholic', 'Infant.Mortality']]
model.fit(X, df.Agriculture)
model.coef_
model.predict(X)
model.intercept_
# The `residues_` attribute was deprecated and is no longer available on
# LinearRegression, so compute the residual sum of squares explicitly.
residuals = df.Agriculture - model.predict(X)
(residuals ** 2).sum()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment