Last active
June 27, 2018 11:37
-
-
Save allatambov/9452e5b60b890390fa12c1d3f8f93036 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load stats from scipy;
# pandas is used for the data tables.
import scipy.stats as st
import pandas as pd

# Load the dataset.
df = pd.read_csv('swiss.csv')

# Split Infant.Mortality into two independent samples by the Catholic column
# (rows with Catholic > 50 versus the rest).
df[df.Catholic > 50]
sample1 = df[df.Catholic > 50]["Infant.Mortality"]
sample2 = df[df.Catholic <= 50]["Infant.Mortality"]

# Two-sample t-test: first the default form, then with equal_var=False.
st.ttest_ind(sample1, sample2)
st.ttest_ind(sample1, sample2, equal_var=False)

# Wilcoxon test for two independent samples.
# st.wilcoxon is the *paired* signed-rank test and rejects samples of
# different length, so the rank-sum variant is used here instead.
st.ranksums(sample1, sample2)
st.mannwhitneyu(sample1, sample2)
# A second dataset for ANOVA and Kruskal-Wallis.
dat = pd.read_csv('chickwts.csv')

# Exercise: split the table into groups with groupby on the `feed` variable
# and store the results in a dictionary (feed name -> weight column).
wgt = {name: group.weight for name, group in dat.groupby('feed')}

# ANOVA: f_oneway takes the samples as separate positional arguments,
# so every group stored in the dictionary is unpacked into the call.
st.f_oneway(*wgt.values())

# Kruskal-Wallis on the same groups. The call must not be nested inside
# another st.kruskal: the outer call would receive a single result object
# instead of the samples.
st.kruskal(*wgt.values())
# Linear regression: Agriculture is passed as the first argument,
# Catholic as the second.
st.linregress(df.Agriculture, df.Catholic)
(slope, intercept, rvalue,
 pvalue, stderr) = st.linregress(df.Agriculture, df.Catholic)

# Gather the estimates into a table keyed by name, flip it so that the
# names become columns, and keep the columns in presentation order.
d = dict(slope=slope, intercept=intercept, pvalue=pvalue, stderr=stderr)
res = pd.DataFrame.from_dict(d, orient='index')
cols = ['intercept', 'slope', 'stderr', 'pvalue']
new = res.T[cols]
def star(D):
    """Return the significance marker for the p-value stored in ``D``.

    The value is read from ``D.pvalue[0]``.

    Returns:
        '***' when p <= 0.001, '**' when 0.001 < p <= 0.01,
        '*' when 0.01 < p <= 0.05, and '' otherwise.
    """
    p = D.pvalue[0]
    # Thresholds are checked from the strictest up, so each test only
    # needs its upper bound.
    if p <= 0.001:
        return '***'
    if p <= 0.01:
        return '**'
    if p <= 0.05:
        return '*'
    return ''
def add_star(x, D=new):
    """Format ``x`` with two decimals and append the significance stars.

    Args:
        x: The numeric value to format.
        D: Table whose p-value decides the marker (passed on to ``star``);
            defaults to the regression table ``new``.

    Returns:
        A string such as '1.23**'.
    """
    return '{:.2f}'.format(x) + star(D)


# The function must be fully defined before it is applied: applying an
# earlier stub that returned nothing filled the column with None.
new['coef'] = new['slope'].apply(add_star)
# Pick the intercept, coef and pvalue columns,
# arrange them in the required order,
# and render the table as TeX.
final = new.loc[:, ['intercept', 'coef', 'pvalue']]
final.to_latex()
# Show the documentation for the available to_latex options.
help(final.to_latex)
# sklearn
import sklearn.linear_model as lm

# Fit Agriculture on Catholic and Infant.Mortality.
model = lm.LinearRegression()
X = df[['Catholic', 'Infant.Mortality']]
model.fit(X, df.Agriculture)
model.coef_
model.predict(X)
model.intercept_
# LinearRegression no longer exposes `residues_`, so the residuals and
# their sum of squares are computed from the predictions instead.
residuals = df.Agriculture - model.predict(X)
(residuals ** 2).sum()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment