This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def binomial(n, k): | |
""" | |
A fast way to calculate binomial coefficients by Andrew Dalke. | |
See http://stackoverflow.com/questions/3025162/statistics-combinations-in-python | |
""" | |
if 0 <= k <= n: | |
ntok = 1 | |
ktok = 1 | |
for t in xrange(1, min(k, n - k) + 1): | |
ntok *= n |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
%matplotlib inline | |
import random | |
import numpy as np | |
import pandas as pd | |
from sklearn import datasets, svm, cross_validation, tree, preprocessing, metrics | |
import sklearn.ensemble as ske | |
import tensorflow as tf | |
from tensorflow.contrib import skflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the 'titanic3' worksheet of the workbook into a DataFrame.
# Cells containing the text 'NA' are treated as missing values, and no
# column is promoted to the index.
titanic_df = pd.read_excel(
    'titanic3.xls',
    'titanic3',
    index_col=None,
    na_values=['NA'],
)
# Preview the first rows of the loaded frame.
titanic_df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mean of the 'survived' column.
# NOTE(review): presumably a 0/1 flag, which would make this the overall
# survival rate -- confirm against the dataset.
titanic_df['survived'].mean()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Average the numeric columns for every (passenger class, sex) pair.
# numeric_only=True keeps text columns out of the aggregation; without it,
# pandas 2.x raises TypeError when asked for the mean of an object column.
class_sex_grouping = titanic_df.groupby(['pclass', 'sex']).mean(numeric_only=True)
class_sex_grouping
# Bar chart of the mean of 'survived' for each (class, sex) group.
class_sex_grouping['survived'].plot.bar()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-class averages of the numeric columns.
# numeric_only=True keeps text columns out of the aggregation; without it,
# pandas 2.x raises TypeError when asked for the mean of an object column.
titanic_df.groupby('pclass').mean(numeric_only=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bucket ages into ten-year intervals with edges 0, 10, ..., 80. With
# pd.cut's default right-closed bins, ages outside (0, 80] get no bucket.
group_by_age = pd.cut(titanic_df["age"], np.arange(0, 90, 10))
# Average the numeric columns within each age bucket.
# numeric_only=True keeps text columns out of the aggregation; without it,
# pandas 2.x raises TypeError when asked for the mean of an object column.
age_grouping = titanic_df.groupby(group_by_age).mean(numeric_only=True)
# Bar chart of the mean of 'survived' per age bucket.
age_grouping['survived'].plot.bar()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Non-null count per column before cleaning.
titanic_df.count()

# Remove the 'body', 'cabin' and 'boat' columns entirely.
titanic_df = titanic_df.drop(['body', 'cabin', 'boat'], axis=1)

# Replace missing 'home.dest' values with the placeholder string "NA" so
# those rows survive, then discard every row that still has a missing value.
titanic_df = titanic_df.fillna({"home.dest": "NA"}).dropna()

# Non-null count per column after cleaning.
titanic_df.count()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def preprocess_titanic_df(df):
    """Return an encoded copy of *df*; the input frame is not modified.

    The 'sex' and 'embarked' columns are replaced by the integer labels
    produced by a LabelEncoder, and the 'name', 'ticket' and 'home.dest'
    columns are dropped.
    """
    encoder = preprocessing.LabelEncoder()
    encoded = df.copy()
    # The encoder is refitted for each column, so only the transformed
    # values are kept, not the fitted label mappings.
    encoded.sex = encoder.fit_transform(encoded.sex)
    encoded.embarked = encoder.fit_transform(encoded.embarked)
    return encoded.drop(['name', 'ticket', 'home.dest'], axis=1)


processed_df = preprocess_titanic_df(titanic_df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feature matrix: every column except 'survived'. Label vector: 'survived'.
y = processed_df['survived'].values
X = processed_df.drop(['survived'], axis=1).values

# Hold out 20% of the rows for testing. No random_state is passed, so the
# split differs from run to run.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# sklearn.model_selection.train_test_split is the replacement. Switching
# also requires updating the import at the top of the file.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.2
)
OlderNewer