Skip to content

Instantly share code, notes, and snippets.

@hagino3000
Created March 11, 2015 04:06
Show Gist options
  • Save hagino3000/9c8c0b71b6302ca28f25 to your computer and use it in GitHub Desktop.
Save hagino3000/9c8c0b71b6302ca28f25 to your computer and use it in GitHub Desktop.
Kaggle Titanic Competition
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 358,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.cross_validation import train_test_split, cross_val_score, KFold\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.tree import DecisionTreeClassifier"
]
},
{
"cell_type": "code",
"execution_count": 342,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def plot_confusion_matrix(cm):\n",
"    \"\"\"Render a confusion matrix as a blue heat map.\n",
"\n",
"    Both axes get one tick per class, labelled 'not survived' and\n",
"    'survived'; the y axis is captioned as the true label and the x axis as\n",
"    the predicted label. Nothing is returned.\n",
"    \"\"\"\n",
"    class_labels = ['not survived', 'survived']\n",
"    positions = np.arange(len(class_labels))\n",
"\n",
"    fig, ax = plt.subplots()\n",
"    heatmap = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)\n",
"    ax.set_title('Confusion Matrix')\n",
"    fig.colorbar(heatmap)\n",
"\n",
"    # The same tick positions are used on both axes: one per class.\n",
"    ax.set_xticks(positions)\n",
"    ax.set_xticklabels(class_labels, rotation=45)\n",
"    ax.set_yticks(positions)\n",
"    ax.set_yticklabels(class_labels)\n",
"\n",
"    ax.set_ylabel('True label')\n",
"    ax.set_xlabel('Predicted label')\n",
"    fig.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": 343,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df_train = pd.read_csv('./train.csv')\n",
"df_test = pd.read_csv('./test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 344,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 3</td>\n",
" <td> Braund, Mr. Owen Harris</td>\n",
" <td> male</td>\n",
" <td> 22</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> A/5 21171</td>\n",
" <td> 7.2500</td>\n",
" <td> NaN</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td> female</td>\n",
" <td> 38</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> PC 17599</td>\n",
" <td> 71.2833</td>\n",
" <td> C85</td>\n",
" <td> C</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Name \\\n",
"0 0 3 Braund, Mr. Owen Harris \n",
"1 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... \n",
"\n",
" Sex Age SibSp Parch Ticket Fare Cabin Embarked \n",
"0 male 22 1 0 A/5 21171 7.2500 NaN S \n",
"1 female 38 1 0 PC 17599 71.2833 C85 C "
]
},
"execution_count": 344,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.drop('PassengerId', axis=1, inplace=True)\n",
"df_train.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 345,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def _extract_title(name):\n",
"    \"\"\"Return the honorific found in a passenger name, or None.\n",
"\n",
"    Recognised titles are 'Mr', 'Mrs', 'Master' and 'Miss'. Each is matched\n",
"    as the title followed by a period (e.g. 'Mrs.') and they are checked in\n",
"    that order; the first one present wins.\n",
"    \"\"\"\n",
"    for title in ('Mr', 'Mrs', 'Master', 'Miss'):\n",
"        # Plain substring test: str.find() returns 0 for a match at the very\n",
"        # start of the string, which a '> 0' comparison would reject.\n",
"        if (title + '.') in name:\n",
"            return title\n",
"    return None\n",
" \n",
"def extract_title(df):\n",
"    \"\"\"Add a 'Title' column and one-hot 'title_<Title>' indicator columns.\n",
"\n",
"    'Title' is written onto the frame that was passed in; the returned frame\n",
"    is the result of joining the indicator columns onto it.\n",
"    \"\"\"\n",
"    df['Title'] = df.Name.apply(_extract_title)\n",
"    indicators = pd.get_dummies(df.Title)\n",
"    indicators = indicators.rename(columns=lambda title: 'title' + \"_\" + str(title))\n",
"    return df.join(indicators)"
]
},
{
"cell_type": "code",
"execution_count": 346,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def fill_fare(df):\n",
"    \"\"\"Add 'FareFill': the natural log of the fare, with gaps imputed by class.\n",
"\n",
"    Missing fares are first set to 0 in 'Fare'. Rows whose fare is 0 then\n",
"    get a fixed replacement chosen by Pclass (86 / 21 / 13 for class\n",
"    1 / 2 / 3) before the log is taken.\n",
"\n",
"    Returns the same frame, modified in place.\n",
"    \"\"\"\n",
"    # Reassign the column instead of calling fillna(inplace=True) on a\n",
"    # selection, which may operate on a temporary copy.\n",
"    df['Fare'] = df['Fare'].fillna(0)\n",
"    df['FareFill'] = df['Fare']\n",
"\n",
"    no_fare = df['Fare'] == 0\n",
"    for pclass, replacement in ((1, 86), (2, 21), (3, 13)):\n",
"        # df.loc writes to the frame itself; df.FareFill[mask] = ... is\n",
"        # chained indexing and is not guaranteed to.\n",
"        df.loc[no_fare & (df['Pclass'] == pclass), 'FareFill'] = replacement\n",
"\n",
"    df['FareFill'] = np.log(df['FareFill'])\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 347,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def fill_age(df):\n",
"    \"\"\"Add 'AgeFill': a copy of 'Age' with missing ages replaced by fixed values.\n",
"\n",
"    The replacement depends on Sex, Title and Pclass, so the frame must\n",
"    already have a 'Title' column. Rules are applied in order and a later\n",
"    rule overwrites an earlier one for the same row (e.g. 'Master' rows end\n",
"    up with 3.5 even though they also match a male/class rule).\n",
"\n",
"    Returns the same frame, modified in place.\n",
"    \"\"\"\n",
"    df['AgeFill'] = df['Age']\n",
"    age_missing = df['Age'].isnull()\n",
"\n",
"    # All writes go through df.loc: assigning via df.AgeFill[mask] = ... is\n",
"    # chained indexing and may update a temporary copy instead of the frame.\n",
"    for pclass, age in ((1, 40), (2, 31), (3, 26)):\n",
"        male_in_class = (df['Sex'] == 'male') & (df['Pclass'] == pclass)\n",
"        df.loc[age_missing & male_in_class, 'AgeFill'] = age\n",
"    df.loc[age_missing & (df['Title'] == 'Master'), 'AgeFill'] = 3.5\n",
"\n",
"    ages_by_title_and_class = (\n",
"        ('Mrs', 1, 41.5), ('Mrs', 2, 32), ('Mrs', 3, 31),\n",
"        ('Miss', 1, 30), ('Miss', 2, 24), ('Miss', 3, 18),\n",
"    )\n",
"    for title, pclass, age in ages_by_title_and_class:\n",
"        titled_in_class = (df['Title'] == title) & (df['Pclass'] == pclass)\n",
"        df.loc[age_missing & titled_in_class, 'AgeFill'] = age\n",
"\n",
"    # Fallback for any female row that still has no age after the rules above.\n",
"    df.loc[df['AgeFill'].isnull() & (df['Sex'] == 'female'), 'AgeFill'] = 30\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 348,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def extract_pclass(df):\n",
"    \"\"\"Return df joined with one-hot 'pclass_<value>' columns built from 'Pclass'.\"\"\"\n",
"    indicators = pd.get_dummies(df.Pclass)\n",
"    indicators = indicators.rename(columns=lambda value: 'pclass' + \"_\" + str(value))\n",
"    return df.join(indicators)"
]
},
{
"cell_type": "code",
"execution_count": 349,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def extract_parch(df):\n",
"    \"\"\"Return df joined with one-hot 'parch_<n>' columns built from 'Parch'.\n",
"\n",
"    Values above 4 are folded into 4 before encoding.\n",
"    \"\"\"\n",
"    capped = df.Parch.apply(lambda count: min(count, 4))\n",
"    indicators = pd.get_dummies(capped)\n",
"    indicators = indicators.rename(columns=lambda n: 'parch' + \"_\" + str(n))\n",
"    return df.join(indicators)"
]
},
{
"cell_type": "code",
"execution_count": 350,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def extract_sibsp(df):\n",
"    \"\"\"Return df joined with one-hot 'sibsp_<n>' columns built from 'SibSp'.\n",
"\n",
"    Values above 4 are folded into 4 before encoding.\n",
"    \"\"\"\n",
"    capped = df.SibSp.apply(lambda count: min(count, 4))\n",
"    indicators = pd.get_dummies(capped)\n",
"    indicators = indicators.rename(columns=lambda n: 'sibsp' + \"_\" + str(n))\n",
"    return df.join(indicators)"
]
},
{
"cell_type": "code",
"execution_count": 351,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def convert_sex(df):\n",
"    \"\"\"Add 0/1 indicator columns 'male' and 'female' derived from 'Sex'.\n",
"\n",
"    'male' is 1 exactly when Sex == 'male'; 'female' is its complement, so\n",
"    every row has exactly one of the two flags set.\n",
"\n",
"    Returns the same frame, modified in place.\n",
"    \"\"\"\n",
"    is_male = df['Sex'] == 'male'\n",
"    # Each flag must agree with its column name: a male passenger gets\n",
"    # male=1 and female=0.\n",
"    df['male'] = is_male.astype(int)\n",
"    df['female'] = (~is_male).astype(int)\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 352,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def extract_feature(df):\n",
"    \"\"\"Run every feature step on df and return only the derived columns.\n",
"\n",
"    Steps run in a fixed order (fill_age reads the 'Title' column that\n",
"    extract_title adds). Afterwards the raw input columns and the label are\n",
"    dropped, whichever of them are present in the frame.\n",
"    \"\"\"\n",
"    steps = (extract_title, fill_age, extract_pclass, extract_sibsp,\n",
"             extract_parch, convert_sex, fill_fare)\n",
"    for step in steps:\n",
"        df = step(df)\n",
"\n",
"    unwanted = set(['PassengerId', 'Title', 'Name', 'SibSp', 'Ticket', 'Fare', 'Pclass', 'Survived', 'Parch', 'Sex', 'Age', 'Ticket', 'Cabin', 'Embarked', 'CCabin'])\n",
"    # Only drop names that actually exist (the test frame has no 'Survived').\n",
"    drop_cols = unwanted.intersection(set(df.columns))\n",
"    return df.drop(drop_cols, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 353,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_classifier():\n",
"    \"\"\"Build the default model: logistic regression with an L2 penalty (C=100, tol=0.01).\"\"\"\n",
"    # Alternatives kept for quick swapping:\n",
"    #   RandomForestClassifier()\n",
"    #   DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=2)\n",
"    return LogisticRegression(C=100, penalty='l2', tol=0.01)"
]
},
{
"cell_type": "code",
"execution_count": 354,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def calc_classifier(df, clf=None):\n",
"    \"\"\"Fit a classifier on an 80/20 split of df and print both accuracies.\n",
"\n",
"    Args:\n",
"        df: frame accepted by extract_feature that also has a 'Survived' column.\n",
"        clf: estimator exposing fit/predict; get_classifier() is used when None.\n",
"\n",
"    Returns:\n",
"        The classifier, fitted on the training part of the split only.\n",
"    \"\"\"\n",
"    features = extract_feature(df)\n",
"    labels = df['Survived']\n",
"    # Fixed random_state keeps the split reproducible between runs.\n",
"    X_train, X_val, y_train, y_val = train_test_split(\n",
"        features, labels, train_size=0.8, random_state=42)\n",
"    print('Num of Training Samples: {}'.format(len(X_train)))\n",
"    print('Num of Validation Samples: {}'.format(len(X_val)))\n",
"\n",
"    if clf is None:\n",
"        clf = get_classifier()\n",
"    clf.fit(X_train, y_train)\n",
"\n",
"    y_train_pred = clf.predict(X_train)\n",
"    y_val_pred = clf.predict(X_val)\n",
"    print('Accuracy on Training Set: {:.3f}'.format(accuracy_score(y_train, y_train_pred)))\n",
"    print('Accuracy on Validation Set: {:.3f}'.format(accuracy_score(y_val, y_val_pred)))\n",
"    # To inspect validation errors, pass confusion_matrix(y_val, y_val_pred)\n",
"    # to plot_confusion_matrix here; it is not computed by default because\n",
"    # nothing consumed the result.\n",
"    return clf"
]
},
{
"cell_type": "code",
"execution_count": 355,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def cross_val(X, y, K, random_state=0, clf=None):\n",
"    \"\"\"Score a model with shuffled K-fold cross-validation and print a summary.\n",
"\n",
"    Args:\n",
"        X: feature matrix.\n",
"        y: target values; its length sets the number of samples to split.\n",
"        K: number of folds.\n",
"        random_state: seed for the fold shuffling.\n",
"        clf: estimator to evaluate; get_classifier() is used when None.\n",
"\n",
"    Returns:\n",
"        The array of per-fold scores from cross_val_score.\n",
"    \"\"\"\n",
"    estimator = get_classifier() if clf is None else clf\n",
"    folds = KFold(len(y), K, shuffle=True, random_state=random_state)\n",
"    scores = cross_val_score(estimator, X, y, cv=folds)\n",
"    print('Scores:', scores)\n",
"    # The reported spread is two standard deviations of the fold scores.\n",
"    print('Mean Score: {0:.3f} (+/-{1:.3f})'.format(scores.mean(), scores.std()*2))\n",
"    return scores"
]
},
{
"cell_type": "code",
"execution_count": 356,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_train = extract_feature(df_train)\n",
"y_train = df_train.Survived"
]
},
{
"cell_type": "code",
"execution_count": 389,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Logistic Regression\n",
"('Scores:', array([ 0.78212291, 0.78089888, 0.79213483, 0.8258427 , 0.83707865]))\n",
"Mean Score: 0.804 (+/-0.047)\n",
"Linear Regression\n",
"('Scores:', array([ 0.44844448, 0.38164783, 0.40170421, 0.36806075, 0.48549067]))\n",
"Mean Score: 0.417 (+/-0.087)\n",
"Random Forest\n",
"('Scores:', array([ 0.79888268, 0.79213483, 0.82022472, 0.8258427 , 0.78089888]))\n",
"Mean Score: 0.804 (+/-0.034)\n",
"SVN (L1 regression)\n",
"('Scores:', array([ 0.81005587, 0.82022472, 0.8258427 , 0.81460674, 0.84269663]))\n",
"Mean Score: 0.823 (+/-0.023)\n",
"SVN (L2 regression and L1 loss)\n",
"('Scores:', array([ 0.82122905, 0.82022472, 0.8258427 , 0.8258427 , 0.84269663]))\n",
"Mean Score: 0.827 (+/-0.016)\n",
"SVN (L2)\n",
"('Scores:', array([ 0.82122905, 0.75842697, 0.82022472, 0.85393258, 0.84269663]))\n",
"Mean Score: 0.819 (+/-0.066)\n",
"SVN\n",
"('Scores:', array([ 0.79888268, 0.71910112, 0.75280899, 0.82022472, 0.83707865]))\n",
"Mean Score: 0.786 (+/-0.087)\n",
"Decision Tree\n",
"('Scores:', array([ 0.7877095 , 0.80898876, 0.80898876, 0.80337079, 0.8258427 ]))\n",
"Mean Score: 0.807 (+/-0.024)\n"
]
},
{
"data": {
"text/plain": [
"array([ 0.7877095 , 0.80898876, 0.80898876, 0.80337079, 0.8258427 ])"
]
},
"execution_count": 389,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compare candidate models with 5-fold cross-validation on the training set.\n",
"# print(...) with a single argument behaves the same on Python 2 and 3.\n",
"print('Logistic Regression')\n",
"cross_val(X_train, y_train, 5, clf=LogisticRegression(C=0.1, penalty='l2', tol=0.01))\n",
"# NOTE: LinearRegression is a regressor, so its default score is R^2 and is\n",
"# not comparable with the accuracies reported for the classifiers.\n",
"print('Linear Regression')\n",
"cross_val(X_train, y_train, 5, clf=LinearRegression())\n",
"print('Random Forest')\n",
"cross_val(X_train, y_train, 5, clf=RandomForestClassifier())\n",
"print('SVM (L1 regularization)')\n",
"cross_val(X_train, y_train, 5, clf=LinearSVC(penalty='l1', dual=False))\n",
"print('SVM (L2 regularization and L1 loss)')\n",
"cross_val(X_train, y_train, 5, clf=LinearSVC(penalty='l2', loss='l1'))\n",
"# NOTE(review): penalty='l2' looks like LinearSVC's default, so this run and\n",
"# the next one appear to use the same configuration - confirm.\n",
"print('SVM (L2)')\n",
"cross_val(X_train, y_train, 5, clf=LinearSVC(penalty='l2'))\n",
"print('SVM')\n",
"cross_val(X_train, y_train, 5, clf=LinearSVC())\n",
"print('Decision Tree')\n",
"cross_val(X_train, y_train, 5, clf=DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=2))"
]
},
{
"cell_type": "code",
"execution_count": 384,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Num of Training Samples: 712\n",
"Num of Validation Samples: 179\n",
"Accuracy on Training Set: 0.833\n",
"Accuracy on Validation Set: 0.821\n"
]
}
],
"source": [
"clf = calc_classifier(df_train, clf=LinearSVC(penalty='l1', dual=False))"
]
},
{
"cell_type": "code",
"execution_count": 385,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title_Master</th>\n",
" <th>title_Miss</th>\n",
" <th>title_Mr</th>\n",
" <th>title_Mrs</th>\n",
" <th>AgeFill</th>\n",
" <th>pclass_1</th>\n",
" <th>pclass_2</th>\n",
" <th>pclass_3</th>\n",
" <th>sibsp_0</th>\n",
" <th>sibsp_1</th>\n",
" <th>...</th>\n",
" <th>sibsp_3</th>\n",
" <th>sibsp_4</th>\n",
" <th>parch_0</th>\n",
" <th>parch_1</th>\n",
" <th>parch_2</th>\n",
" <th>parch_3</th>\n",
" <th>parch_4</th>\n",
" <th>male</th>\n",
" <th>female</th>\n",
" <th>FareFill</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 22</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 1.981001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 38</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 4.266662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 26</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 2.070022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 35</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 3.972177</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 35</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2.085672</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" title_Master title_Miss title_Mr title_Mrs AgeFill pclass_1 pclass_2 \\\n",
"0 0 0 1 0 22 0 0 \n",
"1 0 0 0 1 38 1 0 \n",
"2 0 1 0 0 26 0 0 \n",
"3 0 0 0 1 35 1 0 \n",
"4 0 0 1 0 35 0 0 \n",
"\n",
" pclass_3 sibsp_0 sibsp_1 ... sibsp_3 sibsp_4 parch_0 parch_1 \\\n",
"0 1 0 1 ... 0 0 1 0 \n",
"1 0 0 1 ... 0 0 1 0 \n",
"2 1 1 0 ... 0 0 1 0 \n",
"3 0 0 1 ... 0 0 1 0 \n",
"4 1 1 0 ... 0 0 1 0 \n",
"\n",
" parch_2 parch_3 parch_4 male female FareFill \n",
"0 0 0 0 0 1 1.981001 \n",
"1 0 0 0 1 0 4.266662 \n",
"2 0 0 0 1 0 2.070022 \n",
"3 0 0 0 1 0 3.972177 \n",
"4 0 0 0 0 1 2.085672 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 385,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 386,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"Y = extract_feature(df_test)\n",
"df_test['Survived'] = clf.predict(Y)\n",
"submit_data = df_test[['PassengerId', 'Survived']]"
]
},
{
"cell_type": "code",
"execution_count": 387,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title_Master</th>\n",
" <th>title_Miss</th>\n",
" <th>title_Mr</th>\n",
" <th>title_Mrs</th>\n",
" <th>AgeFill</th>\n",
" <th>pclass_1</th>\n",
" <th>pclass_2</th>\n",
" <th>pclass_3</th>\n",
" <th>sibsp_0</th>\n",
" <th>sibsp_1</th>\n",
" <th>...</th>\n",
" <th>sibsp_3</th>\n",
" <th>sibsp_4</th>\n",
" <th>parch_0</th>\n",
" <th>parch_1</th>\n",
" <th>parch_2</th>\n",
" <th>parch_3</th>\n",
" <th>parch_4</th>\n",
" <th>male</th>\n",
" <th>female</th>\n",
" <th>FareFill</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 34.5</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2.057860</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 47.0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 1.945910</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 62.0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2.270836</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 27.0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2.159003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 22.0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td>...</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 2.508582</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" title_Master title_Miss title_Mr title_Mrs AgeFill pclass_1 pclass_2 \\\n",
"0 0 0 1 0 34.5 0 0 \n",
"1 0 0 0 1 47.0 0 0 \n",
"2 0 0 1 0 62.0 0 1 \n",
"3 0 0 1 0 27.0 0 0 \n",
"4 0 0 0 1 22.0 0 0 \n",
"\n",
" pclass_3 sibsp_0 sibsp_1 ... sibsp_3 sibsp_4 parch_0 parch_1 \\\n",
"0 1 1 0 ... 0 0 1 0 \n",
"1 1 0 1 ... 0 0 1 0 \n",
"2 0 1 0 ... 0 0 1 0 \n",
"3 1 1 0 ... 0 0 1 0 \n",
"4 1 0 1 ... 0 0 0 1 \n",
"\n",
" parch_2 parch_3 parch_4 male female FareFill \n",
"0 0 0 0 0 1 2.057860 \n",
"1 0 0 0 1 0 1.945910 \n",
"2 0 0 0 0 1 2.270836 \n",
"3 0 0 0 0 1 2.159003 \n",
"4 0 0 0 1 0 2.508582 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 387,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Y.head()"
]
},
{
"cell_type": "code",
"execution_count": 388,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"submit_data.to_csv('./submit_20150312_3.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment