Last active
March 5, 2018 11:45
-
-
Save maecha/43147125b888f512e15f733eca5d077e to your computer and use it in GitHub Desktop.
The first attack Titanic: Machine Learning from Disaster for Competition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd\nimport numpy as np\nfrom sklearn import tree", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "gender_map = {'female': 0, 'male': 1}\nfeature_fields = ['Pclass', 'Sex', 'Age', 'Fare']", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"scrolled": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_df = pd.read_csv('data/train.csv')\ntest_df = pd.read_csv('data/test.csv')", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Encode Sex as an integer via gender_map and fill missing ages with the column median.\ntrain_df = train_df.assign(\n    Sex=lambda df: df['Sex'].map(gender_map).astype(int),\n    Age=lambda df: df['Age'].fillna(df['Age'].median()),\n)", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Encode Sex as an integer via gender_map.\ntest_df['Sex'] = test_df['Sex'].map(gender_map).astype(int)\n# Impute gaps with medians taken from the training set, so a missing value is filled\n# identically in train and test and no statistic is derived from the test data.\ntest_df['Age'] = test_df['Age'].fillna(train_df['Age'].median())\ntest_df['Fare'] = test_df['Fare'].fillna(train_df['Fare'].median())", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "# Pull plain NumPy arrays out of the frames: model inputs first, then the labels.\nfeatures = train_df.loc[:, feature_fields].values\ntest_features = test_df.loc[:, feature_fields].values\ntarget = train_df.loc[:, 'Survived'].values", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Fix the seed: scikit-learn permutes candidate features at each split, so without\n# random_state the fitted tree (and the submission) can differ between runs.\ndec_tree = tree.DecisionTreeClassifier(min_samples_split=4, random_state=0)\ndec_tree = dec_tree.fit(features, target)", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "prediction = dec_tree.predict(test_features)", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": true, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Write the submission file: one Survived prediction per row, indexed by PassengerId.\npindex = test_df['PassengerId'].values\nsolution = pd.DataFrame({'Survived': prediction}, index=pindex)\nsolution.to_csv('solution.csv', index_label='PassengerId')", | |
"execution_count": 9, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.3", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "43147125b888f512e15f733eca5d077e", | |
"data": { | |
"description": "The first attack Titanic: Machine Learning from Disaster for Competition", | |
"public": true | |
} | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/43147125b888f512e15f733eca5d077e" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment