Skip to content

Instantly share code, notes, and snippets.

@maecha
Last active March 5, 2018 11:45
Show Gist options
  • Save maecha/43147125b888f512e15f733eca5d077e to your computer and use it in GitHub Desktop.
Save maecha/43147125b888f512e15f733eca5d077e to your computer and use it in GitHub Desktop.
A first attempt at the Kaggle competition "Titanic: Machine Learning from Disaster"
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd\nimport numpy as np\nfrom sklearn import tree",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "gender_map = {'female': 0, 'male': 1}\nfeature_fields = ['Pclass', 'Sex', 'Age', 'Fare']",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"scrolled": true,
"trusted": true
},
"cell_type": "code",
"source": "train_df = pd.read_csv('data/train.csv')\ntest_df = pd.read_csv('data/test.csv')",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "train_df['Sex'] = train_df['Sex'].map(gender_map).astype(int)\ntrain_df['Age'] = train_df['Age'].fillna(train_df['Age'].median())",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "test_df['Sex'] = test_df['Sex'].map(gender_map).astype(int)\ntest_df['Age'] = test_df['Age'].fillna(test_df['Age'].median())\ntest_df['Fare'] = test_df['Fare'].fillna(test_df['Fare'].median())",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "target = train_df['Survived'].values\nfeatures = train_df[feature_fields].values\ntest_features = test_df[feature_fields].values",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "dec_tree = tree.DecisionTreeClassifier(min_samples_split=4)\ndec_tree = dec_tree.fit(features, target)",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "prediction = dec_tree.predict(test_features)",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "pindex = test_df['PassengerId'].values\nsolution = pd.DataFrame(prediction, pindex, columns = ['Survived'])\nsolution.to_csv('solution.csv', index_label = 'PassengerId')",
"execution_count": 9,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "43147125b888f512e15f733eca5d077e",
"data": {
"description": "A first attempt at the Kaggle competition Titanic: Machine Learning from Disaster",
"public": true
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/43147125b888f512e15f733eca5d077e"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment