-
-
Save triestpa/257c261111b03fede4e2580017a21727 to your computer and use it in GitHub Desktop.
Data Cleaning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "pclass 1309\n",
              "survived 1309\n",
              "name 1309\n",
              "sex 1309\n",
              "age 1046\n",
              "sibsp 1309\n",
              "parch 1309\n",
              "ticket 1309\n",
              "fare 1308\n",
              "cabin 295\n",
              "embarked 1307\n",
              "boat 486\n",
              "body 121\n",
              "home.dest 745\n",
              "dtype: int64"
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "titanic_df.count()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "titanic_df = titanic_df.drop(['body','cabin','boat'], axis=1)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "titanic_df[\"home.dest\"] = titanic_df[\"home.dest\"].fillna(\"NA\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "titanic_df = titanic_df.dropna()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "pclass 1043\n",
              "survived 1043\n",
              "name 1043\n",
              "sex 1043\n",
              "age 1043\n",
              "sibsp 1043\n",
              "parch 1043\n",
              "ticket 1043\n",
              "fare 1043\n",
              "embarked 1043\n",
              "home.dest 1043\n",
              "dtype: int64"
            ]
          },
          "execution_count": 12,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "titanic_df.count()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.4.2"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment