Created
January 21, 2022 17:26
-
-
Save mccurcio/e9b23eb18f0bc932fa1e8657d75b8c7d to your computer and use it in GitHub Desktop.
test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"id": "7c4191ef", | |
"cell_type": "markdown", | |
"source": "## Introduction to Pandas dataframes" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "d7d4e532", | |
"cell_type": "code", | |
"source": "import pandas as pd\nurl = \"https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/houston_claims.csv\"\nclaims_df = pd.read_csv(url, index_col = 0)\n\nclaims_df[:3]", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": " reportedCity dateOfLoss elevatedBuildingIndicator floodZone \\\n0 HOUSTON 2017-08-27T00:00:00.000Z False X \n1 HOUSTON 2008-09-12T00:00:00.000Z False X \n2 HOUSTON 2004-06-29T00:00:00.000Z False X \n\n latitude longitude lowestFloodElevation amountPaidOnBuildingClaim \\\n0 29.7 -95.5 NaN 195857.43 \n1 29.5 -95.1 NaN 0.00 \n2 29.8 -95.6 NaN 1420.89 \n\n amountPaidOnContentsClaim yearofLoss reportedZipcode \\\n0 0.0 2017-01-01T00:00:00.000Z 77096 \n1 0.0 2008-01-01T00:00:00.000Z 77058 \n2 0.0 2004-01-01T00:00:00.000Z 77042 \n\n id \n0 5e398d6774cbd479fc898dea \n1 5e398d6774cbd479fc898dfc \n2 5e398d6774cbd479fc898e4b ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>reportedCity</th>\n <th>dateOfLoss</th>\n <th>elevatedBuildingIndicator</th>\n <th>floodZone</th>\n <th>latitude</th>\n <th>longitude</th>\n <th>lowestFloodElevation</th>\n <th>amountPaidOnBuildingClaim</th>\n <th>amountPaidOnContentsClaim</th>\n <th>yearofLoss</th>\n <th>reportedZipcode</th>\n <th>id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>HOUSTON</td>\n <td>2017-08-27T00:00:00.000Z</td>\n <td>False</td>\n <td>X</td>\n <td>29.7</td>\n <td>-95.5</td>\n <td>NaN</td>\n <td>195857.43</td>\n <td>0.0</td>\n <td>2017-01-01T00:00:00.000Z</td>\n <td>77096</td>\n <td>5e398d6774cbd479fc898dea</td>\n </tr>\n <tr>\n <th>1</th>\n <td>HOUSTON</td>\n <td>2008-09-12T00:00:00.000Z</td>\n <td>False</td>\n <td>X</td>\n <td>29.5</td>\n <td>-95.1</td>\n <td>NaN</td>\n <td>0.00</td>\n <td>0.0</td>\n <td>2008-01-01T00:00:00.000Z</td>\n <td>77058</td>\n <td>5e398d6774cbd479fc898dfc</td>\n </tr>\n <tr>\n <th>2</th>\n <td>HOUSTON</td>\n <td>2004-06-29T00:00:00.000Z</td>\n <td>False</td>\n <td>X</td>\n <td>29.8</td>\n <td>-95.6</td>\n <td>NaN</td>\n <td>1420.89</td>\n <td>0.0</td>\n <td>2004-01-01T00:00:00.000Z</td>\n <td>77042</td>\n <td>5e398d6774cbd479fc898e4b</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "13838d76", | |
"cell_type": "code", | |
"source": "type(claims_df)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "pandas.core.frame.DataFrame" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "8a300702", | |
"cell_type": "code", | |
"source": "# Pull the loss-date column out as a Series so this cell and the to_list() cell below can inspect it\nloss_date_ser = claims_df['dateOfLoss']\ntype(loss_date_ser)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "b3f1a6c2", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "595c4788", | |
"cell_type": "code", | |
"source": "loss_date_ser.to_list()[:2]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "44e3e5a2", | |
"cell_type": "code", | |
"source": "claims_df[:2]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "96b54e52", | |
"cell_type": "code", | |
"source": "claims_df.index", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"id": "95dbb457", | |
"cell_type": "markdown", | |
"source": "### Working with columns" | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "74235562", | |
"cell_type": "code", | |
"source": "import pandas as pd\nurl = \"https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/houston_claims.csv\"\ndf = pd.read_csv(url, index_col = 0)\ndf[:1]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "29beb86b", | |
"cell_type": "code", | |
"source": "df.columns", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "266cdda6", | |
"cell_type": "code", | |
"source": "df['amountPaidOnBuildingClaim']", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "bbc31803", | |
"cell_type": "code", | |
"source": "df.amountPaidOnBuildingClaim", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"id": "25246655", | |
"cell_type": "markdown", | |
"source": "### Selecting multiple columns" | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "a8c28dd4", | |
"cell_type": "code", | |
"source": "df.columns", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "548cd9b8", | |
"cell_type": "code", | |
"source": "columns = ['reportedZipcode', 'amountPaidOnBuildingClaim']\nselected_df = df[columns]\nselected_df[:3]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "bb175c47", | |
"cell_type": "code", | |
"source": "type(df[['latitude', 'longitude']])", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "23a32258", | |
"cell_type": "code", | |
"source": "type(df['amountPaidOnBuildingClaim'])", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"id": "bbed9e49", | |
"cell_type": "markdown", | |
"source": "### Dropping Columns" | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "6eb11bf4", | |
"cell_type": "code", | |
"source": "import pandas as pd\nurl = \"https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/houston_claims.csv\"\ndf = pd.read_csv(url, index_col = 0)\ndf[:2]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "ac503932", | |
"cell_type": "code", | |
"source": "df.columns", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "f4345456", | |
"cell_type": "code", | |
"source": "df_dropped = df.drop(columns = ['reportedCity', 'dateOfLoss', 'elevatedBuildingIndicator'])\ndf_dropped[:4]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "e2b90c32", | |
"cell_type": "code", | |
"source": "loss_date = df['dateOfLoss']\nloss_date[:3]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"id": "dbd6ce0f", | |
"cell_type": "markdown", | |
"source": "## Pandas Selecting Columns Lab" | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "3dd7de50", | |
"cell_type": "code", | |
"source": "url = 'https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/imdb_movies.csv'\n\nimdb_movies = pd.read_csv(url, index_col = 0)\nimdb_movies[:2]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "6cdb0a83", | |
"cell_type": "code", | |
"source": "df_cols = ['title', 'genre', 'budget', 'runtime', 'year', 'month', 'revenue']\ndf_cols", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "e2a70065", | |
"cell_type": "code", | |
"source": "imdb_movies.columns", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "d1f765d0", | |
"cell_type": "code", | |
"source": "imdb_movies.dtypes", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "ce8e32f6", | |
"cell_type": "code", | |
"source": "# Double brackets: indexing with a list of labels selects several columns and returns a DataFrame\nX = imdb_movies[['budget', 'runtime', 'year', 'month']]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "2216421f", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.8.12", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "test", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment