Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mccurcio/e9b23eb18f0bc932fa1e8657d75b8c7d to your computer and use it in GitHub Desktop.
Save mccurcio/e9b23eb18f0bc932fa1e8657d75b8c7d to your computer and use it in GitHub Desktop.
test
{
"cells": [
{
"metadata": {},
"id": "7c4191ef",
"cell_type": "markdown",
"source": "## Introduction to Pandas dataframes"
},
{
"metadata": {
"trusted": true
},
"id": "d7d4e532",
"cell_type": "code",
"source": "import pandas as pd\nurl = \"https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/houston_claims.csv\"\nclaims_df = pd.read_csv(url, index_col = 0)\n\nclaims_df[:3]",
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": " reportedCity dateOfLoss elevatedBuildingIndicator floodZone \\\n0 HOUSTON 2017-08-27T00:00:00.000Z False X \n1 HOUSTON 2008-09-12T00:00:00.000Z False X \n2 HOUSTON 2004-06-29T00:00:00.000Z False X \n\n latitude longitude lowestFloodElevation amountPaidOnBuildingClaim \\\n0 29.7 -95.5 NaN 195857.43 \n1 29.5 -95.1 NaN 0.00 \n2 29.8 -95.6 NaN 1420.89 \n\n amountPaidOnContentsClaim yearofLoss reportedZipcode \\\n0 0.0 2017-01-01T00:00:00.000Z 77096 \n1 0.0 2008-01-01T00:00:00.000Z 77058 \n2 0.0 2004-01-01T00:00:00.000Z 77042 \n\n id \n0 5e398d6774cbd479fc898dea \n1 5e398d6774cbd479fc898dfc \n2 5e398d6774cbd479fc898e4b ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>reportedCity</th>\n <th>dateOfLoss</th>\n <th>elevatedBuildingIndicator</th>\n <th>floodZone</th>\n <th>latitude</th>\n <th>longitude</th>\n <th>lowestFloodElevation</th>\n <th>amountPaidOnBuildingClaim</th>\n <th>amountPaidOnContentsClaim</th>\n <th>yearofLoss</th>\n <th>reportedZipcode</th>\n <th>id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>HOUSTON</td>\n <td>2017-08-27T00:00:00.000Z</td>\n <td>False</td>\n <td>X</td>\n <td>29.7</td>\n <td>-95.5</td>\n <td>NaN</td>\n <td>195857.43</td>\n <td>0.0</td>\n <td>2017-01-01T00:00:00.000Z</td>\n <td>77096</td>\n <td>5e398d6774cbd479fc898dea</td>\n </tr>\n <tr>\n <th>1</th>\n <td>HOUSTON</td>\n <td>2008-09-12T00:00:00.000Z</td>\n <td>False</td>\n <td>X</td>\n <td>29.5</td>\n <td>-95.1</td>\n <td>NaN</td>\n <td>0.00</td>\n <td>0.0</td>\n <td>2008-01-01T00:00:00.000Z</td>\n <td>77058</td>\n <td>5e398d6774cbd479fc898dfc</td>\n </tr>\n <tr>\n <th>2</th>\n <td>HOUSTON</td>\n <td>2004-06-29T00:00:00.000Z</td>\n <td>False</td>\n <td>X</td>\n <td>29.8</td>\n <td>-95.6</td>\n <td>NaN</td>\n <td>1420.89</td>\n <td>0.0</td>\n <td>2004-01-01T00:00:00.000Z</td>\n <td>77042</td>\n <td>5e398d6774cbd479fc898e4b</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "13838d76",
"cell_type": "code",
"source": "type(claims_df)",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "pandas.core.frame.DataFrame"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "8a300702",
"cell_type": "code",
"source": "loss_date_ser = claims_df['dateOfLoss']\ntype(loss_date_ser)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "595c4788",
"cell_type": "code",
"source": "loss_date_ser.to_list()[:2]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "44e3e5a2",
"cell_type": "code",
"source": "claims_df[:2]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "96b54e52",
"cell_type": "code",
"source": "claims_df.index",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"id": "95dbb457",
"cell_type": "markdown",
"source": "### Working with columns"
},
{
"metadata": {
"trusted": false
},
"id": "74235562",
"cell_type": "code",
"source": "import pandas as pd\nurl = \"https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/houston_claims.csv\"\ndf = pd.read_csv(url, index_col = 0)\ndf[:1]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "29beb86b",
"cell_type": "code",
"source": "df.columns",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "266cdda6",
"cell_type": "code",
"source": "df['amountPaidOnBuildingClaim']",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "bbc31803",
"cell_type": "code",
"source": "df.amountPaidOnBuildingClaim",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"id": "25246655",
"cell_type": "markdown",
"source": "### Selecting multiple columns"
},
{
"metadata": {
"trusted": false
},
"id": "a8c28dd4",
"cell_type": "code",
"source": "df.columns",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "548cd9b8",
"cell_type": "code",
"source": "columns = ['reportedZipcode', 'amountPaidOnBuildingClaim']\nselected_df = df[columns]\nselected_df[:3]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "bb175c47",
"cell_type": "code",
"source": "type(df[['latitude', 'longitude']])",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "23a32258",
"cell_type": "code",
"source": "type(df['amountPaidOnBuildingClaim'])",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"id": "bbed9e49",
"cell_type": "markdown",
"source": "### Dropping Columns"
},
{
"metadata": {
"trusted": false
},
"id": "6eb11bf4",
"cell_type": "code",
"source": "import pandas as pd\nurl = \"https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/houston_claims.csv\"\ndf = pd.read_csv(url, index_col = 0)\ndf[:2]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "ac503932",
"cell_type": "code",
"source": "df.columns",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "f4345456",
"cell_type": "code",
"source": "df_dropped = df.drop(columns = ['reportedCity', 'dateOfLoss', 'elevatedBuildingIndicator'])\ndf_dropped[:4]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "e2b90c32",
"cell_type": "code",
"source": "loss_date = df['dateOfLoss']\nloss_date[:3]",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"id": "dbd6ce0f",
"cell_type": "markdown",
"source": "## Pandas Selecting Columns Lab"
},
{
"metadata": {
"trusted": false
},
"id": "3dd7de50",
"cell_type": "code",
"source": "url = 'https://raw.githubusercontent.com/jigsawlabs-student/pandas-free-curriculum/master/imdb_movies.csv'\n\nimdb_movies = pd.read_csv(url, index_col = 0)\nimdb_movies[:2]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "6cdb0a83",
"cell_type": "code",
"source": "df_cols = ['title', 'genre', 'budget', 'runtime', 'year', 'month', 'revenue']\ndf_cols",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "e2a70065",
"cell_type": "code",
"source": "imdb_movies.columns",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "d1f765d0",
"cell_type": "code",
"source": "imdb_movies.dtypes",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "ce8e32f6",
"cell_type": "code",
"source": "X = imdb_movies[['budget', 'runtime', 'year', 'month']]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "2216421f",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.12",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "test",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment