Skip to content

Instantly share code, notes, and snippets.

@dhinojosa
Created September 11, 2019 20:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dhinojosa/03884a1a4117573c1cbc45e673696e15 to your computer and use it in GitHub Desktop.
Save dhinojosa/03884a1a4117573c1cbc45e673696e15 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demo - More! Pandas"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.25\n",
"1 0.50\n",
"2 0.75\n",
"3 1.00\n",
"dtype: float64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"data = pd.Series([0.25, 0.5, 0.75, 1.0])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.25, 0.5 , 0.75, 1. ])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# view raw values\n",
"data.values"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=4, step=1)"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# view index\n",
"data.index"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we can index, just like a standard Python list\n",
"data[1]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 0.50\n",
"2 0.75\n",
"dtype: float64"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Because index is a RangeIndex, we can do\n",
"# normal slicing from offset 1 to offset 2\n",
"# \n",
"data[1:3]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# create a series with non-integer indices\n",
"data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 0.25\n",
"b 0.50\n",
"c 0.75\n",
"d 1.00\n",
"dtype: float64"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c', 'd'], dtype='object')"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.index"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.75"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# similar to dict indexing\n",
"data['c']"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 0.25\n",
"5 0.50\n",
"3 0.75\n",
"7 1.00\n",
"dtype: float64"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# if you are particularly perverse...\n",
"data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2, 5, 3, 7])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iloc[3]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.75"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[3]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Implicit and Explicit Indexing"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Python a\n",
"C++ b\n",
"Ruby c\n",
"Java d\n",
"JavaScript e\n",
"Haskell f\n",
"dtype: object"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"data = pd.Series(['a', 'b', 'c', 'd', 'e', 'f'], index=['Python', 'C++', 'Ruby', 'Java', 'JavaScript', 'Haskell'])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Python', 'C++', 'Ruby', 'Java', 'JavaScript', 'Haskell'], dtype='object')"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# index is no longer a range\n",
"# indices are like keys in a dictionary\n",
"data.index"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'b'"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[1] # index by numeric index, not offset!"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"C++ b\n",
"Ruby c\n",
"dtype: object"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# but slicing still works as before\n",
"data[1:3] # offset 1...offset 2"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Python a\n",
"C++ b\n",
"Ruby c\n",
"Java d\n",
"JavaScript e\n",
"Haskell f\n",
"dtype: object"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'d'"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Java']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# __`loc`__ and __`iloc`__\n",
"* .loc is a __*label*__-based indexing method\n",
"* .iloc is an __*integer*__-based indexing method"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Python a\n",
"C++ b\n",
"Ruby c\n",
"Java d\n",
"JavaScript e\n",
"Haskell f\n",
"dtype: object"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'a'"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc['Python'] # 1 here is a label, not an offset"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Python a\n",
"C++ b\n",
"Ruby c\n",
"Java d\n",
"JavaScript e\n",
"dtype: object"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc['Python':'JavaScript'] # 1 and 3 are labels, not integer offsets"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'b'"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iloc[1] # 1 is an offset, not a label"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"C++ b\n",
"Ruby c\n",
"dtype: object"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iloc[1:3] # 1..3 is a Python slice based on offsets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sales Data"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Opportunity Number</th>\n",
" <th>Supplies Subgroup</th>\n",
" <th>Supplies Group</th>\n",
" <th>Region</th>\n",
" <th>Route To Market</th>\n",
" <th>Elapsed Days In Sales Stage</th>\n",
" <th>Opportunity Result</th>\n",
" <th>Sales Stage Change Count</th>\n",
" <th>Total Days Identified Through Closing</th>\n",
" <th>Total Days Identified Through Qualified</th>\n",
" <th>Opportunity Amount USD</th>\n",
" <th>Client Size By Revenue</th>\n",
" <th>Client Size By Employee Count</th>\n",
" <th>Revenue From Client Past Two Years</th>\n",
" <th>Competitor Type</th>\n",
" <th>Ratio Days Identified To Total Days</th>\n",
" <th>Ratio Days Validated To Total Days</th>\n",
" <th>Ratio Days Qualified To Total Days</th>\n",
" <th>Deal Size Category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1641984</td>\n",
" <td>Exterior Accessories</td>\n",
" <td>Car Accessories</td>\n",
" <td>Northwest</td>\n",
" <td>Fields Sales</td>\n",
" <td>76</td>\n",
" <td>Won</td>\n",
" <td>13</td>\n",
" <td>104</td>\n",
" <td>101</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>0.69636</td>\n",
" <td>0.113985</td>\n",
" <td>0.154215</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1658010</td>\n",
" <td>Exterior Accessories</td>\n",
" <td>Car Accessories</td>\n",
" <td>Pacific</td>\n",
" <td>Reseller</td>\n",
" <td>63</td>\n",
" <td>Loss</td>\n",
" <td>2</td>\n",
" <td>163</td>\n",
" <td>163</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>0.00000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1674737</td>\n",
" <td>Motorcycle Parts</td>\n",
" <td>Performance &amp; Non-auto</td>\n",
" <td>Pacific</td>\n",
" <td>Reseller</td>\n",
" <td>24</td>\n",
" <td>Won</td>\n",
" <td>7</td>\n",
" <td>82</td>\n",
" <td>82</td>\n",
" <td>7750</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>1.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1675224</td>\n",
" <td>Shelters &amp; RV</td>\n",
" <td>Performance &amp; Non-auto</td>\n",
" <td>NaN</td>\n",
" <td>Reseller</td>\n",
" <td>16</td>\n",
" <td>Loss</td>\n",
" <td>5</td>\n",
" <td>124</td>\n",
" <td>124</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Known</td>\n",
" <td>1.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1689785</td>\n",
" <td>Exterior Accessories</td>\n",
" <td>Car Accessories</td>\n",
" <td>Pacific</td>\n",
" <td>Reseller</td>\n",
" <td>69</td>\n",
" <td>Loss</td>\n",
" <td>11</td>\n",
" <td>91</td>\n",
" <td>13</td>\n",
" <td>69756</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>0.00000</td>\n",
" <td>0.141125</td>\n",
" <td>0.000000</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Opportunity Number Supplies Subgroup Supplies Group \\\n",
"0 1641984 Exterior Accessories Car Accessories \n",
"1 1658010 Exterior Accessories Car Accessories \n",
"2 1674737 Motorcycle Parts Performance & Non-auto \n",
"3 1675224 Shelters & RV Performance & Non-auto \n",
"4 1689785 Exterior Accessories Car Accessories \n",
"\n",
" Region Route To Market Elapsed Days In Sales Stage Opportunity Result \\\n",
"0 Northwest Fields Sales 76 Won \n",
"1 Pacific Reseller 63 Loss \n",
"2 Pacific Reseller 24 Won \n",
"3 NaN Reseller 16 Loss \n",
"4 Pacific Reseller 69 Loss \n",
"\n",
" Sales Stage Change Count Total Days Identified Through Closing \\\n",
"0 13 104 \n",
"1 2 163 \n",
"2 7 82 \n",
"3 5 124 \n",
"4 11 91 \n",
"\n",
" Total Days Identified Through Qualified Opportunity Amount USD \\\n",
"0 101 0 \n",
"1 163 0 \n",
"2 82 7750 \n",
"3 124 0 \n",
"4 13 69756 \n",
"\n",
" Client Size By Revenue Client Size By Employee Count \\\n",
"0 5 5 \n",
"1 3 5 \n",
"2 1 1 \n",
"3 1 1 \n",
"4 1 1 \n",
"\n",
" Revenue From Client Past Two Years Competitor Type \\\n",
"0 0 Unknown \n",
"1 0 Unknown \n",
"2 0 Unknown \n",
"3 0 Known \n",
"4 0 Unknown \n",
"\n",
" Ratio Days Identified To Total Days Ratio Days Validated To Total Days \\\n",
"0 0.69636 0.113985 \n",
"1 0.00000 1.000000 \n",
"2 1.00000 0.000000 \n",
"3 1.00000 0.000000 \n",
"4 0.00000 0.141125 \n",
"\n",
" Ratio Days Qualified To Total Days Deal Size Category \n",
"0 0.154215 1 \n",
"1 0.000000 1 \n",
"2 0.000000 1 \n",
"3 0.000000 1 \n",
"4 0.000000 4 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat = pd.read_csv(\"data/WA_Fn-UseC_-Sales-Win-Loss.csv\")\n",
"dat.head()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Opportunity Number', 'Supplies Subgroup', 'Supplies Group', 'Region',\n",
" 'Route To Market', 'Elapsed Days In Sales Stage', 'Opportunity Result',\n",
" 'Sales Stage Change Count', 'Total Days Identified Through Closing',\n",
" 'Total Days Identified Through Qualified', 'Opportunity Amount USD',\n",
" 'Client Size By Revenue', 'Client Size By Employee Count',\n",
" 'Revenue From Client Past Two Years', 'Competitor Type',\n",
" 'Ratio Days Identified To Total Days',\n",
" 'Ratio Days Validated To Total Days',\n",
" 'Ratio Days Qualified To Total Days', 'Deal Size Category'],\n",
" dtype='object')"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat.columns"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Won\n",
"1 Loss\n",
"2 Won\n",
"3 Loss\n",
"4 Loss\n",
"5 Loss\n",
"6 Won\n",
"7 Loss\n",
"8 Loss\n",
"9 Loss\n",
"10 Loss\n",
"11 Loss\n",
"12 Loss\n",
"13 Loss\n",
"14 Loss\n",
"15 Won\n",
"16 Loss\n",
"17 Loss\n",
"18 Loss\n",
"19 Loss\n",
"20 Loss\n",
"21 Loss\n",
"22 Loss\n",
"23 Loss\n",
"24 Won\n",
"25 Loss\n",
"26 Loss\n",
"27 Loss\n",
"28 Loss\n",
"29 Loss\n",
" ... \n",
"77995 Loss\n",
"77996 Won\n",
"77997 Won\n",
"77998 Loss\n",
"77999 Loss\n",
"78000 Loss\n",
"78001 Won\n",
"78002 Loss\n",
"78003 Won\n",
"78004 Won\n",
"78005 Loss\n",
"78006 Won\n",
"78007 Loss\n",
"78008 Won\n",
"78009 Loss\n",
"78010 Loss\n",
"78011 Won\n",
"78012 Won\n",
"78013 Won\n",
"78014 Won\n",
"78015 Loss\n",
"78016 Won\n",
"78017 Won\n",
"78018 Won\n",
"78019 Won\n",
"78020 Loss\n",
"78021 Won\n",
"78022 Loss\n",
"78023 Loss\n",
"78024 Loss\n",
"Name: Opportunity Result, Length: 78025, dtype: object"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat['Opportunity Result']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Counting Values"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Loss 60398\n",
"Won 17627\n",
"Name: Opportunity Result, dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat['Opportunity Result'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Car Accessories 49810\n",
"Performance & Non-auto 27325\n",
"Tires & Wheels 609\n",
"Car Electronics 281\n",
"Name: Supplies Group, dtype: int64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat['Supplies Group'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16 5010\n",
"44 2388\n",
"62 1738\n",
"7 1629\n",
"23 1455\n",
"37 1412\n",
"45 1238\n",
"24 1233\n",
"35 1226\n",
"18 1220\n",
"89 1184\n",
"28 1173\n",
"26 1135\n",
"54 1124\n",
"27 1115\n",
"63 1107\n",
"49 1098\n",
"9 1058\n",
"91 1051\n",
"74 1020\n",
"81 976\n",
"64 976\n",
"41 952\n",
"65 945\n",
"0 934\n",
"47 922\n",
"73 912\n",
"17 905\n",
"84 899\n",
"90 870\n",
" ... \n",
"121 6\n",
"102 6\n",
"103 5\n",
"118 5\n",
"128 5\n",
"115 5\n",
"116 4\n",
"122 4\n",
"112 4\n",
"129 4\n",
"113 3\n",
"104 3\n",
"114 3\n",
"123 2\n",
"106 2\n",
"126 2\n",
"105 2\n",
"125 2\n",
"130 2\n",
"210 2\n",
"124 1\n",
"135 1\n",
"131 1\n",
"132 1\n",
"134 1\n",
"148 1\n",
"108 1\n",
"137 1\n",
"138 1\n",
"127 1\n",
"Name: Elapsed Days In Sales Stage, Length: 138, dtype: int64"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat['Elapsed Days In Sales Stage'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Top Five Values"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Motorcycle Parts 15174\n",
"Exterior Accessories 13876\n",
"Garage & Car Care 9733\n",
"Shelters & RV 9606\n",
"Batteries & Accessories 9192\n",
"Name: Supplies Subgroup, dtype: int64"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat['Supplies Subgroup'].value_counts()[:5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extracting Columns"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Opportunity Number</th>\n",
" <th>Supplies Subgroup</th>\n",
" <th>Supplies Group</th>\n",
" <th>Region</th>\n",
" <th>Route To Market</th>\n",
" <th>Elapsed Days In Sales Stage</th>\n",
" <th>Opportunity Result</th>\n",
" <th>Sales Stage Change Count</th>\n",
" <th>Total Days Identified Through Closing</th>\n",
" <th>Total Days Identified Through Qualified</th>\n",
" <th>Opportunity Amount USD</th>\n",
" <th>Client Size By Revenue</th>\n",
" <th>Client Size By Employee Count</th>\n",
" <th>Revenue From Client Past Two Years</th>\n",
" <th>Competitor Type</th>\n",
" <th>Ratio Days Identified To Total Days</th>\n",
" <th>Ratio Days Validated To Total Days</th>\n",
" <th>Ratio Days Qualified To Total Days</th>\n",
" <th>Deal Size Category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1641984</td>\n",
" <td>Exterior Accessories</td>\n",
" <td>Car Accessories</td>\n",
" <td>Northwest</td>\n",
" <td>Fields Sales</td>\n",
" <td>76</td>\n",
" <td>Won</td>\n",
" <td>13</td>\n",
" <td>104</td>\n",
" <td>101</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>0.69636</td>\n",
" <td>0.113985</td>\n",
" <td>0.154215</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1658010</td>\n",
" <td>Exterior Accessories</td>\n",
" <td>Car Accessories</td>\n",
" <td>Pacific</td>\n",
" <td>Reseller</td>\n",
" <td>63</td>\n",
" <td>Loss</td>\n",
" <td>2</td>\n",
" <td>163</td>\n",
" <td>163</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>0.00000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1674737</td>\n",
" <td>Motorcycle Parts</td>\n",
" <td>Performance &amp; Non-auto</td>\n",
" <td>Pacific</td>\n",
" <td>Reseller</td>\n",
" <td>24</td>\n",
" <td>Won</td>\n",
" <td>7</td>\n",
" <td>82</td>\n",
" <td>82</td>\n",
" <td>7750</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>1.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1675224</td>\n",
" <td>Shelters &amp; RV</td>\n",
" <td>Performance &amp; Non-auto</td>\n",
" <td>NaN</td>\n",
" <td>Reseller</td>\n",
" <td>16</td>\n",
" <td>Loss</td>\n",
" <td>5</td>\n",
" <td>124</td>\n",
" <td>124</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Known</td>\n",
" <td>1.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1689785</td>\n",
" <td>Exterior Accessories</td>\n",
" <td>Car Accessories</td>\n",
" <td>Pacific</td>\n",
" <td>Reseller</td>\n",
" <td>69</td>\n",
" <td>Loss</td>\n",
" <td>11</td>\n",
" <td>91</td>\n",
" <td>13</td>\n",
" <td>69756</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Unknown</td>\n",
" <td>0.00000</td>\n",
" <td>0.141125</td>\n",
" <td>0.000000</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Opportunity Number Supplies Subgroup Supplies Group \\\n",
"0 1641984 Exterior Accessories Car Accessories \n",
"1 1658010 Exterior Accessories Car Accessories \n",
"2 1674737 Motorcycle Parts Performance & Non-auto \n",
"3 1675224 Shelters & RV Performance & Non-auto \n",
"4 1689785 Exterior Accessories Car Accessories \n",
"\n",
" Region Route To Market Elapsed Days In Sales Stage Opportunity Result \\\n",
"0 Northwest Fields Sales 76 Won \n",
"1 Pacific Reseller 63 Loss \n",
"2 Pacific Reseller 24 Won \n",
"3 NaN Reseller 16 Loss \n",
"4 Pacific Reseller 69 Loss \n",
"\n",
" Sales Stage Change Count Total Days Identified Through Closing \\\n",
"0 13 104 \n",
"1 2 163 \n",
"2 7 82 \n",
"3 5 124 \n",
"4 11 91 \n",
"\n",
" Total Days Identified Through Qualified Opportunity Amount USD \\\n",
"0 101 0 \n",
"1 163 0 \n",
"2 82 7750 \n",
"3 124 0 \n",
"4 13 69756 \n",
"\n",
" Client Size By Revenue Client Size By Employee Count \\\n",
"0 5 5 \n",
"1 3 5 \n",
"2 1 1 \n",
"3 1 1 \n",
"4 1 1 \n",
"\n",
" Revenue From Client Past Two Years Competitor Type \\\n",
"0 0 Unknown \n",
"1 0 Unknown \n",
"2 0 Unknown \n",
"3 0 Known \n",
"4 0 Unknown \n",
"\n",
" Ratio Days Identified To Total Days Ratio Days Validated To Total Days \\\n",
"0 0.69636 0.113985 \n",
"1 0.00000 1.000000 \n",
"2 1.00000 0.000000 \n",
"3 1.00000 0.000000 \n",
"4 0.00000 0.141125 \n",
"\n",
" Ratio Days Qualified To Total Days Deal Size Category \n",
"0 0.154215 1 \n",
"1 0.000000 1 \n",
"2 0.000000 1 \n",
"3 0.000000 1 \n",
"4 0.000000 4 "
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dat.head()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"region_results = dat[[\"Region\", \"Opportunity Result\"]]"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(78025, 2)"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"region_results.shape"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>Opportunity Result</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Northwest</td>\n",
" <td>Won</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Pacific</td>\n",
" <td>Loss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Pacific</td>\n",
" <td>Won</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>Loss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Pacific</td>\n",
" <td>Loss</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region Opportunity Result\n",
"0 Northwest Won\n",
"1 Pacific Loss\n",
"2 Pacific Won\n",
"3 NaN Loss\n",
"4 Pacific Loss"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"region_results.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Creating a DataFrame from dicts"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" <td>Bill Clinton</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect name\n",
"0 1961 2008 Barack Obama\n",
"1 1946 2000 George W. Bush\n",
"2 1946 1992 Bill Clinton\n",
"3 1924 1988 George H.W. Bush"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents = pd.DataFrame([\n",
" { 'name': 'Barack Obama', 'elect': 2008, 'born': 1961 },\n",
" { 'name': 'George W. Bush', 'elect': 2000, 'born': 1946 },\n",
" { 'name': 'Bill Clinton', 'elect': 1992, 'born': 1946 },\n",
" { 'name': 'George H.W. Bush', 'elect': 1988, 'born': 1924 },\n",
"])\n",
"presidents"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setting the Index of a DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['born', 'elect', 'name'], dtype='object')"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.columns"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"presidents.set_index('name', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barack Obama</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George W. Bush</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bill Clinton</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George H.W. Bush</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect\n",
"name \n",
"Barack Obama 1961 2008\n",
"George W. Bush 1946 2000\n",
"Bill Clinton 1946 1992\n",
"George H.W. Bush 1924 1988"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x115a11b70>"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.groupby(by=['born'])"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>elect</th>\n",
" </tr>\n",
" <tr>\n",
" <th>born</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1924</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1946</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1961</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" elect\n",
"born \n",
"1924 1\n",
"1946 2\n",
"1961 1"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.groupby(by=['born']).count()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Barack Obama'"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents['born'].idxmax() # who is the youngest president?"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1924"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.loc['George H.W. Bush'].loc['born']"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1946"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents['born']['Bill Clinton']"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"born 1946\n",
"elect 1992\n",
"Name: Bill Clinton, dtype: int64"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.loc['Bill Clinton']"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1946"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.loc['Bill Clinton'].loc['born']"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"name\n",
"Barack Obama 1961\n",
"George W. Bush 1946\n",
"Bill Clinton 1946\n",
"George H.W. Bush 1924\n",
"Name: born, dtype: int64"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# note that we are looking at presidents dataframe here\n",
"presidents['born']\n",
"# pd.DataFrame(presidents['born'])"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1946"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents['born'].iloc[2]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"born 1946\n",
"elect 1992\n",
"Name: Bill Clinton, dtype: int64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.iloc[2]"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1946"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.iloc[2]['born']"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1946"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents['born']['Bill Clinton']"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1946"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.loc['Bill Clinton']['born']"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1992"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents.loc['Bill Clinton']['elect']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Merging Two DataFrames"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>father</th>\n",
" <th>son</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Barack Obama, Sr.</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>George H.W. Bush</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Prescott Bush</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" father son\n",
"0 Barack Obama, Sr. Barack Obama\n",
"1 George H.W. Bush George W. Bush\n",
"2 Prescott Bush George H.W. Bush"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents_dads = pd.DataFrame([\n",
" { 'son': 'Barack Obama', 'father': 'Barack Obama, Sr.' },\n",
" { 'son': 'George W. Bush', 'father': 'George H.W. Bush' },\n",
" { 'son': 'George H.W. Bush', 'father': 'Prescott Bush' },\n",
"])\n",
"\n",
"presidents_dads"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barack Obama</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George W. Bush</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bill Clinton</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George H.W. Bush</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect\n",
"name \n",
"Barack Obama 1961 2008\n",
"George W. Bush 1946 2000\n",
"Bill Clinton 1946 1992\n",
"George H.W. Bush 1924 1988"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>George H.W. Bush</td>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>George W. Bush</td>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bill Clinton</td>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Barack Obama</td>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name born elect\n",
"3 George H.W. Bush 1924 1988\n",
"1 George W. Bush 1946 2000\n",
"2 Bill Clinton 1946 1992\n",
"0 Barack Obama 1961 2008"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#presidents.reset_index(inplace=True)\n",
"#presidents.sort_values('born')"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>father</th>\n",
" <th>son</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama, Sr.</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George H.W. Bush</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>Prescott Bush</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect father son\n",
"0 1961 2008 Barack Obama, Sr. Barack Obama\n",
"1 1946 2000 George H.W. Bush George W. Bush\n",
"2 1924 1988 Prescott Bush George H.W. Bush"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.merge(presidents, presidents_dads, \n",
" left_index=True, right_on='son')"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>father</th>\n",
" <th>son</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama, Sr.</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George H.W. Bush</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" <td>NaN</td>\n",
" <td>Bill Clinton</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>Prescott Bush</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect father son\n",
"0 1961 2008 Barack Obama, Sr. Barack Obama\n",
"1 1946 2000 George H.W. Bush George W. Bush\n",
"2 1946 1992 NaN Bill Clinton\n",
"2 1924 1988 Prescott Bush George H.W. Bush"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.merge(presidents, presidents_dads, \n",
" left_index=True, right_on='son', how=\"left\")"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"final = pd.merge(presidents, presidents_dads, \n",
" left_index=True, right_on='son', how=\"left\").fillna('MISSING')"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>father</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama, Sr.</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George H.W. Bush</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" <td>MISSING</td>\n",
" <td>Bill Clinton</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>Prescott Bush</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect father name\n",
"0 1961 2008 Barack Obama, Sr. Barack Obama\n",
"1 1946 2000 George H.W. Bush George W. Bush\n",
"2 1946 1992 MISSING Bill Clinton\n",
"2 1924 1988 Prescott Bush George H.W. Bush"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final.columns = ['born', 'elect', 'father', 'name']\n",
"final"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>father</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barack Obama</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama, Sr.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George W. Bush</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bill Clinton</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" <td>MISSING</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George H.W. Bush</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>Prescott Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect father\n",
"name \n",
"Barack Obama 1961 2008 Barack Obama, Sr.\n",
"George W. Bush 1946 2000 George H.W. Bush\n",
"Bill Clinton 1946 1992 MISSING\n",
"George H.W. Bush 1924 1988 Prescott Bush"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final.set_index('name', inplace=True)\n",
"final"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'George H.W. Bush'"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final.loc['George W. Bush'].loc['father']"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'George H.W. Bush'"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final['father'].loc['George W. Bush']"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" <td>Bill Clinton</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect name\n",
"0 1961 2008 Barack Obama\n",
"1 1946 2000 George W. Bush\n",
"2 1946 1992 Bill Clinton\n",
"3 1924 1988 George H.W. Bush"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>father</th>\n",
" <th>son</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Barack Obama, Sr.</td>\n",
" <td>Barack Obama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>George H.W. Bush</td>\n",
" <td>George W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Prescott Bush</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" father son\n",
"0 Barack Obama, Sr. Barack Obama\n",
"1 George H.W. Bush George W. Bush\n",
"2 Prescott Bush George H.W. Bush"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents_dads"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>born</th>\n",
" <th>elect</th>\n",
" <th>father</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Barack Obama</th>\n",
" <td>1961</td>\n",
" <td>2008</td>\n",
" <td>Barack Obama, Sr.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George W. Bush</th>\n",
" <td>1946</td>\n",
" <td>2000</td>\n",
" <td>George H.W. Bush</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bill Clinton</th>\n",
" <td>1946</td>\n",
" <td>1992</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>George H.W. Bush</th>\n",
" <td>1924</td>\n",
" <td>1988</td>\n",
" <td>Prescott Bush</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" born elect father\n",
"name \n",
"Barack Obama 1961 2008 Barack Obama, Sr.\n",
"George W. Bush 1946 2000 George H.W. Bush\n",
"Bill Clinton 1946 1992 NaN\n",
"George H.W. Bush 1924 1988 Prescott Bush"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.merge(presidents, presidents_dads, \n",
" left_on='name', right_on='son', how=\"left\").drop('son', axis=1).set_index('name')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment