Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erood/f9197e256e66246ecf1425cc1956aaf5 to your computer and use it in GitHub Desktop.
Save erood/f9197e256e66246ecf1425cc1956aaf5 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline \n",
"import matplotlib.pyplot as plt \n",
"import pandas \n",
"import numpy as np\n",
"\n",
"import statsmodels.api as sm\n",
"from statsmodels.sandbox.regression.predstd import wls_prediction_std\n",
"\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"from plotly.tools import FigureFactory as FF\n",
"py.sign_in('erikrood','3eqsrype8v')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"NIPE = pandas.read_csv('/Users/erikrood/desktop/ipython_datasets/NIPE_main_cleaned.csv') "
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Company_name</th>\n",
" <th>NIPE_MM</th>\n",
" <th>Sector</th>\n",
" <th>Industry</th>\n",
" <th>Exchange</th>\n",
" <th>Market_Cap</th>\n",
" <th>Employees_Full_Time</th>\n",
" <th>Net_income_MM</th>\n",
" <th>Clean_market_cap</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Host Hotels &amp; Resorts Inc</td>\n",
" <td>2675</td>\n",
" <td>Financials</td>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>NYSE</td>\n",
" <td>$11.8B</td>\n",
" <td>240</td>\n",
" <td>$642,000</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gilead Sciences Inc</td>\n",
" <td>2168</td>\n",
" <td>Health Care</td>\n",
" <td>Biotechnology</td>\n",
" <td>NASDAQ</td>\n",
" <td>$118.9B</td>\n",
" <td>8000</td>\n",
" <td>$17,341,000</td>\n",
" <td>119</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Realty Income Corp.</td>\n",
" <td>1967</td>\n",
" <td>Financials</td>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>NYSE</td>\n",
" <td>$14.9B</td>\n",
" <td>132</td>\n",
" <td>$259,665</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Welltower Inc</td>\n",
" <td>1719</td>\n",
" <td>Financials</td>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>NYSE</td>\n",
" <td>$24.6B</td>\n",
" <td>476</td>\n",
" <td>$818,344</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Equity Residential</td>\n",
" <td>1221</td>\n",
" <td>Financials</td>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>NYSE</td>\n",
" <td>$24.9B</td>\n",
" <td>3500</td>\n",
" <td>$4,272,041</td>\n",
" <td>25</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Company_name NIPE_MM Sector \\\n",
"0 Host Hotels & Resorts Inc 2675 Financials \n",
"1 Gilead Sciences Inc 2168 Health Care \n",
"2 Realty Income Corp. 1967 Financials \n",
"3 Welltower Inc 1719 Financials \n",
"4 Equity Residential 1221 Financials \n",
"\n",
" Industry Exchange Market_Cap \\\n",
"0 Real Estate Investment Trusts (REITs) NYSE $11.8B \n",
"1 Biotechnology NASDAQ $118.9B \n",
"2 Real Estate Investment Trusts (REITs) NYSE $14.9B \n",
"3 Real Estate Investment Trusts (REITs) NYSE $24.6B \n",
"4 Real Estate Investment Trusts (REITs) NYSE $24.9B \n",
"\n",
" Employees_Full_Time Net_income_MM Clean_market_cap \n",
"0 240 $642,000 12 \n",
"1 8000 $17,341,000 119 \n",
"2 132 $259,665 15 \n",
"3 476 $818,344 25 \n",
"4 3500 $4,272,041 25 "
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"NIPE.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Industry</th>\n",
" <th>NIPE_MM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>2675.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Biotechnology</td>\n",
" <td>2168.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>1967.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>1719.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>1221.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Industry NIPE_MM\n",
"0 Real Estate Investment Trusts (REITs) 2675.0\n",
"1 Biotechnology 2168.0\n",
"2 Real Estate Investment Trusts (REITs) 1967.0\n",
"3 Real Estate Investment Trusts (REITs) 1719.0\n",
"4 Real Estate Investment Trusts (REITs) 1221.0"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = NIPE [[3,1]]\n",
"df[['NIPE_MM']] = df[['NIPE_MM']].astype(float)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Industry</th>\n",
" <th>NIPE_MM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Tobacco</td>\n",
" <td>609.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>487.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Biotechnology</td>\n",
" <td>293.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Diversified Financial Services</td>\n",
" <td>184.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Road &amp; Rail</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Consumer Finance</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Semiconductors &amp; Semiconductor Equipment</td>\n",
" <td>106.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Industry NIPE_MM\n",
"0 Tobacco 609.0\n",
"1 Real Estate Investment Trusts (REITs) 487.0\n",
"2 Biotechnology 293.0\n",
"3 Diversified Financial Services 184.0\n",
"4 Road & Rail 130.0\n",
"5 Consumer Finance 128.0\n",
"6 Semiconductors & Semiconductor Equipment 106.0"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"average_NIPE_industry = df.groupby('Industry').mean().sort_values(by='NIPE_MM',ascending=False).round(decimals=0).reset_index()\n",
"average_NIPE_industry_top7 = average_NIPE_industry.head(7)\n",
"average_NIPE_industry_top7.head(7)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Industry</th>\n",
" <th>NIPE_MM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Food &amp; Staples Retailing</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Auto Components</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Containers &amp; Packaging</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>Multiline Retail</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>Metals &amp; Mining</td>\n",
" <td>-20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>Energy Equipment &amp; Services</td>\n",
" <td>-21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>Oil, Gas &amp; Consumable Fuels</td>\n",
" <td>-421.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Industry NIPE_MM\n",
"53 Food & Staples Retailing 10.0\n",
"54 Auto Components 8.0\n",
"55 Containers & Packaging 8.0\n",
"56 Multiline Retail 7.0\n",
"57 Metals & Mining -20.0\n",
"58 Energy Equipment & Services -21.0\n",
"59 Oil, Gas & Consumable Fuels -421.0"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"average_NIPE_industry_bottom7 = average_NIPE_industry.tail(7)\n",
"average_NIPE_industry_bottom7.head(7)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ValueError",
"evalue": "labels [ 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\n 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52] not contained in axis",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-82-358bd5250706>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0maverage_NIPE_industry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m9\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m13\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m14\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m15\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m16\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m17\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m18\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m19\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m20\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m21\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m22\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m23\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m24\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m25\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m26\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m28\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m29\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m30\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m31\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m32\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m33\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m34\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m35\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m36\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m37\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m38\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m39\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m40\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m41\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m42\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m43\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m44\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m45\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m46\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m47\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m48\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m49\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m50\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m51\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m52\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0maverage_NIPE_industry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/erikrood/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, level, inplace, errors)\u001b[0m\n\u001b[1;32m 1871\u001b[0m \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1872\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1873\u001b[0;31m \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1874\u001b[0m \u001b[0mdropped\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1875\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/erikrood/anaconda/lib/python2.7/site-packages/pandas/indexes/base.pyc\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 2964\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2965\u001b[0m raise ValueError('labels %s not contained in axis' %\n\u001b[0;32m-> 2966\u001b[0;31m labels[mask])\n\u001b[0m\u001b[1;32m 2967\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2968\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: labels [ 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31\n 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52] not contained in axis"
]
}
],
"source": [
"#too lazy to create function, but dropping everything less the top 7 and bottom 7 companies by NIPE\n",
"average_NIPE_industry.drop([7,8,9,10,11,12,13,14 , 15 , 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 , 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 , 48 , 49 , 50 , 51 , 52], inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Industry</th>\n",
" <th>NIPE_MM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Tobacco</td>\n",
" <td>609.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>487.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Biotechnology</td>\n",
" <td>293.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Diversified Financial Services</td>\n",
" <td>184.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Road &amp; Rail</td>\n",
" <td>130.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Consumer Finance</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Semiconductors &amp; Semiconductor Equipment</td>\n",
" <td>106.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Food &amp; Staples Retailing</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Auto Components</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Containers &amp; Packaging</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>Multiline Retail</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>Metals &amp; Mining</td>\n",
" <td>-20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>Energy Equipment &amp; Services</td>\n",
" <td>-21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>Oil, Gas &amp; Consumable Fuels</td>\n",
" <td>-421.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Industry NIPE_MM\n",
"0 Tobacco 609.0\n",
"1 Real Estate Investment Trusts (REITs) 487.0\n",
"2 Biotechnology 293.0\n",
"3 Diversified Financial Services 184.0\n",
"4 Road & Rail 130.0\n",
"5 Consumer Finance 128.0\n",
"6 Semiconductors & Semiconductor Equipment 106.0\n",
"53 Food & Staples Retailing 10.0\n",
"54 Auto Components 8.0\n",
"55 Containers & Packaging 8.0\n",
"56 Multiline Retail 7.0\n",
"57 Metals & Mining -20.0\n",
"58 Energy Equipment & Services -21.0\n",
"59 Oil, Gas & Consumable Fuels -421.0"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"average_NIPE_industry.head(14)"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~erikrood/31.embed\" height=\"525px\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#bar chart\n",
"\n",
"\n",
"x = average_NIPE_industry['Industry']\n",
"y = average_NIPE_industry['NIPE_MM']\n",
"\n",
"data = [\n",
" go.Bar(\n",
" x=x,\n",
" y=y,\n",
" \n",
" marker=dict(\n",
" color='rgb(255, 217, 102)',\n",
" line=dict(\n",
" color='rgb(8,48,107)',\n",
" width=1.5\n",
" ),\n",
" ),\n",
" opacity=0.6\n",
" )\n",
"]\n",
"layout = go.Layout(\n",
" title='net income per employee by industry [top 7/bottom 7 industries]',\n",
" xaxis=dict(\n",
" title='industry',\n",
" tickangle = 47,\n",
" ),\n",
" yaxis=dict(\n",
" title='avg. NIPE (MM)'\n",
" ),\n",
" margin=go.Margin(\n",
" b = 220\n",
" \n",
" )\n",
"\n",
" \n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='NIPE_industry')"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Net_income_MM</th>\n",
" <th>Employees_Full_Time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>$1,000,000</td>\n",
" <td>6700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>$1,002,100</td>\n",
" <td>47145</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>$1,010,551</td>\n",
" <td>39556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>$1,020,661</td>\n",
" <td>77800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>$1,028,380</td>\n",
" <td>180835</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Net_income_MM Employees_Full_Time\n",
"0 $1,000,000 6700\n",
"1 $1,002,100 47145\n",
"2 $1,010,551 39556\n",
"3 $1,020,661 77800\n",
"4 $1,028,380 180835"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#scatter of employees vs. NI,(could drill into sectors/industries)\n",
"df4 = NIPE [[6,7]]\n",
"df4[['Employees_Full_Time']] = df4[['Employees_Full_Time']].astype(int)\n",
"df4.head()\n",
"df5 = df4.groupby('Net_income_MM').sum().round(decimals=0).reset_index()\n",
"df5.head()"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~erikrood/33.embed\" height=\"525px\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Creating a better looking scatterplot of Cereal vs. Year\n",
"\n",
"_x = df5['Employees_Full_Time']\n",
"_y = df5['Net_income_MM']\n",
"\n",
"# Create a trace\n",
"trace = go.Scatter(\n",
" x = _x,\n",
" y = _y,\n",
"\n",
" mode = 'markers',\n",
" marker = dict(\n",
" size = 6,\n",
" color = 'rgb(156, 231, 193)',\n",
"\n",
" )\n",
" \n",
")\n",
"\n",
"data = [trace]\n",
"\n",
"layout = dict(title='net income vs. number of employees',\n",
" hovermode='closest',\n",
" xaxis=dict(\n",
" title='Net Income (MM)',\n",
" ticklen=5,\n",
" zeroline=False,\n",
" gridwidth=2,\n",
" ),\n",
" yaxis=dict(\n",
" title='# of employees',\n",
" ticklen=5,\n",
" zeroline=False,\n",
" gridwidth=2,\n",
" ),\n",
")\n",
"\n",
"\n",
"# Plot and embed in ipython notebook\n",
"fig = dict(data=data, layout=layout)\n",
"py.iplot(fig, filename='NIPE_vs_Employees')"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#rank top/bottom companies\n",
"df3 = NIPE [[0,1]]\n",
"#top 10\n",
"df6 = df3.head(10)\n",
"#bottom 10\n",
"df7 = df3.tail(10)"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~erikrood/35.embed\" height=\"525px\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#bar chart\n",
"\n",
"x = df6['Company_name']\n",
"y = df6['NIPE_MM']\n",
"\n",
"data = [\n",
" go.Bar(\n",
" x=x,\n",
" y=y,\n",
" \n",
" marker=dict(\n",
" color='rgb(247, 135, 22)',\n",
" line=dict(\n",
" color='rgb(8,48,107)',\n",
" width=1.5\n",
" ),\n",
" ),\n",
" opacity=0.6\n",
" )\n",
"]\n",
"layout = go.Layout(\n",
" title='net income per employee by company [top 10]',\n",
" xaxis=dict(\n",
" title='company',\n",
" tickangle = 47,\n",
" ),\n",
" yaxis=dict(\n",
" title='NIPE (MM)'\n",
" ),\n",
" margin=go.Margin(\n",
" b = 220\n",
" \n",
" )\n",
"\n",
" \n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='NIPE_industry_top10')"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~erikrood/37.embed\" height=\"525px\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#bar chart, bottom 10\n",
"\n",
"x = df7['Company_name']\n",
"y = df7['NIPE_MM']\n",
"\n",
"data = [\n",
" go.Bar(\n",
" x=x,\n",
" y=y,\n",
" \n",
" marker=dict(\n",
" color='rgb(84, 226, 129)',\n",
" line=dict(\n",
" color='rgb(8,48,107)',\n",
" width=1.5\n",
" ),\n",
" ),\n",
" opacity=0.6\n",
" )\n",
"]\n",
"layout = go.Layout(\n",
" title='net income per employee by company [bottom 10]',\n",
" xaxis=dict(\n",
" title='company',\n",
" tickangle = 47,\n",
" ),\n",
" yaxis=dict(\n",
" title='NIPE (MM)'\n",
" ),\n",
" margin=go.Margin(\n",
" b = 220\n",
" \n",
" )\n",
"\n",
" \n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='NIPE_industry_bottom10')"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#splitting out IT companies\n",
"IT_companies = ['Information Technology']\n",
"NIPE_IT = NIPE[NIPE['Sector'].isin(IT_companies)]"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Company_name</th>\n",
" <th>NIPE_MM</th>\n",
" <th>Sector</th>\n",
" <th>Industry</th>\n",
" <th>Exchange</th>\n",
" <th>Market_Cap</th>\n",
" <th>Employees_Full_Time</th>\n",
" <th>Net_income_MM</th>\n",
" <th>Clean_market_cap</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Visa Inc</td>\n",
" <td>607</td>\n",
" <td>Information Technology</td>\n",
" <td>IT Services</td>\n",
" <td>NYSE</td>\n",
" <td>$184.2B</td>\n",
" <td>11300</td>\n",
" <td>$6,857,000</td>\n",
" <td>184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Apple Inc</td>\n",
" <td>461</td>\n",
" <td>Information Technology</td>\n",
" <td>Technology Hardware, Storage &amp; Peripherals</td>\n",
" <td>NASDAQ</td>\n",
" <td>$513.5B</td>\n",
" <td>110000</td>\n",
" <td>$50,678,000</td>\n",
" <td>514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Facebook Inc</td>\n",
" <td>369</td>\n",
" <td>Information Technology</td>\n",
" <td>Internet Software &amp; Services</td>\n",
" <td>NASDAQ</td>\n",
" <td>$336.3B</td>\n",
" <td>12691</td>\n",
" <td>$4,686,000</td>\n",
" <td>336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>MasterCard Inc</td>\n",
" <td>332</td>\n",
" <td>Information Technology</td>\n",
" <td>IT Services</td>\n",
" <td>NYSE</td>\n",
" <td>$106.9B</td>\n",
" <td>11300</td>\n",
" <td>$3,747,000</td>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Alibaba Group Holding Ltd</td>\n",
" <td>305</td>\n",
" <td>Information Technology</td>\n",
" <td>Internet Software &amp; Services</td>\n",
" <td>NYSE</td>\n",
" <td>$192B</td>\n",
" <td>34985</td>\n",
" <td>$10,673,110</td>\n",
" <td>192</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Company_name NIPE_MM Sector \\\n",
"12 Visa Inc 607 Information Technology \n",
"20 Apple Inc 461 Information Technology \n",
"23 Facebook Inc 369 Information Technology \n",
"27 MasterCard Inc 332 Information Technology \n",
"29 Alibaba Group Holding Ltd 305 Information Technology \n",
"\n",
" Industry Exchange Market_Cap \\\n",
"12 IT Services NYSE $184.2B \n",
"20 Technology Hardware, Storage & Peripherals NASDAQ $513.5B \n",
"23 Internet Software & Services NASDAQ $336.3B \n",
"27 IT Services NYSE $106.9B \n",
"29 Internet Software & Services NYSE $192B \n",
"\n",
" Employees_Full_Time Net_income_MM Clean_market_cap \n",
"12 11300 $6,857,000 184 \n",
"20 110000 $50,678,000 514 \n",
"23 12691 $4,686,000 336 \n",
"27 11300 $3,747,000 107 \n",
"29 34985 $10,673,110 192 "
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"NIPE_IT.head()"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Company_name</th>\n",
" <th>NIPE_MM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Visa Inc</td>\n",
" <td>607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Apple Inc</td>\n",
" <td>461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Facebook Inc</td>\n",
" <td>369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>MasterCard Inc</td>\n",
" <td>332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Alibaba Group Holding Ltd</td>\n",
" <td>305</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Company_name NIPE_MM\n",
"12 Visa Inc 607\n",
"20 Apple Inc 461\n",
"23 Facebook Inc 369\n",
"27 MasterCard Inc 332\n",
"29 Alibaba Group Holding Ltd 305"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df8 = NIPE_IT[[0,1]]\n",
"df8.head()"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Company_name</th>\n",
" <th>NIPE_MM</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Visa Inc</td>\n",
" <td>607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Apple Inc</td>\n",
" <td>461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Facebook Inc</td>\n",
" <td>369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>MasterCard Inc</td>\n",
" <td>332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alibaba Group Holding Ltd</td>\n",
" <td>305</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Company_name NIPE_MM\n",
"0 Visa Inc 607\n",
"1 Apple Inc 461\n",
"2 Facebook Inc 369\n",
"3 MasterCard Inc 332\n",
"4 Alibaba Group Holding Ltd 305"
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"IT_NIPE_cleaned = df8.groupby('Company_name').mean().sort_values(by='NIPE_MM',ascending=False).round(decimals=0).reset_index()\n",
"IT_NIPE_cleaned.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#standard deviation in NIPE by industry\n",
"df9 = NIPE [[3,1]]\n",
"#converting NIPE to float/#\n",
"df9[['NIPE_MM']] = df9[['NIPE_MM']].astype(float)\n",
"#pivoting and calculating std deviation\n",
"df10 = df9.groupby('Industry', as_index=False).agg(np.std, ddof=0).sort_values(by='NIPE_MM',ascending=False).round(decimals=0)"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Industry</th>\n",
" <th>NIPE_Std_dev</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>Oil, Gas &amp; Consumable Fuels</td>\n",
" <td>1092.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>Real Estate Investment Trusts (REITs)</td>\n",
" <td>989.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Biotechnology</td>\n",
" <td>596.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>Tobacco</td>\n",
" <td>427.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>Metals &amp; Mining</td>\n",
" <td>331.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Industry NIPE_Std_dev\n",
"44 Oil, Gas & Consumable Fuels 1092.0\n",
"48 Real Estate Investment Trusts (REITs) 989.0\n",
"7 Biotechnology 596.0\n",
"56 Tobacco 427.0\n",
"41 Metals & Mining 331.0"
]
},
"execution_count": 157,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#cleaning column names\n",
"df10.columns = ['Industry', 'NIPE_Std_dev'] #renaming columns\n",
"df10.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#standard deviation in NIPE by sector\n",
"df11 = NIPE [[2,1]]\n",
"#converting NIPE to float/#\n",
"df11[['NIPE_MM']] = df11[['NIPE_MM']].astype(float)\n",
"#pivoting and calculating std deviation\n",
"df12 = df11.groupby('Sector', as_index=False).agg(np.std, ddof=0).sort_values(by='NIPE_MM',ascending=False).round(decimals=0)"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sector</th>\n",
" <th>NIPE_Std_dev</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Energy</td>\n",
" <td>1058.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Financials</td>\n",
" <td>510.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Health Care</td>\n",
" <td>281.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Materials</td>\n",
" <td>204.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Consumer Staples</td>\n",
" <td>182.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Information Technology</td>\n",
" <td>135.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Telecommunication Services</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Consumer Discretionary</td>\n",
" <td>56.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Industrials</td>\n",
" <td>48.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Utilities</td>\n",
" <td>40.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sector NIPE_Std_dev\n",
"2 Energy 1058.0\n",
"3 Financials 510.0\n",
"4 Health Care 281.0\n",
"7 Materials 204.0\n",
"1 Consumer Staples 182.0\n",
"6 Information Technology 135.0\n",
"8 Telecommunication Services 73.0\n",
"0 Consumer Discretionary 56.0\n",
"5 Industrials 48.0\n",
"9 Utilities 40.0"
]
},
"execution_count": 160,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df12.columns = ['Sector', 'NIPE_Std_dev'] #renaming columns\n",
"df12.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sector</th>\n",
" <th>Avg_NIPE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Financials</td>\n",
" <td>185.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Health Care</td>\n",
" <td>92.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Consumer Staples</td>\n",
" <td>86.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Utilities</td>\n",
" <td>83.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Information Technology</td>\n",
" <td>74.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sector Avg_NIPE\n",
"0 Financials 185.0\n",
"1 Health Care 92.0\n",
"2 Consumer Staples 86.0\n",
"3 Utilities 83.0\n",
"4 Information Technology 74.0"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#avg NIPE by sector\n",
"df13 = NIPE [[2,1]]\n",
"df14 = df13.groupby('Sector').mean().sort_values(by='NIPE_MM',ascending=False).round(decimals=0).reset_index()\n",
"df14.columns = ['Sector', 'Avg_NIPE'] #renaming columns\n",
"df14.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sector</th>\n",
" <th>Avg_NIPE</th>\n",
" <th>NIPE_Std_dev</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Financials</td>\n",
" <td>185.0</td>\n",
" <td>510.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Health Care</td>\n",
" <td>92.0</td>\n",
" <td>281.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Consumer Staples</td>\n",
" <td>86.0</td>\n",
" <td>182.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Utilities</td>\n",
" <td>83.0</td>\n",
" <td>40.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Information Technology</td>\n",
" <td>74.0</td>\n",
" <td>135.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Industrials</td>\n",
" <td>43.0</td>\n",
" <td>48.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Telecommunication Services</td>\n",
" <td>40.0</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Consumer Discretionary</td>\n",
" <td>38.0</td>\n",
" <td>56.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Materials</td>\n",
" <td>35.0</td>\n",
" <td>204.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Energy</td>\n",
" <td>-393.0</td>\n",
" <td>1058.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sector Avg_NIPE NIPE_Std_dev\n",
"0 Financials 185.0 510.0\n",
"1 Health Care 92.0 281.0\n",
"2 Consumer Staples 86.0 182.0\n",
"3 Utilities 83.0 40.0\n",
"4 Information Technology 74.0 135.0\n",
"5 Industrials 43.0 48.0\n",
"6 Telecommunication Services 40.0 73.0\n",
"7 Consumer Discretionary 38.0 56.0\n",
"8 Materials 35.0 204.0\n",
"9 Energy -393.0 1058.0"
]
},
"execution_count": 164,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#joining avg_NIPE w/ standard deviation NIPE table\n",
"j1 = df14.merge(df12, how='left', on=\"Sector\")\n",
"j1.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~erikrood/39.embed\" height=\"525px\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 169,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#bar chart\n",
"\n",
"x = j1['Sector']\n",
"y1 = j1['Avg_NIPE']\n",
"y2 = j1['NIPE_Std_dev']\n",
"\n",
"\n",
"trace0 = go.Bar(\n",
" x=x,\n",
" y=y1,\n",
" name='Average NIPE',\n",
" marker=dict(\n",
" color='rgb(255, 217, 102)',\n",
" line=dict(\n",
" color='rgb(8,48,107)',\n",
" width=1.5\n",
" ),\n",
" ),\n",
" opacity=0.6\n",
" )\n",
"\n",
"trace1 = go.Bar(\n",
" x=x,\n",
" y=y2,\n",
" name='Standard deviation',\n",
" marker=dict(\n",
" color='rgb(84, 226, 129)',\n",
" line=dict(\n",
" color='rgb(8,48,107)',\n",
" width=1.5\n",
" ),\n",
" ),\n",
" opacity=0.6\n",
" )\n",
" \n",
"data = [trace0, trace1]\n",
" \n",
"layout = go.Layout(\n",
" title='mean NIPE by sector, with standard deviation',\n",
" xaxis=dict(\n",
" title='industry',\n",
" tickangle = 47,\n",
" ),\n",
" yaxis=dict(\n",
" title='NIPE (MM)'\n",
" ),\n",
" margin=go.Margin(\n",
" b = 220\n",
" \n",
" )\n",
"\n",
" \n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='NIPE_sector_stddev')"
]
},
{
"cell_type": "code",
"execution_count": 231,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#setting up y values for box plot (step 0), loading in the data I want\n",
"sector_nipe = NIPE[['Sector', 'NIPE_MM']]\n",
"sector_nipe[['NIPE_MM']] = sector_nipe[['NIPE_MM']].astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'Consumer Discretionary',\n",
" 'Consumer Staples',\n",
" 'Energy',\n",
" 'Financials',\n",
" 'Health Care',\n",
" 'Industrials',\n",
" 'Information Technology',\n",
" 'Materials',\n",
" 'Telecommunication Services',\n",
" 'Utilities'}"
]
},
"execution_count": 222,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#setting up x variables for box plot\n",
"x_data1 = {'Consumer Discretionary','Consumer Staples','Energy',\\\n",
" 'Financials', 'Health Care','Industrials',\\\n",
" 'Information Technology','Materials','Telecommunication Services','Utilities'}\n",
"x_data1"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#setting up y values for box plot (step 1)\n",
"sector_CD = sector_nipe[sector_nipe['Sector'] == 'Consumer Discretionary'].copy()\n",
"ConsumerDiscretionary = sector_CD[['NIPE_MM']].copy()\n",
"\n",
"sector_CS = sector_nipe[sector_nipe['Sector']== 'Consumer Staples'].copy()\n",
"ConsumerStaples = sector_CS[['NIPE_MM']].copy()\n",
"\n",
"sector_energy = sector_nipe[sector_nipe['Sector'] == 'Energy'].copy()\n",
"Energy = sector_energy[['NIPE_MM']].copy()\n",
"\n",
"sector_financials = sector_nipe[sector_nipe['Sector'] == 'Financials'].copy()\n",
"Financials = sector_financials[['NIPE_MM']].copy()\n",
"\n",
"sector_healthcare = sector_nipe[sector_nipe['Sector'] == 'Health Care'].copy()\n",
"Healthcare = sector_healthcare[['NIPE_MM']].copy()\n",
"\n",
"sector_industrials = sector_nipe[sector_nipe['Sector'] == 'Industrials'].copy()\n",
"Industrials = sector_industrials[['NIPE_MM']].copy()\n",
"\n",
"sector_IT = sector_nipe[sector_nipe['Sector'] == 'Information Technology'].copy()\n",
"InformationTechnology = sector_IT[['NIPE_MM']].copy()\n",
"\n",
"sector_materials = sector_nipe[sector_nipe['Sector'] == 'Materials'].copy()\n",
"Materials = sector_materials[['NIPE_MM']].copy()\n",
"\n",
"sector_TS = sector_nipe[sector_nipe['Sector'] == 'Telecommunication Services'].copy()\n",
"TelecommunicationServices = sector_TS[['NIPE_MM']].copy()\n",
"\n",
"sector_utilities = sector_nipe[sector_nipe['Sector'] == 'Utilities'].copy()\n",
"Utilities = sector_utilities[['NIPE_MM']].copy()\n",
"\n",
"#setting up y values for box plot (step 2)\n",
"y_data = [\n",
" Industrials.values,\n",
" Energy.values,\n",
" Utilities.values,\n",
" ConsumerStaples.values,\n",
" Healthcare.values,\n",
" Materials.values,\n",
" TelecommunicationServices.values,\n",
" Financials.values,\n",
" ConsumerDiscretionary.values,\n",
" InformationTechnology.values\n",
"]\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~erikrood/41.embed\" height=\"650px\" width=\"800px\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 234,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#putting together the box plot\n",
"traces = []\n",
"for xd, yd in zip(x_data1, y_data):\n",
" traces.append(go.Box(\n",
" y=yd,\n",
" name=xd,\n",
" boxpoints='all',\n",
" jitter=0.5,\n",
" whiskerwidth=0.2,\n",
" marker=dict(\n",
" size=2,\n",
" ),\n",
" line=dict(width=1),\n",
" ))\n",
"\n",
"layout = go.Layout(\n",
" title='NIPE distribution by sector',\n",
" yaxis=dict(\n",
" range=[-300, 350],\n",
" showgrid=True,\n",
" zeroline=True,\n",
" #dtick=5,\n",
" gridcolor='rgb(255, 255, 255)',\n",
" gridwidth=1,\n",
" zerolinecolor='rgb(255, 255, 255)',\n",
" zerolinewidth=2,\n",
" ),\n",
" margin=go.Margin(\n",
" l=50,\n",
" r=90,\n",
" b=280,\n",
" t=60,\n",
" pad=4\n",
" ),\n",
" width=800,\n",
" height=650,\n",
" showlegend=False\n",
")\n",
"\n",
" \n",
"fig = go.Figure(data=traces, layout=layout)\n",
"py.iplot(fig, filename = 'NIPE_distribution_sector')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment