Last active
September 20, 2018 09:20
-
-
Save dharmeshdev19/62275a3da71b562f27ae02538291e074 to your computer and use it in GitHub Desktop.
Pandas / 02 - Learning Pandas / 01 - Starting with Pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Object Creation in Pandas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Creating basic series" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 1.0\n", | |
"1 3.0\n", | |
"2 5.0\n", | |
"3 NaN\n", | |
"4 6.0\n", | |
"5 8.0\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"my_series = pd.Series([1,3,5,np.nan,6,8])\n", | |
"my_series" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Create a datetime index (used later as a DataFrame index)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',\n", | |
" '2016-01-05', '2016-01-06'],\n", | |
" dtype='datetime64[ns]', freq='D')" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"my_dates_index = pd.date_range('20160101', periods=6)\n", | |
"my_dates_index" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Create DataFrame object from python dictionary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>float</th>\n", | |
" <th>time</th>\n", | |
" <th>series</th>\n", | |
" <th>array</th>\n", | |
" <th>categories</th>\n", | |
" <th>dull</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-08-25</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3</td>\n", | |
" <td>test</td>\n", | |
" <td>boring data</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-08-25</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3</td>\n", | |
" <td>train</td>\n", | |
" <td>boring data</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-08-25</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3</td>\n", | |
" <td>taxes</td>\n", | |
" <td>boring data</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-08-25</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3</td>\n", | |
" <td>tools</td>\n", | |
" <td>boring data</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" float time series array categories dull\n", | |
"0 1.0 2016-08-25 1.0 3 test boring data\n", | |
"1 1.0 2016-08-25 1.0 3 train boring data\n", | |
"2 1.0 2016-08-25 1.0 3 taxes boring data\n", | |
"3 1.0 2016-08-25 1.0 3 tools boring data" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_from_dictionary = pd.DataFrame({ \n", | |
" 'float' : 1.,\n", | |
" 'time' : pd.Timestamp('20160825'),\n", | |
" 'series' : pd.Series(1,index=list(range(4)),dtype='float32'),\n", | |
" 'array' : np.array([3] * 4,dtype='int32'),\n", | |
" 'categories' : pd.Categorical([\"test\",\"train\",\"taxes\",\"tools\"]),\n", | |
" 'dull' : 'boring data' \n", | |
" })\n", | |
"df_from_dictionary" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Create DataFrame object Using Numpy array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Original Numpy array\n", | |
"[[ 0 1 2 3]\n", | |
" [ 4 5 6 7]\n", | |
" [ 8 9 10 11]\n", | |
" [12 13 14 15]\n", | |
" [16 17 18 19]\n", | |
" [20 21 22 23]]\n", | |
"\n", | |
"\n", | |
"Origina Numpy array converted to Pandas dataframe\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3\n", | |
"0 0 1 2 3\n", | |
"1 4 5 6 7\n", | |
"2 8 9 10 11\n", | |
"3 12 13 14 15\n", | |
"4 16 17 18 19\n", | |
"5 20 21 22 23" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_numpy_data = np.array(np.arange(24)).reshape((6,4))\n", | |
"print(\"Original Numpy array\")\n", | |
"print(sample_numpy_data)\n", | |
"print(\"\\n\")\n", | |
"sample_numpy_df = pd.DataFrame(sample_numpy_data)\n", | |
"print(\"Origina Numpy array converted to Pandas dataframe\")\n", | |
"sample_numpy_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Create DataFrame object using numpy array as values and datetime array as index\n", | |
"**(for clarity, we have also labelled the columns A to D)**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-01</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-06</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-01 0 1 2 3\n", | |
"2016-01-02 4 5 6 7\n", | |
"2016-01-03 8 9 10 11\n", | |
"2016-01-04 12 13 14 15\n", | |
"2016-01-05 16 17 18 19\n", | |
"2016-01-06 20 21 22 23" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df = pd.DataFrame(sample_numpy_data, index=my_dates_index, columns=list('ABCD'))\n", | |
"sample_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Basic Operations on Pandas DataFrame or Series Object" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### display first 5 rows of dataframe object using head method\n", | |
"**by default the head method shows the first five rows; you can also display an exact number of rows by passing an integer value to the head method**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-01</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-01 0 1 2 3\n", | |
"2016-01-02 4 5 6 7\n", | |
"2016-01-03 8 9 10 11\n", | |
"2016-01-04 12 13 14 15\n", | |
"2016-01-05 16 17 18 19" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-01</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-01 0 1 2 3\n", | |
"2016-01-02 4 5 6 7\n", | |
"2016-01-03 8 9 10 11" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.head(3) # displays the first 3 rows of the dataframe object" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### display last 5 rows of dataframe object using tail method\n", | |
"**by default the tail method shows the last five rows; you can also display an exact number of rows by passing an integer value to the tail method**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-06</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-02 4 5 6 7\n", | |
"2016-01-03 8 9 10 11\n", | |
"2016-01-04 12 13 14 15\n", | |
"2016-01-05 16 17 18 19\n", | |
"2016-01-06 20 21 22 23" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-06</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-04 12 13 14 15\n", | |
"2016-01-05 16 17 18 19\n", | |
"2016-01-06 20 21 22 23" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.tail(3) # displays the last 3 rows of the dataframe object" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### display data type of each column in dataframe object" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"float float64\n", | |
"time datetime64[ns]\n", | |
"series float32\n", | |
"array int32\n", | |
"categories category\n", | |
"dull object\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_from_dictionary.dtypes" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### display only the values of dataframe object" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0, 1, 2, 3],\n", | |
" [ 4, 5, 6, 7],\n", | |
" [ 8, 9, 10, 11],\n", | |
" [12, 13, 14, 15],\n", | |
" [16, 17, 18, 19],\n", | |
" [20, 21, 22, 23]])" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 1., 3., 5., nan, 6., 8.])" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# in case of series object\n", | |
"my_series.values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[1.0, Timestamp('2016-08-25 00:00:00'), 1.0, 3, 'test',\n", | |
" 'boring data'],\n", | |
" [1.0, Timestamp('2016-08-25 00:00:00'), 1.0, 3, 'train',\n", | |
" 'boring data'],\n", | |
" [1.0, Timestamp('2016-08-25 00:00:00'), 1.0, 3, 'taxes',\n", | |
" 'boring data'],\n", | |
" [1.0, Timestamp('2016-08-25 00:00:00'), 1.0, 3, 'tools',\n", | |
" 'boring data']], dtype=object)" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_from_dictionary.values" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### display only the index(values) of dataframe object" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',\n", | |
" '2016-01-05', '2016-01-06'],\n", | |
" dtype='datetime64[ns]', freq='D')" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.index" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Int64Index([0, 1, 2, 3], dtype='int64')" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_from_dictionary.index" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### display only the columns(values) of dataframe object" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['A', 'B', 'C', 'D'], dtype='object')" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['float', 'time', 'series', 'array', 'categories', 'dull'], dtype='object')" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_from_dictionary.columns" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### describe a dataframe object\n", | |
"##### describe(): a quick statistical summary\n", | |
"- notice: integer data is summarized with floating point numbers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>6.000000</td>\n", | |
" <td>6.000000</td>\n", | |
" <td>6.000000</td>\n", | |
" <td>6.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>10.000000</td>\n", | |
" <td>11.000000</td>\n", | |
" <td>12.000000</td>\n", | |
" <td>13.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>7.483315</td>\n", | |
" <td>7.483315</td>\n", | |
" <td>7.483315</td>\n", | |
" <td>7.483315</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>3.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>5.000000</td>\n", | |
" <td>6.000000</td>\n", | |
" <td>7.000000</td>\n", | |
" <td>8.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>10.000000</td>\n", | |
" <td>11.000000</td>\n", | |
" <td>12.000000</td>\n", | |
" <td>13.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>15.000000</td>\n", | |
" <td>16.000000</td>\n", | |
" <td>17.000000</td>\n", | |
" <td>18.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>20.000000</td>\n", | |
" <td>21.000000</td>\n", | |
" <td>22.000000</td>\n", | |
" <td>23.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"count 6.000000 6.000000 6.000000 6.000000\n", | |
"mean 10.000000 11.000000 12.000000 13.000000\n", | |
"std 7.483315 7.483315 7.483315 7.483315\n", | |
"min 0.000000 1.000000 2.000000 3.000000\n", | |
"25% 5.000000 6.000000 7.000000 8.000000\n", | |
"50% 10.000000 11.000000 12.000000 13.000000\n", | |
"75% 15.000000 16.000000 17.000000 18.000000\n", | |
"max 20.000000 21.000000 22.000000 23.000000" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.describe()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### control precision of floating point numbers in dataframe object\n", | |
"**remember that this option must be set at the root level, i.e. on the main pandas object (`pd`), not on an individual dataframe; it only changes how values are displayed, not the stored data**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.set_option('display.precision', 2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>6.00</td>\n", | |
" <td>6.00</td>\n", | |
" <td>6.00</td>\n", | |
" <td>6.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>10.00</td>\n", | |
" <td>11.00</td>\n", | |
" <td>12.00</td>\n", | |
" <td>13.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>7.48</td>\n", | |
" <td>7.48</td>\n", | |
" <td>7.48</td>\n", | |
" <td>7.48</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>0.00</td>\n", | |
" <td>1.00</td>\n", | |
" <td>2.00</td>\n", | |
" <td>3.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>5.00</td>\n", | |
" <td>6.00</td>\n", | |
" <td>7.00</td>\n", | |
" <td>8.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>10.00</td>\n", | |
" <td>11.00</td>\n", | |
" <td>12.00</td>\n", | |
" <td>13.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>15.00</td>\n", | |
" <td>16.00</td>\n", | |
" <td>17.00</td>\n", | |
" <td>18.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>20.00</td>\n", | |
" <td>21.00</td>\n", | |
" <td>22.00</td>\n", | |
" <td>23.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"count 6.00 6.00 6.00 6.00\n", | |
"mean 10.00 11.00 12.00 13.00\n", | |
"std 7.48 7.48 7.48 7.48\n", | |
"min 0.00 1.00 2.00 3.00\n", | |
"25% 5.00 6.00 7.00 8.00\n", | |
"50% 10.00 11.00 12.00 13.00\n", | |
"75% 15.00 16.00 17.00 18.00\n", | |
"max 20.00 21.00 22.00 23.00" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.describe() # compared with the previous describe() output, the same statistics are now displayed with 2 decimal places" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### sort data by axis (by column)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>D</th>\n", | |
" <th>C</th>\n", | |
" <th>B</th>\n", | |
" <th>A</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-01</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>7</td>\n", | |
" <td>6</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>11</td>\n", | |
" <td>10</td>\n", | |
" <td>9</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>15</td>\n", | |
" <td>14</td>\n", | |
" <td>13</td>\n", | |
" <td>12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>19</td>\n", | |
" <td>18</td>\n", | |
" <td>17</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-06</th>\n", | |
" <td>23</td>\n", | |
" <td>22</td>\n", | |
" <td>21</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" D C B A\n", | |
"2016-01-01 3 2 1 0\n", | |
"2016-01-02 7 6 5 4\n", | |
"2016-01-03 11 10 9 8\n", | |
"2016-01-04 15 14 13 12\n", | |
"2016-01-05 19 18 17 16\n", | |
"2016-01-06 23 22 21 20" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.sort_index(axis=1, ascending=False) # axis=1 sorts by the column labels; ascending=False orders them from D to A" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### sort data by axis (by row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-06</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-01</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-06 20 21 22 23\n", | |
"2016-01-05 16 17 18 19\n", | |
"2016-01-04 12 13 14 15\n", | |
"2016-01-03 8 9 10 11\n", | |
"2016-01-02 4 5 6 7\n", | |
"2016-01-01 0 1 2 3" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.sort_index(axis=0, ascending=False) # axis=0 sorts by the row index; ascending=False puts the latest date first" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>6</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"0 1 2 6 4\n", | |
"1 2 4 9 3\n", | |
"2 7 8 4 6\n", | |
"3 4 5 1 8" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"another_df_from_dictionary = pd.DataFrame({ \n", | |
" 'A' : np.array([1,2,7,4]),\n", | |
" 'B' : np.array([2,4,8,5]),\n", | |
" 'C' : np.array([6,9,4,1]),\n", | |
" 'D' : np.array([4,3,6,8]),\n", | |
" })\n", | |
"another_df_from_dictionary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>D</th>\n", | |
" <th>C</th>\n", | |
" <th>B</th>\n", | |
" <th>A</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3</td>\n", | |
" <td>9</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>6</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>8</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" D C B A\n", | |
"0 4 6 2 1\n", | |
"1 3 9 4 2\n", | |
"2 6 4 8 7\n", | |
"3 8 1 5 4" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"another_df_from_dictionary.sort_index(axis=1, ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>6</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"3 4 5 1 8\n", | |
"2 7 8 4 6\n", | |
"1 2 4 9 3\n", | |
"0 1 2 6 4" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"another_df_from_dictionary.sort_index(axis=0, ascending=False) # sort from 3 to 0" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### sort by data within a column" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-01-06</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-01-01</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"2016-01-06 20 21 22 23\n", | |
"2016-01-05 16 17 18 19\n", | |
"2016-01-04 12 13 14 15\n", | |
"2016-01-03 8 9 10 11\n", | |
"2016-01-02 4 5 6 7\n", | |
"2016-01-01 0 1 2 3" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.sort_values(by='B', ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>6</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D\n", | |
"1 2 4 9 3\n", | |
"0 1 2 6 4\n", | |
"2 7 8 4 6\n", | |
"3 4 5 1 8" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"another_df_from_dictionary.sort_values(by=\"C\", ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### transpose rows and columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>2016-01-01 00:00:00</th>\n", | |
" <th>2016-01-02 00:00:00</th>\n", | |
" <th>2016-01-03 00:00:00</th>\n", | |
" <th>2016-01-04 00:00:00</th>\n", | |
" <th>2016-01-05 00:00:00</th>\n", | |
" <th>2016-01-06 00:00:00</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>12</td>\n", | |
" <td>16</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B</th>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>9</td>\n", | |
" <td>13</td>\n", | |
" <td>17</td>\n", | |
" <td>21</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>C</th>\n", | |
" <td>2</td>\n", | |
" <td>6</td>\n", | |
" <td>10</td>\n", | |
" <td>14</td>\n", | |
" <td>18</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>D</th>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>11</td>\n", | |
" <td>15</td>\n", | |
" <td>19</td>\n", | |
" <td>23</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 2016-01-01 2016-01-02 2016-01-03 2016-01-04 2016-01-05 2016-01-06\n", | |
"A 0 4 8 12 16 20\n", | |
"B 1 5 9 13 17 21\n", | |
"C 2 6 10 14 18 22\n", | |
"D 3 7 11 15 19 23" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_df.T" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment