Skip to content

Instantly share code, notes, and snippets.

@ccomte
Created May 20, 2019 11:37
Show Gist options
  • Save ccomte/41b78e562552db683b1d855629afc9e0 to your computer and use it in GitHub Desktop.
Save ccomte/41b78e562552db683b1d855629afc9e0 to your computer and use it in GitHub Desktop.
Manipulating and analysing data with pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Manipulating and analysing data with pandas"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook contains the examples shown during the session [Manipulating and analysing data with pandas](https://www.lincs.fr/events/manipulating-and-analyzing-data-with-pandas/) of the [Python Academy](https://www.lincs.fr/research/working-groups/python-academy/) working group at [LINCS](https://www.lincs.fr). The slides of the presentation are available [here](https://www.lincs.fr/wp-content/uploads/2019/05/pandas.pdf)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.854184Z",
"start_time": "2019-05-20T11:19:47.318740Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## NumPy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.867872Z",
"start_time": "2019-05-20T11:19:47.857768Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.],\n",
" [ 21., 275.]])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.array([[3., 0.], [20., 230.], [21., 275.]])\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.880504Z",
"start_time": "2019-05-20T11:19:47.870799Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"21.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[2,0]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.887766Z",
"start_time": "2019-05-20T11:19:47.882836Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.]])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[:2, :]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.897828Z",
"start_time": "2019-05-20T11:19:47.889722Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b = a[:2, :]\n",
"b"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.906623Z",
"start_time": "2019-05-20T11:19:47.899513Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0., 0.],\n",
" [ 20., 230.],\n",
" [ 21., 275.]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b[0,0] = 0.\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.914063Z",
"start_time": "2019-05-20T11:19:47.908855Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.],\n",
" [ 21., 275.]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.array([[3., 0.], [20., 230.], [21., 275.]])\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.925159Z",
"start_time": "2019-05-20T11:19:47.917548Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"(3, 2)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.936086Z",
"start_time": "2019-05-20T11:19:47.928536Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.dtype"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.944769Z",
"start_time": "2019-05-20T11:19:47.938364Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"8"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.itemsize"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.953083Z",
"start_time": "2019-05-20T11:19:47.947486Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0., 20.],\n",
" [230., 21., 275.]])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b = a.reshape(2,3)\n",
"b"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.963857Z",
"start_time": "2019-05-20T11:19:47.954665Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.],\n",
" [ 21., 275.]])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.974137Z",
"start_time": "2019-05-20T11:19:47.965965Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0., 20.],\n",
" [230., 21., 275.]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.resize(2,3)\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.985749Z",
"start_time": "2019-05-20T11:19:47.976552Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.],\n",
" [ 21., 275.]])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.resize(3,2)\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:47.991043Z",
"start_time": "2019-05-20T11:19:47.987680Z"
},
"hidden": true
},
"outputs": [],
"source": [
"b = np.resize(a, (2,3))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.001918Z",
"start_time": "2019-05-20T11:19:47.993555Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3., 0.],\n",
" [ 20., 230.],\n",
" [ 21., 275.]])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b[0,0] = 0\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.012783Z",
"start_time": "2019-05-20T11:19:48.004136Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"549.0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.sum()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.021546Z",
"start_time": "2019-05-20T11:19:48.015098Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 44., 505.])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.sum(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.028708Z",
"start_time": "2019-05-20T11:19:48.023791Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"275.0"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.max()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.039997Z",
"start_time": "2019-05-20T11:19:48.030994Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 21., 275.])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.max(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.050717Z",
"start_time": "2019-05-20T11:19:48.041747Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([( 3, 0.), (20, 230.), (21, 275.)],\n",
" dtype=[('Age', '<i8'), ('Weight', '<f8')])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.array([(3., 0.), (20., 230.), (21., 275.)],\n",
" dtype=np.dtype([('Age', int), ('Weight', float)]))\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.059135Z",
"start_time": "2019-05-20T11:19:48.052661Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3, 20, 21])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a['Age']"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.069844Z",
"start_time": "2019-05-20T11:19:48.064548Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0., 230., 275.])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a['Weight']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.080500Z",
"start_time": "2019-05-20T11:19:48.075102Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"(3, 0.)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[0]"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Data structures in pandas"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Series"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.091504Z",
"start_time": "2019-05-20T11:19:48.082662Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei 3\n",
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pd.Series([3, 20, 21],\n",
" index=['Bei Bei', 'Mei Xiang', 'Tian Tian'],\n",
" name='Age')\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.100222Z",
"start_time": "2019-05-20T11:19:48.093809Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"<PandasArray>\n",
"[3, 20, 21]\n",
"Length: 3, dtype: int64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.array"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.107041Z",
"start_time": "2019-05-20T11:19:48.101880Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"dtype('int64')"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.dtype"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.117096Z",
"start_time": "2019-05-20T11:19:48.108426Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Age'"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.name"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.128566Z",
"start_time": "2019-05-20T11:19:48.119470Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Bei Bei', 'Mei Xiang', 'Tian Tian'], dtype='object')"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.index"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.137381Z",
"start_time": "2019-05-20T11:19:48.131055Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"20"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s['Mei Xiang']"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.144055Z",
"start_time": "2019-05-20T11:19:48.139791Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"20"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Positional lookup: use .iloc explicitly. A bare integer key on a\n",
"# label-indexed Series is deprecated (pandas >= 2.1) in favour of .iloc.\n",
"s.iloc[1]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.154477Z",
"start_time": "2019-05-20T11:19:48.145841Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s['Mei Xiang':'Tian Tian']"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.165256Z",
"start_time": "2019-05-20T11:19:48.156239Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[1:3]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.175884Z",
"start_time": "2019-05-20T11:19:48.167785Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[1:]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.183993Z",
"start_time": "2019-05-20T11:19:48.178238Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t = s['Mei Xiang':'Tian Tian']\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.192775Z",
"start_time": "2019-05-20T11:19:48.185491Z"
},
"hidden": true
},
"outputs": [],
"source": [
"t['Tian Tian'] = 22"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.204706Z",
"start_time": "2019-05-20T11:19:48.194807Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei 3\n",
"Mei Xiang 20\n",
"Tian Tian 22\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.213850Z",
"start_time": "2019-05-20T11:19:48.206406Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei 3\n",
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pd.Series([3, 20, 21],\n",
" index=['Bei Bei', 'Mei Xiang', 'Tian Tian'],\n",
" name='Age')\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.221728Z",
"start_time": "2019-05-20T11:19:48.215724Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Mei Xiang 230.0\n",
"Tian Tian 275.0\n",
"Name: Weight, dtype: float64"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"u = pd.Series([230., 275.],\n",
" index=['Mei Xiang', 'Tian Tian'],\n",
" name='Weight')\n",
"u"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.233000Z",
"start_time": "2019-05-20T11:19:48.223337Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei NaN\n",
"Mei Xiang 250.0\n",
"Tian Tian 296.0\n",
"dtype: float64"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.add(u)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.246104Z",
"start_time": "2019-05-20T11:19:48.234552Z"
},
"hidden": true,
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei 3.0\n",
"Mei Xiang 250.0\n",
"Tian Tian 296.0\n",
"dtype: float64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.add(u, fill_value=0)"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.262026Z",
"start_time": "2019-05-20T11:19:48.248305Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Bei Bei</th>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mei Xiang</th>\n",
" <td>20</td>\n",
" <td>230.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Tian Tian</th>\n",
" <td>21</td>\n",
" <td>275.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Weight\n",
"Bei Bei 3 NaN\n",
"Mei Xiang 20 230.0\n",
"Tian Tian 21 275.0"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({'Age': [3, 20, 21],\n",
" 'Weight': [np.nan, 230., 275.]},\n",
" index=['Bei Bei', 'Mei Xiang', 'Tian Tian'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.272166Z",
"start_time": "2019-05-20T11:19:48.264478Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Age int64\n",
"Weight float64\n",
"dtype: object"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.281907Z",
"start_time": "2019-05-20T11:19:48.274577Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"(3, 2)"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.292929Z",
"start_time": "2019-05-20T11:19:48.284143Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Age', 'Weight'], dtype='object')"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.301831Z",
"start_time": "2019-05-20T11:19:48.295306Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Bei Bei', 'Mei Xiang', 'Tian Tian'], dtype='object')"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.310085Z",
"start_time": "2019-05-20T11:19:48.304929Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.size"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.321746Z",
"start_time": "2019-05-20T11:19:48.312699Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei 3\n",
"Mei Xiang 20\n",
"Tian Tian 21\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Age']"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.336780Z",
"start_time": "2019-05-20T11:19:48.323760Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>Weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Mei Xiang</th>\n",
" <td>20</td>\n",
" <td>230.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Tian Tian</th>\n",
" <td>21</td>\n",
" <td>275.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Weight\n",
"Mei Xiang 20 230.0\n",
"Tian Tian 21 275.0"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Mei Xiang':'Tian Tian']"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.344150Z",
"start_time": "2019-05-20T11:19:48.338681Z"
},
"hidden": true
},
"outputs": [],
"source": [
"# df['Mei Xiang']"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.354612Z",
"start_time": "2019-05-20T11:19:48.345825Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Age 20.0\n",
"Weight 230.0\n",
"Name: Mei Xiang, dtype: float64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['Mei Xiang','Age':'Weight']"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.370244Z",
"start_time": "2019-05-20T11:19:48.356886Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Age 44.0\n",
"Weight 505.0\n",
"dtype: float64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sum()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.382072Z",
"start_time": "2019-05-20T11:19:48.372293Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Age 44.0\n",
"Weight 505.0\n",
"dtype: float64"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sum(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.392189Z",
"start_time": "2019-05-20T11:19:48.384396Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Bei Bei 3.0\n",
"Mei Xiang 250.0\n",
"Tian Tian 296.0\n",
"dtype: float64"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sum(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.398939Z",
"start_time": "2019-05-20T11:19:48.394281Z"
},
"hidden": true
},
"outputs": [],
"source": [
"#del(df['Weight'])\n",
"#df"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.407860Z",
"start_time": "2019-05-20T11:19:48.401210Z"
},
"hidden": true
},
"outputs": [],
"source": [
"#s = df.pop('Age')\n",
"#s"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Data analysis tools in pandas"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"The organization and most of the examples of this part come from pandas' official tutorial [10 minutes to pandas](http://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html). Some examples also originate from the [user guide](http://pandas.pydata.org/pandas-docs/stable/user_guide/index.html)."
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Object creation"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.420709Z",
"start_time": "2019-05-20T11:19:48.410242Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
" '2013-01-05'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dates = pd.date_range('20130101', periods=5)\n",
"dates"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.441663Z",
"start_time": "2019-05-20T11:19:48.423279Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>1.159100</td>\n",
" <td>-0.377109</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>-2.545407</td>\n",
" <td>1.001357</td>\n",
" <td>-0.649275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>-1.283808</td>\n",
" <td>-1.018828</td>\n",
" <td>1.221888</td>\n",
" <td>-0.964830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>-0.110649</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>-1.553638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>-1.025729</td>\n",
" <td>-2.389124</td>\n",
" <td>-2.182942</td>\n",
" <td>-0.012242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 1.159100 -0.377109 0.865423 0.024703\n",
"2013-01-02 0.991748 -2.545407 1.001357 -0.649275\n",
"2013-01-03 -1.283808 -1.018828 1.221888 -0.964830\n",
"2013-01-04 -0.110649 0.431603 1.549942 -1.553638\n",
"2013-01-05 -1.025729 -2.389124 -2.182942 -0.012242"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randn(5, 4), index=dates, columns=list('ABCD'))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.458847Z",
"start_time": "2019-05-20T11:19:48.443227Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" <th>F</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>test</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>train</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>test</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>train</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E F\n",
"0 1.0 2013-01-02 1.0 3 test foo\n",
"1 1.0 2013-01-02 1.0 3 train foo\n",
"2 1.0 2013-01-02 1.0 3 test foo\n",
"3 1.0 2013-01-02 1.0 3 train foo"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = pd.DataFrame({'A': 1.,\n",
" 'B': pd.Timestamp('20130102'),\n",
" 'C': pd.Series(1, index=list(range(4)), dtype='float32'),\n",
" 'D': np.array([3] * 4, dtype='int32'),\n",
" 'E': pd.Categorical([\"test\", \"train\", \"test\", \"train\"]),\n",
" 'F': 'foo'})\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.471665Z",
"start_time": "2019-05-20T11:19:48.460785Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"A float64\n",
"B datetime64[ns]\n",
"C float32\n",
"D int32\n",
"E category\n",
"F object\n",
"dtype: object"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Viewing data"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.490230Z",
"start_time": "2019-05-20T11:19:48.474320Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>1.159100</td>\n",
" <td>-0.377109</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>-2.545407</td>\n",
" <td>1.001357</td>\n",
" <td>-0.649275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>-1.283808</td>\n",
" <td>-1.018828</td>\n",
" <td>1.221888</td>\n",
" <td>-0.964830</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 1.159100 -0.377109 0.865423 0.024703\n",
"2013-01-02 0.991748 -2.545407 1.001357 -0.649275\n",
"2013-01-03 -1.283808 -1.018828 1.221888 -0.964830"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.503465Z",
"start_time": "2019-05-20T11:19:48.492466Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>-1.283808</td>\n",
" <td>-1.018828</td>\n",
" <td>1.221888</td>\n",
" <td>-0.964830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>-0.110649</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>-1.553638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>-1.025729</td>\n",
" <td>-2.389124</td>\n",
" <td>-2.182942</td>\n",
" <td>-0.012242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-03 -1.283808 -1.018828 1.221888 -0.964830\n",
"2013-01-04 -0.110649 0.431603 1.549942 -1.553638\n",
"2013-01-05 -1.025729 -2.389124 -2.182942 -0.012242"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail(3)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.512153Z",
"start_time": "2019-05-20T11:19:48.505787Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
" '2013-01-05'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.524389Z",
"start_time": "2019-05-20T11:19:48.514349Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B', 'C', 'D'], dtype='object')"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.537240Z",
"start_time": "2019-05-20T11:19:48.527648Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"2013-01-01 1.159100\n",
"2013-01-02 0.991748\n",
"2013-01-03 -1.283808\n",
"2013-01-04 -0.110649\n",
"2013-01-05 -1.025729\n",
"Freq: D, Name: A, dtype: float64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.A"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.545439Z",
"start_time": "2019-05-20T11:19:48.539132Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.15909981, -0.37710932, 0.86542348, 0.0247032 ],\n",
" [ 0.99174807, -2.54540728, 1.00135698, -0.64927482],\n",
" [-1.28380839, -1.01882835, 1.22188822, -0.96483024],\n",
" [-0.11064882, 0.43160327, 1.54994242, -1.55363829],\n",
" [-1.02572943, -2.38912399, -2.18294221, -0.01224229]])"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = df.to_numpy()\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.549933Z",
"start_time": "2019-05-20T11:19:48.547141Z"
},
"hidden": true
},
"outputs": [],
"source": [
"a[0,0] = 0"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.561679Z",
"start_time": "2019-05-20T11:19:48.556970Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"0.0"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['A'][0]"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.572547Z",
"start_time": "2019-05-20T11:19:48.563912Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],\n",
" [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],\n",
" [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],\n",
" [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],\n",
" dtype=object)"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a2 = df2.to_numpy()\n",
"a2"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.580072Z",
"start_time": "2019-05-20T11:19:48.575027Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a2[0,0] = 0\n",
"df2['A'][0]"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.593570Z",
"start_time": "2019-05-20T11:19:48.581834Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" <th>F</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>test</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>train</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>test</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>2013-01-02</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>train</td>\n",
" <td>foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E F\n",
"0 1.0 2013-01-02 1.0 3 test foo\n",
"1 1.0 2013-01-02 1.0 3 train foo\n",
"2 1.0 2013-01-02 1.0 3 test foo\n",
"3 1.0 2013-01-02 1.0 3 train foo"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.values[0,0] = 0\n",
"df2"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Selection"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.609760Z",
"start_time": "2019-05-20T11:19:48.595165Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>-2.545407</td>\n",
" <td>1.001357</td>\n",
" <td>-0.649275</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-02 0.991748 -2.545407 1.001357 -0.649275"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.A > 0]"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.638483Z",
"start_time": "2019-05-20T11:19:48.617377Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>NaN</td>\n",
" <td>1.001357</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.221888</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>NaN</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 NaN NaN 0.865423 0.024703\n",
"2013-01-02 0.991748 NaN 1.001357 NaN\n",
"2013-01-03 NaN NaN 1.221888 NaN\n",
"2013-01-04 NaN 0.431603 1.549942 NaN\n",
"2013-01-05 NaN NaN NaN NaN"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df[df > 0]\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Missing Data"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.649823Z",
"start_time": "2019-05-20T11:19:48.640389Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [A, B, C, D]\n",
"Index: []"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.659961Z",
"start_time": "2019-05-20T11:19:48.651475Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [2013-01-01 00:00:00, 2013-01-02 00:00:00, 2013-01-03 00:00:00, 2013-01-04 00:00:00, 2013-01-05 00:00:00]"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dropna(axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"`dropna` returns a new DataFrame; the original `df` is left unchanged:"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.675886Z",
"start_time": "2019-05-20T11:19:48.661794Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>NaN</td>\n",
" <td>1.001357</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.221888</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>NaN</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 NaN NaN 0.865423 0.024703\n",
"2013-01-02 0.991748 NaN 1.001357 NaN\n",
"2013-01-03 NaN NaN 1.221888 NaN\n",
"2013-01-04 NaN 0.431603 1.549942 NaN\n",
"2013-01-05 NaN NaN NaN NaN"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.692007Z",
"start_time": "2019-05-20T11:19:48.678116Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>0.000000</td>\n",
" <td>1.001357</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.221888</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>0.000000</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 0.000000 0.000000 0.865423 0.024703\n",
"2013-01-02 0.991748 0.000000 1.001357 0.000000\n",
"2013-01-03 0.000000 0.000000 1.221888 0.000000\n",
"2013-01-04 0.000000 0.431603 1.549942 0.000000\n",
"2013-01-05 0.000000 0.000000 0.000000 0.000000"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.fillna(value=0)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.705342Z",
"start_time": "2019-05-20T11:19:48.694120Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>NaN</td>\n",
" <td>1.001357</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.221888</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>NaN</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 NaN NaN 0.865423 0.024703\n",
"2013-01-02 0.991748 NaN 1.001357 NaN\n",
"2013-01-03 NaN NaN 1.221888 NaN\n",
"2013-01-04 NaN 0.431603 1.549942 NaN\n",
"2013-01-05 NaN NaN NaN NaN"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.717620Z",
"start_time": "2019-05-20T11:19:48.707304Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 True True False False\n",
"2013-01-02 False True False True\n",
"2013-01-03 True True False True\n",
"2013-01-04 True False False True\n",
"2013-01-05 True True True True"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.737066Z",
"start_time": "2019-05-20T11:19:48.719983Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 True True False False\n",
"2013-01-02 False True False True\n",
"2013-01-03 True True False True\n",
"2013-01-04 True False False True\n",
"2013-01-05 True True True True"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.isna(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Getting Data In/Out"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.747435Z",
"start_time": "2019-05-20T11:19:48.739130Z"
},
"hidden": true
},
"outputs": [],
"source": [
"df.to_csv('foo.csv')"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.765695Z",
"start_time": "2019-05-20T11:19:48.750022Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.865423</td>\n",
" <td>0.024703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-02</th>\n",
" <td>0.991748</td>\n",
" <td>NaN</td>\n",
" <td>1.001357</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-03</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.221888</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" <td>NaN</td>\n",
" <td>0.431603</td>\n",
" <td>1.549942</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-05</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"2013-01-01 NaN NaN 0.865423 0.024703\n",
"2013-01-02 0.991748 NaN 1.001357 NaN\n",
"2013-01-03 NaN NaN 1.221888 NaN\n",
"2013-01-04 NaN 0.431603 1.549942 NaN\n",
"2013-01-05 NaN NaN NaN NaN"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv('foo.csv', index_col=0)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Hierarchical indexing (MultiIndex)"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"Example from the pandas user guide, section [Hierarchical indexing (MultiIndex)](https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-hierarchical)."
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.775061Z",
"start_time": "2019-05-20T11:19:48.768069Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"[array(['bar', 'bar', 'foo', 'foo'], dtype='<U3'),\n",
" array(['one', 'two', 'one', 'two'], dtype='<U3')]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arrays = [np.array(['bar', 'bar', 'foo', 'foo']),\n",
" np.array(['one', 'two', 'one', 'two'])]\n",
"arrays"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.786669Z",
"start_time": "2019-05-20T11:19:48.777858Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"bar one -0.840014\n",
" two -0.756362\n",
"foo one -0.547463\n",
" two -0.613027\n",
"dtype: float64"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pd.Series(np.random.randn(4), index=arrays)\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.805754Z",
"start_time": "2019-05-20T11:19:48.788798Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">bar</th>\n",
" <th>one</th>\n",
" <td>-0.075528</td>\n",
" <td>-2.227210</td>\n",
" <td>-1.484136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td>0.707189</td>\n",
" <td>0.207465</td>\n",
" <td>0.094467</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">foo</th>\n",
" <th>one</th>\n",
" <td>-1.311922</td>\n",
" <td>-1.140869</td>\n",
" <td>0.140230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td>1.157357</td>\n",
" <td>0.816002</td>\n",
" <td>0.041543</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"bar one -0.075528 -2.227210 -1.484136\n",
" two 0.707189 0.207465 0.094467\n",
"foo one -1.311922 -1.140869 0.140230\n",
" two 1.157357 0.816002 0.041543"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randn(4, 3), index=arrays)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Various operations"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.815203Z",
"start_time": "2019-05-20T11:19:48.808316Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"0 0.119274\n",
"1 -0.586153\n",
"2 -0.301974\n",
"dtype: float64"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.mean()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.825814Z",
"start_time": "2019-05-20T11:19:48.817959Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"0 0.119274\n",
"1 -0.586153\n",
"2 -0.301974\n",
"dtype: float64"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.mean(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.850484Z",
"start_time": "2019-05-20T11:19:48.827967Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">bar</th>\n",
" <th>one</th>\n",
" <td>-0.075528</td>\n",
" <td>-2.227210</td>\n",
" <td>-1.484136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td>0.631661</td>\n",
" <td>-2.019745</td>\n",
" <td>-1.389669</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">foo</th>\n",
" <th>one</th>\n",
" <td>-0.680261</td>\n",
" <td>-3.160614</td>\n",
" <td>-1.249439</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td>0.477096</td>\n",
" <td>-2.344612</td>\n",
" <td>-1.207896</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"bar one -0.075528 -2.227210 -1.484136\n",
" two 0.631661 -2.019745 -1.389669\n",
"foo one -0.680261 -3.160614 -1.249439\n",
" two 0.477096 -2.344612 -1.207896"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.apply(np.cumsum)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.859697Z",
"start_time": "2019-05-20T11:19:48.851969Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"-0.613027 1\n",
"-0.840014 1\n",
"-0.756362 1\n",
"-0.547463 1\n",
"dtype: int64"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Gather Series or DataFrames"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.870360Z",
"start_time": "2019-05-20T11:19:48.861531Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.244737</td>\n",
" <td>0.193583</td>\n",
" <td>0.618107</td>\n",
" <td>1.367776</td>\n",
" <td>0.421097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.136700</td>\n",
" <td>0.492938</td>\n",
" <td>0.585207</td>\n",
" <td>0.075231</td>\n",
" <td>0.299545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-1.093732</td>\n",
" <td>-0.232071</td>\n",
" <td>0.678656</td>\n",
" <td>0.882982</td>\n",
" <td>0.824984</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.576383</td>\n",
" <td>0.463082</td>\n",
" <td>0.592405</td>\n",
" <td>-0.747336</td>\n",
" <td>0.420204</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4\n",
"0 -0.244737 0.193583 0.618107 1.367776 0.421097\n",
"1 -0.136700 0.492938 0.585207 0.075231 0.299545\n",
"2 -1.093732 -0.232071 0.678656 0.882982 0.824984\n",
"3 0.576383 0.463082 0.592405 -0.747336 0.420204"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randn(4, 5))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.880758Z",
"start_time": "2019-05-20T11:19:48.872522Z"
},
"hidden": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 1 2 3 4\n",
"0 -0.244737 0.193583 0.618107 1.367776 0.421097\n",
"1 -0.136700 0.492938 0.585207 0.075231 0.299545\n",
"2 -1.093732 -0.232071 0.678656 0.882982 0.824984\n",
"3 0.576383 0.463082 0.592405 -0.747336 0.420204\n"
]
}
],
"source": [
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.896870Z",
"start_time": "2019-05-20T11:19:48.882288Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"[ 0 1 2 3 4\n",
" 0 -0.244737 0.193583 0.618107 1.367776 0.421097\n",
" 1 -0.136700 0.492938 0.585207 0.075231 0.299545,\n",
" 0 1 2 3 4\n",
" 2 -1.093732 -0.232071 0.678656 0.882982 0.824984\n",
" 3 0.576383 0.463082 0.592405 -0.747336 0.420204]"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pieces = [df[:2], df[2:]]\n",
"pieces"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.910874Z",
"start_time": "2019-05-20T11:19:48.898491Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.244737</td>\n",
" <td>0.193583</td>\n",
" <td>0.618107</td>\n",
" <td>1.367776</td>\n",
" <td>0.421097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.136700</td>\n",
" <td>0.492938</td>\n",
" <td>0.585207</td>\n",
" <td>0.075231</td>\n",
" <td>0.299545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-1.093732</td>\n",
" <td>-0.232071</td>\n",
" <td>0.678656</td>\n",
" <td>0.882982</td>\n",
" <td>0.824984</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.576383</td>\n",
" <td>0.463082</td>\n",
" <td>0.592405</td>\n",
" <td>-0.747336</td>\n",
" <td>0.420204</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4\n",
"0 -0.244737 0.193583 0.618107 1.367776 0.421097\n",
"1 -0.136700 0.492938 0.585207 0.075231 0.299545\n",
"2 -1.093732 -0.232071 0.678656 0.882982 0.824984\n",
"3 0.576383 0.463082 0.592405 -0.747336 0.420204"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(pieces)"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Grouping"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.922679Z",
"start_time": "2019-05-20T11:19:48.912452Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>foo</td>\n",
" <td>-0.553484</td>\n",
" <td>1.462874</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>bar</td>\n",
" <td>-0.131502</td>\n",
" <td>-0.151523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>foo</td>\n",
" <td>0.699913</td>\n",
" <td>0.490660</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bar</td>\n",
" <td>0.492997</td>\n",
" <td>-2.148453</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C\n",
"0 foo -0.553484 1.462874\n",
"1 bar -0.131502 -0.151523\n",
"2 foo 0.699913 0.490660\n",
"3 bar 0.492997 -2.148453"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar'],\n",
" 'B': np.random.randn(4),\n",
" 'C': np.random.randn(4)})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.935918Z",
"start_time": "2019-05-20T11:19:48.924256Z"
},
"hidden": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" A B C\n",
"0 foo -0.553484 1.462874\n",
"1 bar -0.131502 -0.151523\n",
"2 foo 0.699913 0.490660\n",
"3 bar 0.492997 -2.148453\n"
]
}
],
"source": [
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.950540Z",
"start_time": "2019-05-20T11:19:48.937671Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>bar</th>\n",
" <td>0.361495</td>\n",
" <td>-2.299976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>foo</th>\n",
" <td>0.146429</td>\n",
" <td>1.953534</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" B C\n",
"A \n",
"bar 0.361495 -2.299976\n",
"foo 0.146429 1.953534"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby('A').sum()"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.961146Z",
"start_time": "2019-05-20T11:19:48.952364Z"
},
"hidden": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" B C\n",
"A \n",
"bar 0.361495 -2.299976\n",
"foo 0.146429 1.953534\n"
]
}
],
"source": [
"print(df.groupby('A').sum())"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Time Series"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.979698Z",
"start_time": "2019-05-20T11:19:48.963420Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',\n",
" '2012-01-01 00:00:02', '2012-01-01 00:00:03'],\n",
" dtype='datetime64[ns]', freq='S')"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rng = pd.date_range('1/1/2012', periods=4, freq='S')\n",
"rng"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:48.990993Z",
"start_time": "2019-05-20T11:19:48.982003Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"2012-01-01 00:00:00 172\n",
"2012-01-01 00:00:01 257\n",
"2012-01-01 00:00:02 452\n",
"2012-01-01 00:00:03 400\n",
"Freq: S, dtype: int64"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)\n",
"ts"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:49.006453Z",
"start_time": "2019-05-20T11:19:48.993252Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"2012-01-01 00:00:00 429\n",
"2012-01-01 00:00:02 852\n",
"Freq: 2S, dtype: int64"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.resample('2S').sum()"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Categorical"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:49.017460Z",
"start_time": "2019-05-20T11:19:49.008481Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"0 a\n",
"1 b\n",
"2 c\n",
"3 a\n",
"dtype: category\n",
"Categories (3, object): [a, b, c]"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pd.Series([\"a\", \"b\", \"c\", \"a\"], dtype=\"category\")\n",
"s"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true,
"hidden": true
},
"source": [
"### Plotting"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:49.026305Z",
"start_time": "2019-05-20T11:19:49.019037Z"
},
"code_folding": [
0
],
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"2000-01-01 -0.914553\n",
"2000-01-02 -0.533403\n",
"2000-01-03 2.094352\n",
"2000-01-04 0.923388\n",
"2000-01-05 1.378172\n",
"Freq: D, dtype: float64"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = pd.Series(np.random.randn(1000),\n",
" index=pd.date_range('1/1/2000', periods=1000))\n",
"ts.head()"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:49.037762Z",
"start_time": "2019-05-20T11:19:49.028015Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"2000-01-01 -0.914553\n",
"2000-01-02 -1.447956\n",
"2000-01-03 0.646397\n",
"2000-01-04 1.569784\n",
"2000-01-05 2.947957\n",
"Freq: D, dtype: float64"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts = ts.cumsum()\n",
"ts.head()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"ExecuteTime": {
"end_time": "2019-05-20T11:19:49.360267Z",
"start_time": "2019-05-20T11:19:49.039323Z"
},
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2f28c3e588>"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"ts.plot()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment