Skip to content

Instantly share code, notes, and snippets.

@wonderphil
Last active January 26, 2023 12:00
Show Gist options
  • Save wonderphil/39439675589b40b6de6d441c3c501a84 to your computer and use it in GitHub Desktop.
Save wonderphil/39439675589b40b6de6d441c3c501a84 to your computer and use it in GitHub Desktop.
time series basics
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Time Series Basics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Importing Time Series Data from csv-Files"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"temp = pd.read_csv(\"temp.csv\", parse_dates = [\"datetime\"], index_col= \"datetime\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>LA</th>\n",
" <th>NY</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-01 00:00:00</th>\n",
" <td>11.7</td>\n",
" <td>-1.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-01 01:00:00</th>\n",
" <td>10.7</td>\n",
" <td>-1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-01 02:00:00</th>\n",
" <td>9.9</td>\n",
" <td>-2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-01 03:00:00</th>\n",
" <td>9.3</td>\n",
" <td>-2.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-01 04:00:00</th>\n",
" <td>8.8</td>\n",
" <td>-2.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" LA NY\n",
"datetime \n",
"2013-01-01 00:00:00 11.7 -1.1\n",
"2013-01-01 01:00:00 10.7 -1.7\n",
"2013-01-01 02:00:00 9.9 -2.0\n",
"2013-01-01 03:00:00 9.3 -2.1\n",
"2013-01-01 04:00:00 8.8 -2.3"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 LA 35062 non-null float64\n",
" 1 NY 35064 non-null float64\n",
"dtypes: float64(2)\n",
"memory usage: 821.8 KB\n"
]
}
],
"source": [
"temp.info()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"numpy.float64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(temp.iloc[0, 0])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 01:00:00',\n",
" '2013-01-01 02:00:00', '2013-01-01 03:00:00',\n",
" '2013-01-01 04:00:00', '2013-01-01 05:00:00',\n",
" '2013-01-01 06:00:00', '2013-01-01 07:00:00',\n",
" '2013-01-01 08:00:00', '2013-01-01 09:00:00',\n",
" ...\n",
" '2016-12-31 14:00:00', '2016-12-31 15:00:00',\n",
" '2016-12-31 16:00:00', '2016-12-31 17:00:00',\n",
" '2016-12-31 18:00:00', '2016-12-31 19:00:00',\n",
" '2016-12-31 20:00:00', '2016-12-31 21:00:00',\n",
" '2016-12-31 22:00:00', '2016-12-31 23:00:00'],\n",
" dtype='datetime64[ns]', name='datetime', length=35064, freq=None)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp.index"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2013-01-01 00:00:00')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp.index[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Converting strings to datetime objects with pd.to_datetime()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"temp = pd.read_csv(\"temp.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>datetime</th>\n",
" <th>LA</th>\n",
" <th>NY</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2013-01-01 00:00:00</td>\n",
" <td>11.7</td>\n",
" <td>-1.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2013-01-01 01:00:00</td>\n",
" <td>10.7</td>\n",
" <td>-1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2013-01-01 02:00:00</td>\n",
" <td>9.9</td>\n",
" <td>-2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2013-01-01 03:00:00</td>\n",
" <td>9.3</td>\n",
" <td>-2.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2013-01-01 04:00:00</td>\n",
" <td>8.8</td>\n",
" <td>-2.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" datetime LA NY\n",
"0 2013-01-01 00:00:00 11.7 -1.1\n",
"1 2013-01-01 01:00:00 10.7 -1.7\n",
"2 2013-01-01 02:00:00 9.9 -2.0\n",
"3 2013-01-01 03:00:00 9.3 -2.1\n",
"4 2013-01-01 04:00:00 8.8 -2.3"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 35064 entries, 0 to 35063\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 datetime 35064 non-null object \n",
" 1 LA 35062 non-null float64\n",
" 2 NY 35064 non-null float64\n",
"dtypes: float64(2), object(1)\n",
"memory usage: 821.9+ KB\n"
]
}
],
"source": [
"temp.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.datetime[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"pd.to_datetime(temp.datetime)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp = temp.set_index(pd.to_datetime(temp.datetime)).drop(\"datetime\", axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.index[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"2015-05-20 10:30:20\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"20150520\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"2015/05/20\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"2015 05 20\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#pd.to_datetime(\"2015-20-05\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"2015 May 20\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"May 2015 20\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime(\"2015 20th may\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.to_datetime([\"2015-05-20\", \"Feb 20 2015\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"pd.to_datetime([\"2015-05-20\", \"Feb 20 2015\", \"Elephant\"], errors=\"coerce\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Indexing and Slicing Time Series"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp = pd.read_csv(\"temp.csv\", parse_dates= [\"datetime\"], index_col= \"datetime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.loc[\"2013-01-01 01:00:00\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[\"2015\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[\"2015-05\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[\"2015-05-20\"].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.loc[\"2015-05-20 10:00:00\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#temp.loc[\"2015-05-20 10:30:00\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[\"2015-01-01\" : \"2015-12-31\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.loc[\"2015-01-01\" : \"2015-12-31\"].equals(temp.loc[\"2015\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[\"2015-04-15\" : \"2016-02-23\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[\"2015-05-20\":]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.loc[:\"2015-05-20\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.loc[\"20FEBRUARY2015\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#temp.loc[[\"2015-05-20 10:00:00\", \"2015-05-20 12:00:00\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"two_timestamps = pd.to_datetime([\"2015-05-20 10:00:00\", \"2015-05-20 12:00:00\"])\n",
"two_timestamps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.loc[two_timestamps]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Downsampling Time Series with resample()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"plt.style.use(\"seaborn\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp = pd.read_csv(\"temp.csv\", parse_dates= [\"datetime\"], index_col = \"datetime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"list(temp.resample(\"D\"))[1][1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.head(25)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"D\").sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"2H\").first()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"W\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"W-Wed\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"M\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"MS\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"MS\", loffset=\"14D\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"Q\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"Q-Feb\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"temp.resample(\"Y\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temp.resample(\"YS\").mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment