Skip to content

Instantly share code, notes, and snippets.

@AllieUbisse
Forked from carlleston/1-ETL_TS.ipynb
Created August 23, 2020 12:11
Show Gist options
  • Save AllieUbisse/6d77b2715f3c3609ed904e068c686af1 to your computer and use it in GitHub Desktop.
Save AllieUbisse/6d77b2715f3c3609ed904e068c686af1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# IBM Capstone - ITUB4 TS Forecasting using News"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @hidden_cell\n",
"# Project handle for this notebook. The project access token is the authorization token\n",
"# used to reach project resources (data sources, connections) and by the platform APIs.\n",
"from project_lib import Project\n",
"\n",
"project = Project(\n",
"    project_id='********-****-******-****-**********',\n",
"    project_access_token='p-************************************',\n",
")\n",
"pc = project.project_context"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pinned dependencies. `%pip` (rather than `!pip`) installs into the environment of the\n",
"# running kernel instead of whichever `pip` happens to be first on the shell PATH.\n",
"%pip install yfinance==0.1.54\n",
"%pip install pandas-datareader==0.8.1"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/anthony/Capstone Advanced DS/github/Deployment/venv/lib/python3.7/site-packages/pandas_datareader/compat/__init__.py:7: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
" from pandas.util.testing import assert_frame_equal\n"
]
}
],
"source": [
"# Standard library\n",
"import datetime\n",
"from datetime import date, timedelta\n",
"\n",
"# Third party\n",
"import pandas as pd\n",
"import yfinance as yf\n",
"from pandas_datareader import data as pdr\n",
"\n",
"# Let yfinance override pandas_datareader's Yahoo reader (see the yfinance docs);\n",
"# the downloads below go through `pdr.get_data_yahoo`.\n",
"yf.pdr_override()\n",
"\n",
"# Upper bound passed as `end=` to the downloads below: one day past the current date.\n",
"# NOTE: despite its name, `today` therefore holds tomorrow's date - presumably so that\n",
"# the current day's quotes are included (TODO confirm).\n",
"today = date.today() + timedelta(days=1)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Lower bound passed as `start=` to the downloads below.\n",
"# The literal is written year-month-day, so it must be parsed with '%Y-%m-%d';\n",
"# a '%Y-%d-%m' pattern swaps day and month and yields 2005-05-04 instead of 2005-04-05.\n",
"date_time = '2005-04-05'\n",
"start = datetime.datetime.strptime(date_time, '%Y-%m-%d')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[*********************100%***********************] 1 of 1 completed\n",
"[*********************100%***********************] 1 of 1 completed\n",
"[*********************100%***********************] 1 of 1 completed\n"
]
}
],
"source": [
"### Get the data (Itau share price, Ibovespa, USDBRL) and keep only the columns used downstream.\n",
"### `reset_index()` turns the date index into a regular 'Date' column so it can be selected and merged on.\n",
"TS_itau = (\n",
"    pdr.get_data_yahoo('itub4.sa', start=start, end=today)\n",
"    .reset_index()[['Date', 'Open', 'Close']]\n",
")\n",
"BVSP = (\n",
"    pdr.get_data_yahoo('^BVSP', start=start, end=today)\n",
"    .reset_index()[['Date', 'Open']]\n",
")\n",
"USDBRL = (\n",
"    pdr.get_data_yahoo('USDBRL=X', start=start, end=today)\n",
"    .reset_index()[['Date', 'Open']]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Give each series' price columns a source-specific name so they stay distinguishable after merging.\n",
"TS_itau = TS_itau.rename(columns={'Open': 'Itau_open', 'Close': 'Itau_Close'})\n",
"BVSP = BVSP.rename(columns={'Open': 'BVSP_open'})\n",
"USDBRL = USDBRL.rename(columns={'Open': 'USDBRL_open'})\n",
"\n",
"# Default (inner) merges on 'Date': only dates present in all three series are kept.\n",
"result = TS_itau.merge(BVSP, on='Date').merge(USDBRL, on='Date')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Lag features: the Itau closing price shifted by 1, 2 and 3 rows.\n",
"close_price = result['Itau_Close']\n",
"lag_col = pd.DataFrame({f'lag_{k}': close_price.shift(k) for k in (1, 2, 3)})\n",
"\n",
"# Append the lags column-wise; dropna() removes the rows left incomplete by the shifts\n",
"# (and any other row containing a missing value).\n",
"result = pd.concat([result, lag_col], axis=1, sort=False).dropna()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Itau_open</th>\n",
" <th>Itau_Close</th>\n",
" <th>BVSP_open</th>\n",
" <th>USDBRL_open</th>\n",
" <th>lag_1</th>\n",
" <th>lag_2</th>\n",
" <th>lag_3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2006-03-29</td>\n",
" <td>10.9282</td>\n",
" <td>10.8934</td>\n",
" <td>36684.0</td>\n",
" <td>2.2270</td>\n",
" <td>10.9282</td>\n",
" <td>11.1600</td>\n",
" <td>11.4829</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2006-03-30</td>\n",
" <td>11.0093</td>\n",
" <td>10.8405</td>\n",
" <td>37493.0</td>\n",
" <td>2.2117</td>\n",
" <td>10.8934</td>\n",
" <td>10.9282</td>\n",
" <td>11.1600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2006-03-31</td>\n",
" <td>10.8620</td>\n",
" <td>10.5971</td>\n",
" <td>37783.0</td>\n",
" <td>2.1853</td>\n",
" <td>10.8405</td>\n",
" <td>10.8934</td>\n",
" <td>10.9282</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2006-04-03</td>\n",
" <td>10.7047</td>\n",
" <td>11.0358</td>\n",
" <td>37952.0</td>\n",
" <td>2.1618</td>\n",
" <td>10.5971</td>\n",
" <td>10.8405</td>\n",
" <td>10.8934</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2006-04-04</td>\n",
" <td>11.0789</td>\n",
" <td>11.2263</td>\n",
" <td>38718.0</td>\n",
" <td>2.1370</td>\n",
" <td>11.0358</td>\n",
" <td>10.5971</td>\n",
" <td>10.8405</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Itau_open Itau_Close BVSP_open USDBRL_open lag_1 lag_2 \\\n",
"3 2006-03-29 10.9282 10.8934 36684.0 2.2270 10.9282 11.1600 \n",
"4 2006-03-30 11.0093 10.8405 37493.0 2.2117 10.8934 10.9282 \n",
"5 2006-03-31 10.8620 10.5971 37783.0 2.1853 10.8405 10.8934 \n",
"6 2006-04-03 10.7047 11.0358 37952.0 2.1618 10.5971 10.8405 \n",
"7 2006-04-04 11.0789 11.2263 38718.0 2.1370 11.0358 10.5971 \n",
"\n",
" lag_3 \n",
"3 11.4829 \n",
"4 11.1600 \n",
"5 10.9282 \n",
"6 10.8934 \n",
"7 10.8405 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Serialize the feature table to CSV text and store it in the project as 'ts_data',\n",
"# overwriting any existing file of that name.\n",
"project.save_data(\n",
"    data=result.to_csv(),\n",
"    file_name='ts_data',\n",
"    overwrite=True,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment