Skip to content

Instantly share code, notes, and snippets.

@abFunctions
Created July 9, 2021 05:00
Show Gist options
  • Save abFunctions/5f22965dfbae08c29d2de53157a036f3 to your computer and use it in GitHub Desktop.
Save abFunctions/5f22965dfbae08c29d2de53157a036f3 to your computer and use it in GitHub Desktop.
ml_03_stock_preduction_roku
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tutorial url: https://www.youtube.com/watch?v=lncoLfue_Y4"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import datetime\n",
"import sklearn"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# using data set containing Roku daily stock prices (2017-09-28 to 2021-06-25 in this CSV, just under 4 years)\n",
"dataset = pd.read_csv('ROKU_daily.csv',index_col=\"Date\",parse_dates=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Adj Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2017-09-28</th>\n",
" <td>15.800000</td>\n",
" <td>23.500000</td>\n",
" <td>15.750000</td>\n",
" <td>23.500000</td>\n",
" <td>23.500000</td>\n",
" <td>39265900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-09-29</th>\n",
" <td>26.740000</td>\n",
" <td>29.799999</td>\n",
" <td>25.469999</td>\n",
" <td>26.540001</td>\n",
" <td>26.540001</td>\n",
" <td>44294700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-02</th>\n",
" <td>25.200001</td>\n",
" <td>26.280001</td>\n",
" <td>23.260000</td>\n",
" <td>23.559999</td>\n",
" <td>23.559999</td>\n",
" <td>16008400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-03</th>\n",
" <td>23.010000</td>\n",
" <td>23.020000</td>\n",
" <td>20.770000</td>\n",
" <td>20.809999</td>\n",
" <td>20.809999</td>\n",
" <td>13678500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-04</th>\n",
" <td>21.290001</td>\n",
" <td>22.600000</td>\n",
" <td>20.820000</td>\n",
" <td>20.850000</td>\n",
" <td>20.850000</td>\n",
" <td>9345700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-21</th>\n",
" <td>365.049988</td>\n",
" <td>385.790009</td>\n",
" <td>363.100006</td>\n",
" <td>382.730011</td>\n",
" <td>382.730011</td>\n",
" <td>6227000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-22</th>\n",
" <td>382.589996</td>\n",
" <td>405.679993</td>\n",
" <td>382.000000</td>\n",
" <td>403.500000</td>\n",
" <td>403.500000</td>\n",
" <td>6686300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-23</th>\n",
" <td>406.501007</td>\n",
" <td>424.339996</td>\n",
" <td>406.501007</td>\n",
" <td>421.700012</td>\n",
" <td>421.700012</td>\n",
" <td>8639900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-24</th>\n",
" <td>430.494995</td>\n",
" <td>431.779999</td>\n",
" <td>419.549988</td>\n",
" <td>423.579987</td>\n",
" <td>423.579987</td>\n",
" <td>6575100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-25</th>\n",
" <td>426.174988</td>\n",
" <td>431.975006</td>\n",
" <td>420.299988</td>\n",
" <td>430.940002</td>\n",
" <td>430.940002</td>\n",
" <td>4987500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>942 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Adj Close \\\n",
"Date \n",
"2017-09-28 15.800000 23.500000 15.750000 23.500000 23.500000 \n",
"2017-09-29 26.740000 29.799999 25.469999 26.540001 26.540001 \n",
"2017-10-02 25.200001 26.280001 23.260000 23.559999 23.559999 \n",
"2017-10-03 23.010000 23.020000 20.770000 20.809999 20.809999 \n",
"2017-10-04 21.290001 22.600000 20.820000 20.850000 20.850000 \n",
"... ... ... ... ... ... \n",
"2021-06-21 365.049988 385.790009 363.100006 382.730011 382.730011 \n",
"2021-06-22 382.589996 405.679993 382.000000 403.500000 403.500000 \n",
"2021-06-23 406.501007 424.339996 406.501007 421.700012 421.700012 \n",
"2021-06-24 430.494995 431.779999 419.549988 423.579987 423.579987 \n",
"2021-06-25 426.174988 431.975006 420.299988 430.940002 430.940002 \n",
"\n",
" Volume \n",
"Date \n",
"2017-09-28 39265900 \n",
"2017-09-29 44294700 \n",
"2017-10-02 16008400 \n",
"2017-10-03 13678500 \n",
"2017-10-04 9345700 \n",
"... ... \n",
"2021-06-21 6227000 \n",
"2021-06-22 6686300 \n",
"2021-06-23 8639900 \n",
"2021-06-24 6575100 \n",
"2021-06-25 4987500 \n",
"\n",
"[942 rows x 6 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Open False\n",
"High False\n",
"Low False\n",
"Close False\n",
"Adj Close False\n",
"Volume False\n",
"dtype: bool"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check whether any column contains missing (NaN) values. Returns one bool per column: True means that column has at least one missing value\n",
"dataset.isna().any()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 942 entries, 2017-09-28 to 2021-06-25\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Open 942 non-null float64\n",
" 1 High 942 non-null float64\n",
" 2 Low 942 non-null float64\n",
" 3 Close 942 non-null float64\n",
" 4 Adj Close 942 non-null float64\n",
" 5 Volume 942 non-null int64 \n",
"dtypes: float64(5), int64(1)\n",
"memory usage: 51.5 KB\n"
]
}
],
"source": [
"# print out the basic info of the data set \n",
"dataset.info()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Date'>"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# plotting the daily open price over the full date range of the data set\n",
"dataset['Open'].plot(figsize=(16,6))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# strip thousands separators from the \"Close\" column and convert it to float (commented out: the column is already float64)\n",
"# dataset[\"Close\"] = dataset[\"Close\"].str.replace(',','').astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# dataset[\"Volume\"] = dataset[\"Volume\"].str.replace(',','').astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Adj Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2017-09-28</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-09-29</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-02</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-03</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-04</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-05</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-06</th>\n",
" <td>22.274286</td>\n",
" <td>24.395715</td>\n",
" <td>21.345714</td>\n",
" <td>22.924286</td>\n",
" <td>22.924286</td>\n",
" <td>1.943051e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-09</th>\n",
" <td>23.424286</td>\n",
" <td>24.660000</td>\n",
" <td>22.461428</td>\n",
" <td>23.072857</td>\n",
" <td>23.072857</td>\n",
" <td>1.492693e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-10</th>\n",
" <td>23.202858</td>\n",
" <td>24.060000</td>\n",
" <td>22.065714</td>\n",
" <td>22.557143</td>\n",
" <td>22.557143</td>\n",
" <td>9.667586e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-11</th>\n",
" <td>23.004286</td>\n",
" <td>23.781429</td>\n",
" <td>22.057143</td>\n",
" <td>22.571429</td>\n",
" <td>22.571429</td>\n",
" <td>8.159143e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-12</th>\n",
" <td>23.098572</td>\n",
" <td>23.968572</td>\n",
" <td>22.440000</td>\n",
" <td>22.974286</td>\n",
" <td>22.974286</td>\n",
" <td>6.711629e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-13</th>\n",
" <td>23.450000</td>\n",
" <td>24.197143</td>\n",
" <td>22.744286</td>\n",
" <td>23.284286</td>\n",
" <td>23.284286</td>\n",
" <td>5.798943e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-16</th>\n",
" <td>23.674286</td>\n",
" <td>24.340000</td>\n",
" <td>22.838572</td>\n",
" <td>23.310000</td>\n",
" <td>23.310000</td>\n",
" <td>5.260086e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-17</th>\n",
" <td>23.647143</td>\n",
" <td>24.215715</td>\n",
" <td>22.767143</td>\n",
" <td>23.151429</td>\n",
" <td>23.151429</td>\n",
" <td>4.513229e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-18</th>\n",
" <td>23.405714</td>\n",
" <td>23.808572</td>\n",
" <td>22.544286</td>\n",
" <td>22.795714</td>\n",
" <td>22.795714</td>\n",
" <td>3.550043e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-19</th>\n",
" <td>22.885714</td>\n",
" <td>23.427143</td>\n",
" <td>22.351429</td>\n",
" <td>22.667143</td>\n",
" <td>22.667143</td>\n",
" <td>2.770871e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-20</th>\n",
" <td>22.637143</td>\n",
" <td>23.155715</td>\n",
" <td>22.151429</td>\n",
" <td>22.411429</td>\n",
" <td>22.411429</td>\n",
" <td>2.242857e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-23</th>\n",
" <td>22.362857</td>\n",
" <td>22.822857</td>\n",
" <td>21.708571</td>\n",
" <td>21.945715</td>\n",
" <td>21.945715</td>\n",
" <td>2.126100e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-24</th>\n",
" <td>21.884286</td>\n",
" <td>22.322857</td>\n",
" <td>21.184286</td>\n",
" <td>21.481429</td>\n",
" <td>21.481429</td>\n",
" <td>2.041100e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-25</th>\n",
" <td>21.411429</td>\n",
" <td>21.830714</td>\n",
" <td>20.661429</td>\n",
" <td>21.014286</td>\n",
" <td>21.014286</td>\n",
" <td>2.016714e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Adj Close \\\n",
"Date \n",
"2017-09-28 NaN NaN NaN NaN NaN \n",
"2017-09-29 NaN NaN NaN NaN NaN \n",
"2017-10-02 NaN NaN NaN NaN NaN \n",
"2017-10-03 NaN NaN NaN NaN NaN \n",
"2017-10-04 NaN NaN NaN NaN NaN \n",
"2017-10-05 NaN NaN NaN NaN NaN \n",
"2017-10-06 22.274286 24.395715 21.345714 22.924286 22.924286 \n",
"2017-10-09 23.424286 24.660000 22.461428 23.072857 23.072857 \n",
"2017-10-10 23.202858 24.060000 22.065714 22.557143 22.557143 \n",
"2017-10-11 23.004286 23.781429 22.057143 22.571429 22.571429 \n",
"2017-10-12 23.098572 23.968572 22.440000 22.974286 22.974286 \n",
"2017-10-13 23.450000 24.197143 22.744286 23.284286 23.284286 \n",
"2017-10-16 23.674286 24.340000 22.838572 23.310000 23.310000 \n",
"2017-10-17 23.647143 24.215715 22.767143 23.151429 23.151429 \n",
"2017-10-18 23.405714 23.808572 22.544286 22.795714 22.795714 \n",
"2017-10-19 22.885714 23.427143 22.351429 22.667143 22.667143 \n",
"2017-10-20 22.637143 23.155715 22.151429 22.411429 22.411429 \n",
"2017-10-23 22.362857 22.822857 21.708571 21.945715 21.945715 \n",
"2017-10-24 21.884286 22.322857 21.184286 21.481429 21.481429 \n",
"2017-10-25 21.411429 21.830714 20.661429 21.014286 21.014286 \n",
"\n",
" Volume \n",
"Date \n",
"2017-09-28 NaN \n",
"2017-09-29 NaN \n",
"2017-10-02 NaN \n",
"2017-10-03 NaN \n",
"2017-10-04 NaN \n",
"2017-10-05 NaN \n",
"2017-10-06 1.943051e+07 \n",
"2017-10-09 1.492693e+07 \n",
"2017-10-10 9.667586e+06 \n",
"2017-10-11 8.159143e+06 \n",
"2017-10-12 6.711629e+06 \n",
"2017-10-13 5.798943e+06 \n",
"2017-10-16 5.260086e+06 \n",
"2017-10-17 4.513229e+06 \n",
"2017-10-18 3.550043e+06 \n",
"2017-10-19 2.770871e+06 \n",
"2017-10-20 2.242857e+06 \n",
"2017-10-23 2.126100e+06 \n",
"2017-10-24 2.041100e+06 \n",
"2017-10-25 2.016714e+06 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get 7 day rolling mean (starts on day 7, iterates daily following with 7-day rolling avg)\n",
"dataset.rolling(7).mean().head(20)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Date'>"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# plot the daily open price together with the 30-day rolling mean of the close price\n",
"dataset[\"Open\"].plot(figsize=(16,6))\n",
"dataset.rolling(window=30).mean()[\"Close\"].plot()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Date'>"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"dataset['Close: 30 Day Mean'] = dataset['Close'].rolling(window=30).mean()\n",
"dataset[['Close','Close: 30 Day Mean']].plot(figsize=(16,6))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Date'>"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# plot the expanding (cumulative) mean of the close price; min_periods is optional\n",
"dataset['Close'].expanding(min_periods=1).mean().plot(figsize=(16,6))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2017-09-28</th>\n",
" <td>15.800000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-09-29</th>\n",
" <td>26.740000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-02</th>\n",
" <td>25.200001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-03</th>\n",
" <td>23.010000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017-10-04</th>\n",
" <td>21.290001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-21</th>\n",
" <td>365.049988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-22</th>\n",
" <td>382.589996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-23</th>\n",
" <td>406.501007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-24</th>\n",
" <td>430.494995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-06-25</th>\n",
" <td>426.174988</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>942 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Open\n",
"Date \n",
"2017-09-28 15.800000\n",
"2017-09-29 26.740000\n",
"2017-10-02 25.200001\n",
"2017-10-03 23.010000\n",
"2017-10-04 21.290001\n",
"... ...\n",
"2021-06-21 365.049988\n",
"2021-06-22 382.589996\n",
"2021-06-23 406.501007\n",
"2021-06-24 430.494995\n",
"2021-06-25 426.174988\n",
"\n",
"[942 rows x 1 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Training data: the daily 'Open' price as a single-column DataFrame\n",
"training_set = dataset['Open'].to_frame()\n",
"training_set"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Open False\n",
"High False\n",
"Low False\n",
"Close False\n",
"Adj Close False\n",
"Volume False\n",
"Close: 30 Day Mean True\n",
"dtype: bool"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DATA PREPROCESSING\n",
"# Data cleaning\n",
"dataset.isna().any()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"# Feature scaling\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"sc = MinMaxScaler(feature_range = (0,1))\n",
"training_set_scaled = sc.fit_transform(training_set)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"# Creating a data structure with 60 timesteps and 1 output:\n",
"# each sample is the previous n_timesteps scaled opens, and its target is the next scaled open.\n",
"n_timesteps = 60\n",
"X_train = []\n",
"y_train = []\n",
"# Iterate up to the actual number of rows instead of a hard-coded 942, so a\n",
"# refreshed CSV with more or fewer rows is neither truncated nor out of range.\n",
"for i in range(n_timesteps, len(training_set_scaled)):\n",
"    X_train.append(training_set_scaled[i-n_timesteps:i, 0])\n",
"    y_train.append(training_set_scaled[i, 0])\n",
"X_train, y_train = np.array(X_train), np.array(y_train)\n",
"\n",
"# Reshaping to (samples, timesteps, 1) to add the trailing feature axis\n",
"X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"# PART II - BUILDING THE RNN\n",
"\n",
"# Importing all Keras libraries and packages (it's easier)\n",
"from keras.layers import *\n",
"from keras.models import *\n",
"import keras.backend as K #for some advanced functions"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/100\n",
"28/28 [==============================] - 26s 107ms/step - loss: 0.0351\n",
"Epoch 2/100\n",
"28/28 [==============================] - 4s 129ms/step - loss: 0.0050\n",
"Epoch 3/100\n",
"28/28 [==============================] - 3s 123ms/step - loss: 0.0042\n",
"Epoch 4/100\n",
"28/28 [==============================] - 3s 115ms/step - loss: 0.0045\n",
"Epoch 5/100\n",
"28/28 [==============================] - 4s 127ms/step - loss: 0.0043\n",
"Epoch 6/100\n",
"28/28 [==============================] - 3s 120ms/step - loss: 0.0047\n",
"Epoch 7/100\n",
"28/28 [==============================] - 3s 116ms/step - loss: 0.0037\n",
"Epoch 8/100\n",
"28/28 [==============================] - 3s 107ms/step - loss: 0.0040\n",
"Epoch 9/100\n",
"28/28 [==============================] - 4s 125ms/step - loss: 0.0032\n",
"Epoch 10/100\n",
"28/28 [==============================] - 3s 121ms/step - loss: 0.0027\n",
"Epoch 11/100\n",
"28/28 [==============================] - 3s 115ms/step - loss: 0.0027\n",
"Epoch 12/100\n",
"28/28 [==============================] - 3s 115ms/step - loss: 0.0028\n",
"Epoch 13/100\n",
"28/28 [==============================] - 3s 114ms/step - loss: 0.0031\n",
"Epoch 14/100\n",
"28/28 [==============================] - 4s 136ms/step - loss: 0.0029\n",
"Epoch 15/100\n",
"28/28 [==============================] - 4s 143ms/step - loss: 0.0023\n",
"Epoch 16/100\n",
"28/28 [==============================] - 3s 121ms/step - loss: 0.0026\n",
"Epoch 17/100\n",
"28/28 [==============================] - 3s 123ms/step - loss: 0.0026\n",
"Epoch 18/100\n",
"28/28 [==============================] - 3s 111ms/step - loss: 0.0028\n",
"Epoch 19/100\n",
"28/28 [==============================] - 4s 130ms/step - loss: 0.0020\n",
"Epoch 20/100\n",
"28/28 [==============================] - 3s 107ms/step - loss: 0.0023\n",
"Epoch 21/100\n",
"28/28 [==============================] - 3s 104ms/step - loss: 0.0028\n",
"Epoch 22/100\n",
"28/28 [==============================] - 3s 104ms/step - loss: 0.0026\n",
"Epoch 23/100\n",
"28/28 [==============================] - 3s 117ms/step - loss: 0.0032\n",
"Epoch 24/100\n",
"28/28 [==============================] - 3s 109ms/step - loss: 0.0025\n",
"Epoch 25/100\n",
"28/28 [==============================] - 4s 128ms/step - loss: 0.0022\n",
"Epoch 26/100\n",
"28/28 [==============================] - 3s 119ms/step - loss: 0.0024\n",
"Epoch 27/100\n",
"28/28 [==============================] - 3s 120ms/step - loss: 0.0023\n",
"Epoch 28/100\n",
"28/28 [==============================] - 3s 111ms/step - loss: 0.0024\n",
"Epoch 29/100\n",
"28/28 [==============================] - 3s 109ms/step - loss: 0.0022\n",
"Epoch 30/100\n",
"28/28 [==============================] - 3s 109ms/step - loss: 0.0024\n",
"Epoch 31/100\n",
"28/28 [==============================] - 3s 115ms/step - loss: 0.0021\n",
"Epoch 32/100\n",
"28/28 [==============================] - 3s 116ms/step - loss: 0.0021\n",
"Epoch 33/100\n",
"28/28 [==============================] - 3s 108ms/step - loss: 0.0026\n",
"Epoch 34/100\n",
"28/28 [==============================] - 3s 111ms/step - loss: 0.0022\n",
"Epoch 35/100\n",
"28/28 [==============================] - 3s 112ms/step - loss: 0.0023\n",
"Epoch 36/100\n",
"28/28 [==============================] - 3s 107ms/step - loss: 0.0018\n",
"Epoch 37/100\n",
"28/28 [==============================] - 3s 119ms/step - loss: 0.0017\n",
"Epoch 38/100\n",
"28/28 [==============================] - 3s 108ms/step - loss: 0.0019\n",
"Epoch 39/100\n",
"28/28 [==============================] - 3s 107ms/step - loss: 0.0020\n",
"Epoch 40/100\n",
"28/28 [==============================] - 3s 106ms/step - loss: 0.0023\n",
"Epoch 41/100\n",
"28/28 [==============================] - 3s 120ms/step - loss: 0.0019\n",
"Epoch 42/100\n",
"28/28 [==============================] - 3s 103ms/step - loss: 0.0018\n",
"Epoch 43/100\n",
"28/28 [==============================] - 3s 104ms/step - loss: 0.0023\n",
"Epoch 44/100\n",
"28/28 [==============================] - 3s 113ms/step - loss: 0.0022\n",
"Epoch 45/100\n",
"28/28 [==============================] - 3s 103ms/step - loss: 0.0023\n",
"Epoch 46/100\n",
"28/28 [==============================] - 3s 104ms/step - loss: 0.0019\n",
"Epoch 47/100\n",
"28/28 [==============================] - 3s 117ms/step - loss: 0.0016\n",
"Epoch 48/100\n",
"28/28 [==============================] - 3s 112ms/step - loss: 0.0019\n",
"Epoch 49/100\n",
"28/28 [==============================] - 3s 113ms/step - loss: 0.0015\n",
"Epoch 50/100\n",
"28/28 [==============================] - 3s 107ms/step - loss: 0.0013\n",
"Epoch 51/100\n",
"28/28 [==============================] - 3s 122ms/step - loss: 0.0031\n",
"Epoch 52/100\n",
"28/28 [==============================] - 3s 104ms/step - loss: 0.0026\n",
"Epoch 53/100\n",
"28/28 [==============================] - 3s 108ms/step - loss: 0.0015\n",
"Epoch 54/100\n",
"28/28 [==============================] - 3s 111ms/step - loss: 0.0013\n",
"Epoch 55/100\n",
"28/28 [==============================] - 3s 112ms/step - loss: 0.0021\n",
"Epoch 56/100\n",
"28/28 [==============================] - 3s 122ms/step - loss: 0.0015\n",
"Epoch 57/100\n",
"28/28 [==============================] - 3s 106ms/step - loss: 0.0016\n",
"Epoch 58/100\n",
"28/28 [==============================] - 3s 115ms/step - loss: 0.0016\n",
"Epoch 59/100\n",
"28/28 [==============================] - 5s 171ms/step - loss: 0.0014\n",
"Epoch 60/100\n",
"28/28 [==============================] - 6s 206ms/step - loss: 0.0014\n",
"Epoch 61/100\n",
"28/28 [==============================] - 6s 212ms/step - loss: 0.0017\n",
"Epoch 62/100\n",
"28/28 [==============================] - 6s 200ms/step - loss: 0.0014\n",
"Epoch 63/100\n",
"28/28 [==============================] - 6s 197ms/step - loss: 0.0013\n",
"Epoch 64/100\n",
"28/28 [==============================] - 4s 146ms/step - loss: 0.0016\n",
"Epoch 65/100\n",
"28/28 [==============================] - 4s 155ms/step - loss: 0.0016\n",
"Epoch 66/100\n",
"28/28 [==============================] - 5s 164ms/step - loss: 0.0015A\n",
"Epoch 67/100\n",
"28/28 [==============================] - 4s 150ms/step - loss: 0.0014\n",
"Epoch 68/100\n",
"28/28 [==============================] - 4s 137ms/step - loss: 0.0014\n",
"Epoch 69/100\n",
"28/28 [==============================] - 4s 138ms/step - loss: 0.0014\n",
"Epoch 70/100\n",
"28/28 [==============================] - 5s 178ms/step - loss: 0.0014\n",
"Epoch 71/100\n",
"28/28 [==============================] - 4s 146ms/step - loss: 0.0013\n",
"Epoch 72/100\n",
"28/28 [==============================] - 4s 151ms/step - loss: 0.0018\n",
"Epoch 73/100\n",
"28/28 [==============================] - 4s 150ms/step - loss: 0.0016\n",
"Epoch 74/100\n",
"28/28 [==============================] - 4s 152ms/step - loss: 0.0015\n",
"Epoch 75/100\n",
"28/28 [==============================] - 4s 153ms/step - loss: 0.0013\n",
"Epoch 76/100\n",
"28/28 [==============================] - 5s 189ms/step - loss: 0.0017\n",
"Epoch 77/100\n",
"28/28 [==============================] - 5s 178ms/step - loss: 0.0015\n",
"Epoch 78/100\n",
"28/28 [==============================] - 5s 193ms/step - loss: 0.0011\n",
"Epoch 79/100\n",
"28/28 [==============================] - 5s 194ms/step - loss: 0.0013\n",
"Epoch 80/100\n",
"28/28 [==============================] - 4s 153ms/step - loss: 0.0012\n",
"Epoch 81/100\n",
"28/28 [==============================] - 4s 160ms/step - loss: 0.0016\n",
"Epoch 82/100\n",
"28/28 [==============================] - 5s 175ms/step - loss: 0.0013\n",
"Epoch 83/100\n",
"28/28 [==============================] - 4s 151ms/step - loss: 0.0016\n",
"Epoch 84/100\n",
"28/28 [==============================] - 3s 110ms/step - loss: 0.0012\n",
"Epoch 85/100\n",
"28/28 [==============================] - 4s 143ms/step - loss: 0.0012\n",
"Epoch 86/100\n",
"28/28 [==============================] - 4s 134ms/step - loss: 0.0016\n",
"Epoch 87/100\n",
"28/28 [==============================] - 5s 174ms/step - loss: 0.0015\n",
"Epoch 88/100\n",
"28/28 [==============================] - 6s 199ms/step - loss: 0.0014\n",
"Epoch 89/100\n",
"28/28 [==============================] - 4s 147ms/step - loss: 0.0013\n",
"Epoch 90/100\n",
"28/28 [==============================] - 3s 103ms/step - loss: 0.0014\n",
"Epoch 91/100\n",
"28/28 [==============================] - 3s 101ms/step - loss: 0.0011\n",
"Epoch 92/100\n",
"28/28 [==============================] - 3s 100ms/step - loss: 0.0013\n",
"Epoch 93/100\n",
"28/28 [==============================] - 4s 127ms/step - loss: 0.0012 \n",
"Epoch 94/100\n",
"28/28 [==============================] - 5s 183ms/step - loss: 0.0011\n",
"Epoch 95/100\n",
"28/28 [==============================] - 4s 152ms/step - loss: 0.0013\n",
"Epoch 96/100\n",
"28/28 [==============================] - 4s 151ms/step - loss: 0.0011\n",
"Epoch 97/100\n",
"28/28 [==============================] - 6s 225ms/step - loss: 0.0011\n",
"Epoch 98/100\n",
"28/28 [==============================] - 6s 210ms/step - loss: 9.4430e-04\n",
"Epoch 99/100\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"28/28 [==============================] - 6s 212ms/step - loss: 0.0014\n",
"Epoch 100/100\n",
"28/28 [==============================] - 6s 211ms/step - loss: 0.0011\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7ff97b437e20>"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stacked LSTM regressor: four 50-unit LSTM layers, each followed by 20% Dropout,\n",
"# then a single-unit Dense output layer.\n",
"regressor = Sequential([\n",
"    # First LSTM layer declares the input shape: (timesteps, 1)\n",
"    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),\n",
"    Dropout(0.2),\n",
"    # Second LSTM layer\n",
"    LSTM(units=50, return_sequences=True),\n",
"    Dropout(0.2),\n",
"    # Third LSTM layer\n",
"    LSTM(units=50, return_sequences=True),\n",
"    Dropout(0.2),\n",
"    # Fourth LSTM layer (return_sequences left at its default)\n",
"    LSTM(units=50),\n",
"    Dropout(0.2),\n",
"    # Output layer\n",
"    Dense(units=1),\n",
"])\n",
"\n",
"# Compiling the RNN\n",
"regressor.compile(optimizer='adam', loss='mean_squared_error')\n",
"\n",
"# Fitting the RNN to the training set\n",
"regressor.fit(X_train, y_train, epochs=100, batch_size=32)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"# PART 3 - Making the predictions and visualizing the results\n",
"\n",
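"# Getting the real stock prices for the test period (2020-06-26 to 2021-07-02)\n",
"# NOTE(review): this range overlaps the training data (2017-09-28 to 2021-06-25), so results on it are not out-of-sample\n",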
"dataset_test = pd.read_csv(\"ROKU_test.csv\",index_col=\"Date\",parse_dates=True)"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"# Ground truth to compare predictions against: the 'Open' column, which is what the model is trained on.\n",
"# Selected by name because 'Date' is the index here, so positional column 1 is 'High', not 'Open'.\n",
"real_stock_price = dataset_test[['Open']].values"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Adj Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2020-06-26</th>\n",
" <td>124.820000</td>\n",
" <td>126.550003</td>\n",
" <td>120.260002</td>\n",
" <td>122.550003</td>\n",
" <td>122.550003</td>\n",
" <td>7984100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-06-29</th>\n",
" <td>122.400002</td>\n",
" <td>123.209000</td>\n",
" <td>112.110001</td>\n",
" <td>115.050003</td>\n",
" <td>115.050003</td>\n",
" <td>12073800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-06-30</th>\n",
" <td>115.099998</td>\n",
" <td>117.870003</td>\n",
" <td>113.879997</td>\n",
" <td>116.529999</td>\n",
" <td>116.529999</td>\n",
" <td>5845900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-07-01</th>\n",
" <td>117.300003</td>\n",
" <td>129.440002</td>\n",
" <td>116.349998</td>\n",
" <td>128.389999</td>\n",
" <td>128.389999</td>\n",
" <td>15001800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-07-02</th>\n",
" <td>128.470001</td>\n",
" <td>132.500000</td>\n",
" <td>125.195000</td>\n",
" <td>128.649994</td>\n",
" <td>128.649994</td>\n",
" <td>12158100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Adj Close \\\n",
"Date \n",
"2020-06-26 124.820000 126.550003 120.260002 122.550003 122.550003 \n",
"2020-06-29 122.400002 123.209000 112.110001 115.050003 115.050003 \n",
"2020-06-30 115.099998 117.870003 113.879997 116.529999 116.529999 \n",
"2020-07-01 117.300003 129.440002 116.349998 128.389999 128.389999 \n",
"2020-07-02 128.470001 132.500000 125.195000 128.649994 128.649994 \n",
"\n",
" Volume \n",
"Date \n",
"2020-06-26 7984100 \n",
"2020-06-29 12073800 \n",
"2020-06-30 5845900 \n",
"2020-07-01 15001800 \n",
"2020-07-02 12158100 "
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 257 entries, 2020-06-26 to 2021-07-02\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Open 257 non-null float64\n",
" 1 High 257 non-null float64\n",
" 2 Low 257 non-null float64\n",
" 3 Close 257 non-null float64\n",
" 4 Adj Close 257 non-null float64\n",
" 5 Volume 257 non-null int64 \n",
"dtypes: float64(5), int64(1)\n",
"memory usage: 14.1 KB\n"
]
}
],
"source": [
"dataset_test.info()"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"dataset_test['Volume'] = dataset_test['Volume']"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"test_set=dataset_test['Open']\n",
"test_set=pd.DataFrame(test_set)"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 257 entries, 2020-06-26 to 2021-07-02\n",
"Data columns (total 1 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Open 257 non-null float64\n",
"dtypes: float64(1)\n",
"memory usage: 4.0 KB\n"
]
}
],
"source": [
"test_set.info()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"# Getting the predicted stock price\n",
"dataset_total = pd.concat((dataset['Open'], dataset_test['Open']), axis = 0)\n",
"inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values\n",
"inputs = inputs.reshape(-1,1)\n",
"inputs = sc.transform(inputs)\n",
"X_test = []\n",
"for i in range(60, 80):\n",
" X_test.append(inputs[i-60:i, 0])\n",
"X_test = np.array(X_test)\n",
"X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))\n",
"predicted_stock_price = regressor.predict(X_test)\n",
"predicted_stock_price = sc.inverse_transform(predicted_stock_price)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 20 entries, 0 to 19\n",
"Data columns (total 1 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 0 20 non-null float32\n",
"dtypes: float32(1)\n",
"memory usage: 208.0 bytes\n"
]
}
],
"source": [
"predicted_stock_price=pd.DataFrame(predicted_stock_price)\n",
"predicted_stock_price.info()"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Visualizing the results\n",
"plt.plot(real_stock_price, color = 'red', label = 'Real Roku Stock Price')\n",
"plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted Roku Stock Price')\n",
"plt.title('Roku Stock Price Prediction')\n",
"plt.xlabel('Time')\n",
"plt.ylabel('Roku Stock Price')\n",
"plt.legend()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment