Skip to content

Instantly share code, notes, and snippets.

@alllexx88
Created March 25, 2019 16:23
Show Gist options
  • Save alllexx88/3dc2c54320e9f0a70151056efe0e831a to your computer and use it in GitHub Desktop.
Save alllexx88/3dc2c54320e9f0a70151056efe0e831a to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from math import sqrt\n",
"from numpy import split\n",
"from numpy import array\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.preprocessing import StandardScaler\n",
"from matplotlib import pyplot\n",
"pd.plotting.register_matplotlib_converters()\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"from keras.layers import Flatten\n",
"from keras.layers import LSTM\n",
"from keras.layers import RepeatVector\n",
"from keras.layers import TimeDistributed\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Загрузим датасет"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"timeStamp object\n",
"month int64\n",
"dayofweek int64\n",
"precip float64\n",
"demand float64\n",
"temp float64\n",
"is_workday float64\n",
"dtype: object"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"electricity_data = pd.read_csv ('Electricity_D.csv')\n",
"electricity_data.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timeStamp</th>\n",
" <th>month</th>\n",
" <th>dayofweek</th>\n",
" <th>precip</th>\n",
" <th>demand</th>\n",
" <th>temp</th>\n",
" <th>is_workday</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2012-01-01</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0.0033</td>\n",
" <td>91775.9</td>\n",
" <td>47.3622</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2012-01-02</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0000</td>\n",
" <td>127270.9</td>\n",
" <td>40.4967</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2012-01-03</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0000</td>\n",
" <td>146292.3</td>\n",
" <td>26.6725</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2012-01-04</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0.0000</td>\n",
" <td>152070.4</td>\n",
" <td>20.5850</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2012-01-05</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.0000</td>\n",
" <td>147125.9</td>\n",
" <td>33.5775</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timeStamp month dayofweek precip demand temp is_workday\n",
"0 2012-01-01 0 6 0.0033 91775.9 47.3622 0.5\n",
"1 2012-01-02 0 0 0.0000 127270.9 40.4967 0.0\n",
"2 2012-01-03 0 1 0.0000 146292.3 26.6725 0.0\n",
"3 2012-01-04 0 2 0.0000 152070.4 20.5850 0.0\n",
"4 2012-01-05 0 3 0.0000 147125.9 33.5775 0.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"electricity_data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Преобразуем колонку 'timeStamp' в datetime"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"timeStamp datetime64[ns]\n",
"month int64\n",
"dayofweek int64\n",
"precip float64\n",
"demand float64\n",
"temp float64\n",
"is_workday float64\n",
"dtype: object"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"electricity_data['timeStamp'] = pd.to_datetime(electricity_data['timeStamp'])\n",
"electricity_data.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Делаем колонку 'timeStamp' индексом"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>dayofweek</th>\n",
" <th>precip</th>\n",
" <th>demand</th>\n",
" <th>temp</th>\n",
" <th>is_workday</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timeStamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2012-01-01</th>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0.0033</td>\n",
" <td>91775.9</td>\n",
" <td>47.3622</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-02</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0000</td>\n",
" <td>127270.9</td>\n",
" <td>40.4967</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-03</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0000</td>\n",
" <td>146292.3</td>\n",
" <td>26.6725</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-04</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0.0000</td>\n",
" <td>152070.4</td>\n",
" <td>20.5850</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-05</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.0000</td>\n",
" <td>147125.9</td>\n",
" <td>33.5775</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month dayofweek precip demand temp is_workday\n",
"timeStamp \n",
"2012-01-01 0 6 0.0033 91775.9 47.3622 0.5\n",
"2012-01-02 0 0 0.0000 127270.9 40.4967 0.0\n",
"2012-01-03 0 1 0.0000 146292.3 26.6725 0.0\n",
"2012-01-04 0 2 0.0000 152070.4 20.5850 0.0\n",
"2012-01-05 0 3 0.0000 147125.9 33.5775 0.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"electricity_data.set_index('timeStamp', inplace=True)\n",
"electricity_data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Кодируем категориальные данные в onehot представление"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>precip</th>\n",
" <th>demand</th>\n",
" <th>temp</th>\n",
" <th>workday</th>\n",
" <th>weekend</th>\n",
" <th>celebration</th>\n",
" <th>Mn</th>\n",
" <th>Tue</th>\n",
" <th>Wd</th>\n",
" <th>Thr</th>\n",
" <th>...</th>\n",
" <th>Mr</th>\n",
" <th>Ap</th>\n",
" <th>May</th>\n",
" <th>Jn</th>\n",
" <th>Jl</th>\n",
" <th>Ag</th>\n",
" <th>Sp</th>\n",
" <th>Oct</th>\n",
" <th>Nv</th>\n",
" <th>Dc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timeStamp</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2012-01-01</th>\n",
" <td>0.0033</td>\n",
" <td>91775.9</td>\n",
" <td>47.3622</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-02</th>\n",
" <td>0.0000</td>\n",
" <td>127270.9</td>\n",
" <td>40.4967</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-03</th>\n",
" <td>0.0000</td>\n",
" <td>146292.3</td>\n",
" <td>26.6725</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-04</th>\n",
" <td>0.0000</td>\n",
" <td>152070.4</td>\n",
" <td>20.5850</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-05</th>\n",
" <td>0.0000</td>\n",
" <td>147125.9</td>\n",
" <td>33.5775</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-06</th>\n",
" <td>0.0000</td>\n",
" <td>140820.9</td>\n",
" <td>43.4008</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-07</th>\n",
" <td>0.0000</td>\n",
" <td>124258.6</td>\n",
" <td>51.4979</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-08</th>\n",
" <td>0.0000</td>\n",
" <td>122893.0</td>\n",
" <td>41.4671</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-09</th>\n",
" <td>0.0000</td>\n",
" <td>141379.7</td>\n",
" <td>35.3600</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012-01-10</th>\n",
" <td>0.0000</td>\n",
" <td>140910.3</td>\n",
" <td>40.5033</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" precip demand temp workday weekend celebration Mn \\\n",
"timeStamp \n",
"2012-01-01 0.0033 91775.9 47.3622 0.0 0.0 1.0 0.0 \n",
"2012-01-02 0.0000 127270.9 40.4967 1.0 0.0 0.0 1.0 \n",
"2012-01-03 0.0000 146292.3 26.6725 1.0 0.0 0.0 0.0 \n",
"2012-01-04 0.0000 152070.4 20.5850 1.0 0.0 0.0 0.0 \n",
"2012-01-05 0.0000 147125.9 33.5775 1.0 0.0 0.0 0.0 \n",
"2012-01-06 0.0000 140820.9 43.4008 1.0 0.0 0.0 0.0 \n",
"2012-01-07 0.0000 124258.6 51.4979 0.0 1.0 0.0 0.0 \n",
"2012-01-08 0.0000 122893.0 41.4671 0.0 1.0 0.0 0.0 \n",
"2012-01-09 0.0000 141379.7 35.3600 1.0 0.0 0.0 1.0 \n",
"2012-01-10 0.0000 140910.3 40.5033 1.0 0.0 0.0 0.0 \n",
"\n",
" Tue Wd Thr ... Mr Ap May Jn Jl Ag Sp Oct Nv \\\n",
"timeStamp ... \n",
"2012-01-01 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-02 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-03 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-04 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-05 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-06 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-07 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-08 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-09 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2012-01-10 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" Dc \n",
"timeStamp \n",
"2012-01-01 0.0 \n",
"2012-01-02 0.0 \n",
"2012-01-03 0.0 \n",
"2012-01-04 0.0 \n",
"2012-01-05 0.0 \n",
"2012-01-06 0.0 \n",
"2012-01-07 0.0 \n",
"2012-01-08 0.0 \n",
"2012-01-09 0.0 \n",
"2012-01-10 0.0 \n",
"\n",
"[10 rows x 24 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"electricity_data_onehot = electricity_data.copy()\n",
"\n",
"# Day type: one 0/1 indicator column per value of 'is_workday' (0, 1 and 0.5).\n",
"for (n, is_workday) in zip([0, 1, 0.5], ['workday', 'weekend', 'celebration']):\n",
"    electricity_data_onehot[is_workday] = (electricity_data_onehot['is_workday'] == n).astype(float)\n",
"\n",
"# Day of week: codes 0..6 become the indicator columns 'Mn'..'Sn'.\n",
"for (n, dayofweek) in zip(range(7), ['Mn', 'Tue', 'Wd', 'Thr', 'Fr', 'St', 'Sn']):\n",
"    electricity_data_onehot[dayofweek] = (electricity_data_onehot['dayofweek'] == n).astype(float)\n",
"\n",
"# Month: codes 0..11 become the indicator columns 'Jr'..'Dc'.\n",
"# February is labelled 'Fb': 'Fr' is already the Friday column, and reusing that\n",
"# label here would silently overwrite the Friday indicator with the February one.\n",
"for (n, month) in zip(range(12), ['Jr', 'Fb', 'Mr', 'Ap', 'May', 'Jn', 'Jl', 'Ag', 'Sp',\n",
"                                  'Oct', 'Nv', 'Dc']):\n",
"    electricity_data_onehot[month] = (electricity_data_onehot['month'] == n).astype(float)\n",
"\n",
"# The encoded source columns are no longer needed.\n",
"electricity_data_onehot = electricity_data_onehot.drop(columns=['is_workday', 'dayofweek', 'month'])\n",
"\n",
"electricity_data_onehot.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Датасет для ML"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" demand\n",
"2014-05-04 114339.3\n",
"2014-05-05 129762.3\n",
"2014-05-06 129834.0\n",
"2014-05-07 129842.7\n",
"2014-05-08 131727.8\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>temp</th>\n",
" <th>precip</th>\n",
" <th>workday</th>\n",
" <th>weekend</th>\n",
" <th>celebration</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2014-05-04</th>\n",
" <td>58.0013</td>\n",
" <td>0.0000</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-05-05</th>\n",
" <td>58.1371</td>\n",
" <td>0.0000</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-05-06</th>\n",
" <td>59.1088</td>\n",
" <td>0.0000</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-05-07</th>\n",
" <td>57.7008</td>\n",
" <td>0.0000</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-05-08</th>\n",
" <td>54.8892</td>\n",
" <td>0.0146</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" temp precip workday weekend celebration\n",
"2014-05-04 58.0013 0.0000 0.0 1.0 0.0\n",
"2014-05-05 58.1371 0.0000 1.0 0.0 0.0\n",
"2014-05-06 59.1088 0.0000 1.0 0.0 0.0\n",
"2014-05-07 57.7008 0.0000 1.0 0.0 0.0\n",
"2014-05-08 54.8892 0.0146 1.0 0.0 0.0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# first day of the week to forecast\n",
"X_date = pd.to_datetime('2016-05-01')\n",
"# the forecast week (7 consecutive days starting at X_date) and its weekday labels;\n",
"# `periods=` replaces date_range's `closed='left'` argument, which pandas 2.0 removed\n",
"X_week = pd.date_range(X_date, periods=7)\n",
"days_dict = {0:'Пн',1:'Вт',2:'Ср',3:'Чт',4:'Пт',5:'Сб',6:'Вс'}\n",
"X_weekdays = [days_dict[d.dayofweek] for d in X_week]\n",
"# actual demand for the forecast week\n",
"y_real = electricity_data_onehot.loc[X_week, ['demand']].values\n",
"\n",
"# training history: the 104 weeks (~2 years) that end on the day before X_date\n",
"n_train_weeks = 104\n",
"dates_range = pd.date_range(X_date - pd.DateOffset(weeks=n_train_weeks), periods=7 * n_train_weeks)\n",
"\n",
"ml_X = electricity_data_onehot.loc[dates_range,\n",
"                                   ['temp', 'precip', 'workday', 'weekend', 'celebration']]\n",
"ml_Y = electricity_data_onehot.loc[dates_range, ['demand']]\n",
"\n",
"print(ml_Y.head())\n",
"ml_X.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# использована часть кода отсюда:\n",
"# https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption\n",
"\n",
"# evaluate one or more weekly forecasts against expected values\n",
"def evaluate_forecasts(actual, predicted):\n",
" scores = list()\n",
" # calculate an RMSE score for each day\n",
" for i in range(actual.shape[1]):\n",
" # calculate mse\n",
" mse = mean_squared_error(actual[:, i], predicted[:, i])\n",
" # calculate rmse\n",
" rmse = sqrt(mse)\n",
" # store\n",
" scores.append(rmse)\n",
" # calculate overall RMSE\n",
" s = 0\n",
" for row in range(actual.shape[0]):\n",
" for col in range(actual.shape[1]):\n",
" s += (actual[row, col] - predicted[row, col])**2\n",
" score = sqrt(s / (actual.shape[0] * actual.shape[1]))\n",
" return score, scores\n",
"\n",
"# summarize scores\n",
"def summarize_scores(name, score, scores):\n",
" s_scores = ', '.join(['%.1f' % s for s in scores])\n",
" print('%s: [%.3f] %s' % (name, score, s_scores))\n",
"\n",
"# convert history into inputs and outputs\n",
"def to_supervised(train, n_input, n_out=7):\n",
" # flatten data\n",
" data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))\n",
" X, y = list(), list()\n",
" in_start = 0\n",
" # step over the entire history one time step at a time\n",
" for _ in range(len(data)):\n",
" # define the end of the input sequence\n",
" in_end = in_start + n_input\n",
" out_end = in_end + n_out\n",
" # ensure we have enough data for this instance\n",
" if out_end < len(data):\n",
" X.append(data[in_start:in_end, :])\n",
" y.append(data[in_end:out_end, 0])\n",
" # move along one time step\n",
" in_start += 1\n",
" return array(X), array(y)\n",
"\n",
"# train the model\n",
"def build_model(train, n_input):\n",
"    \"\"\"Build an encoder-decoder LSTM network and fit it on windows cut from `train`.\n",
"\n",
"    train   -- weekly history, passed to to_supervised\n",
"    n_input -- number of past days in every input window\n",
"\n",
"    Returns the fitted Keras model.\n",
"    \"\"\"\n",
"    # training settings\n",
"    verbose = 0\n",
"    epochs = 100\n",
"    batch_size = 32\n",
"    # sliding-window samples and their dimensions\n",
"    train_x, train_y = to_supervised(train, n_input)\n",
"    n_timesteps = train_x.shape[1]\n",
"    n_features = train_x.shape[2]\n",
"    n_outputs = train_y.shape[1]\n",
"    # the network emits one value per output step: targets become [samples, timesteps, 1]\n",
"    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))\n",
"    # encoder LSTM -> repeated context vector -> decoder LSTM -> per-step dense head\n",
"    model = Sequential([\n",
"        LSTM(200, activation='relu', input_shape=(n_timesteps, n_features)),\n",
"        RepeatVector(n_outputs),\n",
"        LSTM(200, activation='relu', return_sequences=True),\n",
"        TimeDistributed(Dense(100, activation='relu')),\n",
"        TimeDistributed(Dense(1)),\n",
"    ])\n",
"    model.compile(loss='mse', optimizer='adam')\n",
"    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)\n",
"    return model\n",
"\n",
"# make a forecast\n",
"def forecast(model, history, n_input):\n",
" # flatten data\n",
" data = array(history)\n",
" data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))\n",
" # retrieve last observations for input data\n",
" input_x = data[-n_input:, :]\n",
" # reshape into [1, n_input, n]\n",
" input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))\n",
" # forecast the next week\n",
" yhat = model.predict(input_x, verbose=0)\n",
" # we only want the vector forecast\n",
" yhat = yhat[0]\n",
" return yhat"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### LSTM модель для составления недельного прогноза\n",
"\n",
"Согласно информации в сети (например, [блогпост на valohai.com](https://blog.valohai.com/smart-grids-use-machine-learning-to-forecast-load)), хороший результат при построении прогнозов загруженности сети даёт использование LSTM-моделей. Для тренировки модели мы используем данные примерно за два года (104 недели) до прогнозируемого интервала; при построении прогноза на вход модели подаются данные за последние две недели."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Экспериментальным путём установлено, что в нашем случае масштабирование входных данных несколько ухудшает качество прогноза: точность сильнее \"прыгает\" в зависимости от инициализации модели, хотя в среднем (по результатам ~30 запусков) результаты сопоставимы."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"scale_input = False\n",
"\n",
"# target and feature tables as plain numpy arrays\n",
"dataset_Y = ml_Y.values\n",
"dataset_X = ml_X.values\n",
"if scale_input:\n",
"    # optional standardisation, applied to the feature columns only\n",
"    scaler = StandardScaler()\n",
"    dataset_X = scaler.fit_transform(dataset_X)\n",
"# the target goes first, so it ends up in column 0 of the combined array\n",
"dataset = np.concatenate((dataset_Y, dataset_X), axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Тренировка модели"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"n_input = 14\n",
"\n",
"# cut the daily rows into equal blocks of 7 days and stack them: [weeks, 7, columns]\n",
"weekly_blocks = split(dataset, len(dataset)/7)\n",
"train_final = array(weekly_blocks)\n",
"# fit the network on the whole training history\n",
"model_final = build_model(train_final, n_input)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE: [6725.887] 11017.0, 5024.1, 6181.4, 4436.1, 2839.5, 4950.4, 8921.3\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1152x432 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"y_hat = forecast(model_final, train_final, n_input)\n",
"score, scores = evaluate_forecasts(array([y_real]), array([y_hat]))\n",
"\n",
"summarize_scores('RMSE', score, scores)\n",
"\n",
"# график\n",
"pyplot.figure(num=None, figsize=(16, 6))\n",
"\n",
"pyplot.subplot(122)\n",
"pyplot.plot(X_week.to_series().dt.date, [100 * np.fabs((y_real[i] - y_hat[i]) / y_real[i]) for i in range(len(y_real))],\n",
" color=\"red\", marker='o', label='Относительная ошибка (MAPE)')\n",
"pyplot.xlabel(\"День, YYYY-MM-DD\")\n",
"pyplot.ylabel(\"Ошибка, %\")\n",
"pyplot.title(\"Ошибка прогнозирования\")\n",
"pyplot.legend()\n",
"\n",
"pyplot.subplot(121)\n",
"pyplot.plot(X_week.to_series().dt.date, y_real, label='Реальная', color=\"blue\", marker='o')\n",
"pyplot.plot(X_week.to_series().dt.date, y_hat, label='Прогноз', color=\"green\", marker='o')\n",
"pyplot.ylim(bottom=100000)\n",
"pyplot.xlabel(\"День, YYYY-MM-DD\")\n",
"pyplot.ylabel(\"Нагрузка\")\n",
"pyplot.title(\"Реальная нагрузка сети Vs Прогнозированная\")\n",
"pyplot.legend()\n",
"pyplot.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Построение и оценка прогноза"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод\n",
"\n",
"Точность прогноза на 2016-05-02 (прогноз через день), построенного по данным до 2016-05-01 (не включительно), по результатам 30+ запусков составляет порядка 94–98%."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment