martingaido/ticker-predict.ipynb

## ticker-predict.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "ticker-predict.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyPOHdkDj8Hxn5YrAFAyLFc2",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/martingaido/4486104e9350b9ab1bbd0909904e50fd/ticker-predict.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VO_kyCueu6nH",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#  Descripción: \n",
        "#\n",
        "#  Script para predecir el valor de una acción a futuro usando dos modelos muy\n",
        "#  conocidos de machine learning.\n",
        "#  \n",
        "#  Requisitos:\n",
        "#  \n",
        "#  1) Obtener una llave para acceder a la API de Quandl y desbloquear las\n",
        "#     limitaciones en las consultas. Para obtener la llave de forma gratuita ir\n",
        "#     al siguiente enlace: https://www.quandl.com/\n",
        "#\n",
        "#  2) El dataset que provee Quandl contiene datos de ejemplo de años previos. \n",
        "#     Para obtener un dataset actualizado es necesario pagar una suscripción\n",
        "#     mensual en dólares.\n",
        "#\n",
        "#  3) Tener python instalado en nuestro ordenador y las librerías necesarias.\n",
        "#     pip install quandl scikit-learn numpy\n",
        "\n",
        "import quandl\n",
        "import numpy as np\n",
        "from sklearn.linear_model import LinearRegression\n",
        "from sklearn.svm import SVR\n",
        "from sklearn.model_selection import train_test_split"
      ],
      "execution_count": 118,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v9xE0YqYwE1K",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "70f564fb-c8bc-49e5-cfbe-b790433e9043"
      },
      "source": [
        "# Ask for the stock ticker symbol to analyse.\n",
        "# Surrounding whitespace is stripped so an accidental space does not end up inside the\n",
        "# dataset code that a later cell builds from this value.\n",
        "ticker = input(\"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): \").strip()\n",
        "if not ticker:\n",
        "    raise ValueError('ticker must not be empty')"
      ],
      "execution_count": 119,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): TSLA\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WJbPaQDZwLKW",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "b7824cd8-310d-49a3-fe86-02da0e448dad"
      },
      "source": [
        "# Ask how many days ahead the models should predict.\n",
        "# The answer is kept as the raw text typed by the user; a later cell converts it with int().\n",
        "days_prompt = \"Ingresar la cantidad de días a predecir: \"\n",
        "num_days = input(days_prompt)"
      ],
      "execution_count": 120,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Ingresar la cantidad de días a predecir: 10\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oZqP0aVzwPZO",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "b0292707-a07d-43d9-fc42-0f7e424db04c"
      },
      "source": [
        "# Download the historical price table for the requested ticker from Quandl's WIKI database.\n",
        "# The API key is read from the QUANDL_API_KEY environment variable when it is set, so a real\n",
        "# key never has to be saved inside the notebook; otherwise the placeholder below is used.\n",
        "import os\n",
        "\n",
        "symbol = ticker.upper()\n",
        "df = quandl.get(\"WIKI/\" + symbol, authtoken=os.environ.get('QUANDL_API_KEY', 'PONER-AQUI-LA-API-KEY'))\n",
        "print(\"Datos históricos para el ticker: \" + symbol)\n",
        "print('')\n",
        "print(df.head())"
      ],
      "execution_count": 121,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Datos históricos para el ticker: TSLA\n",
            "\n",
            "             Open     High    Low  ...  Adj. Low  Adj. Close  Adj. Volume\n",
            "Date                               ...                                   \n",
            "2010-06-29  19.00  25.0000  17.54  ...     17.54       23.89   18766300.0\n",
            "2010-06-30  25.79  30.4192  23.30  ...     23.30       23.83   17187100.0\n",
            "2010-07-01  25.00  25.9200  20.27  ...     20.27       21.96    8218800.0\n",
            "2010-07-02  23.00  23.1000  18.71  ...     18.71       19.20    5139800.0\n",
            "2010-07-06  20.00  20.0000  15.83  ...     15.83       16.11    6866900.0\n",
            "\n",
            "[5 rows x 12 columns]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KP2HBwB7xqHL",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 306
        },
        "outputId": "5ab6b690-bdf5-40da-8153-c3af5e7a6b5c"
      },
      "source": [
        "# Keep only the adjusted closing price ('Adj. Close').\n",
        "# .copy() makes the result an independent DataFrame, so adding a column to it in a later\n",
        "# cell does not trigger pandas' SettingWithCopyWarning.\n",
        "df = df[['Adj. Close']].copy()\n",
        "\n",
        "# Show both ends of the series to check the date range that was loaded.\n",
        "print('Primeros valores del dataset')\n",
        "print(df.head())\n",
        "\n",
        "print('')\n",
        "print('Ultimos valores del dataset')\n",
        "print(df.tail())"
      ],
      "execution_count": 122,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Primeros valores del dataset\n",
            "            Adj. Close\n",
            "Date                  \n",
            "2010-06-29       23.89\n",
            "2010-06-30       23.83\n",
            "2010-07-01       21.96\n",
            "2010-07-02       19.20\n",
            "2010-07-06       16.11\n",
            "\n",
            "Ultimos valores del dataset\n",
            "            Adj. Close\n",
            "Date                  \n",
            "2018-03-21      316.53\n",
            "2018-03-22      309.10\n",
            "2018-03-23      301.54\n",
            "2018-03-26      304.18\n",
            "2018-03-27      279.18\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bdStwiq0x26Q",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 306
        },
        "outputId": "44eab847-2966-49f9-a223-459d84995e9c"
      },
      "source": [
        "# Number of rows (days) to look ahead, converted from the text typed by the user.\n",
        "forecast_out = int(num_days)\n",
        "# A horizon of 0 would make the later slices [:-forecast_out] empty, a negative one would\n",
        "# slice from the wrong end, and one as long as the series would leave no labelled rows,\n",
        "# so reject all of them up front.\n",
        "if not 0 < forecast_out < len(df):\n",
        "    raise ValueError('num_days must be between 1 and %d' % (len(df) - 1))\n",
        "\n",
        "# Target column: the adjusted close shifted up by forecast_out rows, i.e. the price that many\n",
        "# rows later. The last forecast_out rows have no future value and become NaN.\n",
        "df['Prediction'] = df['Adj. Close'].shift(-forecast_out)\n",
        "\n",
        "# Show both ends of the table to check the new column.\n",
        "print('Primeros valores del dataset')\n",
        "print(df.head())\n",
        "\n",
        "print('')\n",
        "print('Ultimos valores del dataset')\n",
        "print(df.tail())"
      ],
      "execution_count": 123,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Primeros valores del dataset\n",
            "            Adj. Close  Prediction\n",
            "Date                              \n",
            "2010-06-29       23.89       19.84\n",
            "2010-06-30       23.83       19.89\n",
            "2010-07-01       21.96       20.64\n",
            "2010-07-02       19.20       21.91\n",
            "2010-07-06       16.11       20.30\n",
            "\n",
            "Ultimos valores del dataset\n",
            "            Adj. Close  Prediction\n",
            "Date                              \n",
            "2018-03-21      316.53         NaN\n",
            "2018-03-22      309.10         NaN\n",
            "2018-03-23      301.54         NaN\n",
            "2018-03-26      304.18         NaN\n",
            "2018-03-27      279.18         NaN\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "GX4pwoGl2uTW",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 136
        },
        "outputId": "132f0d49-c2de-4949-ea8f-21b689d4aa01"
      },
      "source": [
        "# Feature matrix: every column except the target, as a 2-D numpy array.\n",
        "# drop(columns=...) replaces the positional axis argument (drop([...], 1)), which pandas\n",
        "# deprecated in 1.3 and no longer accepts in 2.0.\n",
        "X = np.array(df.drop(columns=['Prediction']))\n",
        "# Remove the last forecast_out rows: their target value is NaN.\n",
        "X = X[:-forecast_out]\n",
        "print(X)"
      ],
      "execution_count": 124,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[ 23.89]\n",
            " [ 23.83]\n",
            " [ 21.96]\n",
            " ...\n",
            " [327.17]\n",
            " [345.51]\n",
            " [341.84]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JOqDGDLa3hvL",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "26bd7528-5370-450e-e7af-fbc615a758e4"
      },
      "source": [
        "# Target vector (y): the 'Prediction' column as a numpy array, cut so that the trailing\n",
        "# forecast_out entries (NaN, because their future price is unknown) are left out.\n",
        "y = np.array(df['Prediction'])[:-forecast_out]\n",
        "print(y)"
      ],
      "execution_count": 125,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[ 19.84  19.89  20.64 ... 301.54 304.18 279.18]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SiV2b4u74ZiW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Split the samples into 80% for training and 20% for testing.\n",
        "# random_state fixes the shuffle so the split, and therefore the scores computed from it,\n",
        "# are the same on every run.\n",
        "# NOTE(review): rows are shuffled before splitting, so test samples are interleaved in time\n",
        "# with training samples; consider shuffle=False for a chronological hold-out.\n",
        "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
      ],
      "execution_count": 126,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IBTI_uCD44CM",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "a674e72c-c988-428c-d7ca-7cde2eb3cba4"
      },
      "source": [
        "# Support Vector Regression with an RBF kernel.\n",
        "# Background: https://en.wikipedia.org/wiki/Support_vector_machine\n",
        "# The estimator is built and fitted on the training split in one step (fit returns the\n",
        "# estimator itself).\n",
        "svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(x_train, y_train)\n",
        "# Leave the fitted estimator as the cell's last expression so its repr is displayed.\n",
        "svr_rbf"
      ],
      "execution_count": 127,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,\n",
              "    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 127
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5UUfe9fc5p7e",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "64a64584-469a-4832-ed73-db0fbc0976b8"
      },
      "source": [
        "# Evaluate the SVR on the held-out split.\n",
        "# score() returns the coefficient of determination (R^2); the best possible value is 1.0.\n",
        "svm_confidence = svr_rbf.score(x_test, y_test)\n",
        "print(\"SVM Confidence: \", svm_confidence)"
      ],
      "execution_count": 128,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "SVM Confidence:  0.9755202671503753\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BQ4wufDN5-KW",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "13ac9824-d3b2-405c-d4b4-18a14fcd5c69"
      },
      "source": [
        "# Ordinary least-squares Linear Regression.\n",
        "# Background: https://en.wikipedia.org/wiki/Linear_regression\n",
        "# The estimator is built and fitted on the training split in one step (fit returns the\n",
        "# estimator itself).\n",
        "lr = LinearRegression().fit(x_train, y_train)\n",
        "# Leave the fitted estimator as the cell's last expression so its repr is displayed.\n",
        "lr"
      ],
      "execution_count": 129,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 129
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PRs-S6Tz6LJF",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "649f8cea-4651-4dd6-a19c-c21ef92dc4fe"
      },
      "source": [
        "# Evaluate the Linear Regression on the held-out split.\n",
        "# score() returns the coefficient of determination (R^2); the best possible value is 1.0.\n",
        "lr_confidence = lr.score(x_test, y_test)\n",
        "print(\"LR Confidence: \", lr_confidence)"
      ],
      "execution_count": 130,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "LR Confidence:  0.9798808859371537\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_RRHXPDJ6b11",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 187
        },
        "outputId": "472b82b3-3652-49fa-a203-67b3fdca06f5"
      },
      "source": [
        "# Inputs for the forecast: the last forecast_out rows of the feature column(s), i.e. the\n",
        "# rows whose target value is NaN.\n",
        "# drop(columns=...) is used because pandas 2.0 no longer accepts the positional axis argument.\n",
        "x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]\n",
        "print(x_forecast)"
      ],
      "execution_count": 131,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[[326.63]\n",
            " [325.6 ]\n",
            " [321.35]\n",
            " [313.56]\n",
            " [310.55]\n",
            " [316.53]\n",
            " [309.1 ]\n",
            " [301.54]\n",
            " [304.18]\n",
            " [279.18]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vjvieAws696c",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "a1730c02-05c8-4d20-ffa8-e63aa2d7f41f"
      },
      "source": [
        "# Linear Regression forecast: one predicted price per row of x_forecast.\n",
        "lr_prediction = lr.predict(x_forecast)\n",
        "print(lr_prediction)"
      ],
      "execution_count": 132,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[326.53142815 325.51134492 321.30226364 313.58726524 310.60624532\n",
            " 316.52867028 309.17020582 301.68299299 304.29757525 279.53827358]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8srQBMKb7PFl",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 51
        },
        "outputId": "370e382c-8da4-4d81-ca6d-af6d74aaf702"
      },
      "source": [
        "# Imprimir el resultado para los próximos 'n' días usando el modelo \"Support Vector Regressor\"\n",
        "svm_prediction = svr_rbf.predict(x_forecast)\n",
        "print(svm_prediction)"
      ],
      "execution_count": 133,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "[303.30585525 319.28318742 313.81326199 323.52528789 350.09919994\n",
            " 343.671505   334.25198917 321.38354181 335.62400205 258.43457551]\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "ticker-predict.ipynb",
	"provenance": [],
	"collapsed_sections": [],
	"authorship_tag": "ABX9TyPOHdkDj8Hxn5YrAFAyLFc2",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/martingaido/4486104e9350b9ab1bbd0909904e50fd/ticker-predict.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "VO_kyCueu6nH",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Descripción: \n",
	"#\n",
	"# Script para predecir el valor de una acción a futuro usando dos modelos muy\n",
	"# conocidos de machine learning.\n",
	"# \n",
	"# Requisitos:\n",
	"# \n",
	"# 1) Obtener una llave para acceder a la API de Quandl y desbloquear las\n",
	"# limitaciones en las consultas. Para obtener la llave de forma gratuita ir\n",
	"# al siguiente enlace: https://www.quandl.com/\n",
	"#\n",
	"# 2) El dataset que provee Quandl contiene datos de ejemplo de años previos. \n",
	"# Para obtener un dataset actualizado es necesario pagar una suscripción\n",
	"# mensual en dólares.\n",
	"#\n",
	"# 3) Tener python instalado en nuestro ordenador y las librerías necesarias.\n",
	"# pip install quandl scikit-learn numpy\n",
	"\n",
	"import quandl\n",
	"import numpy as np\n",
	"from sklearn.linear_model import LinearRegression\n",
	"from sklearn.svm import SVR\n",
	"from sklearn.model_selection import train_test_split"
	],
	"execution_count": 118,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "v9xE0YqYwE1K",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "70f564fb-c8bc-49e5-cfbe-b790433e9043"
	},
	"source": [
	"# Ask for the stock ticker symbol to analyse.\n",
	"# Surrounding whitespace is stripped so an accidental space does not end up inside the\n",
	"# dataset code that a later cell builds from this value.\n",
	"ticker = input(\"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): \").strip()\n",
	"if not ticker:\n",
	"    raise ValueError('ticker must not be empty')"
	],
	"execution_count": 119,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): TSLA\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "WJbPaQDZwLKW",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "b7824cd8-310d-49a3-fe86-02da0e448dad"
	},
	"source": [
	"# Ask how many days ahead the models should predict.\n",
	"# The answer is kept as the raw text typed by the user; a later cell converts it with int().\n",
	"days_prompt = \"Ingresar la cantidad de días a predecir: \"\n",
	"num_days = input(days_prompt)"
	],
	"execution_count": 120,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Ingresar la cantidad de días a predecir: 10\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "oZqP0aVzwPZO",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 204
	},
	"outputId": "b0292707-a07d-43d9-fc42-0f7e424db04c"
	},
	"source": [
	"# Download the historical price table for the requested ticker from Quandl's WIKI database.\n",
	"# The API key is read from the QUANDL_API_KEY environment variable when it is set, so a real\n",
	"# key never has to be saved inside the notebook; otherwise the placeholder below is used.\n",
	"import os\n",
	"\n",
	"symbol = ticker.upper()\n",
	"df = quandl.get(\"WIKI/\" + symbol, authtoken=os.environ.get('QUANDL_API_KEY', 'PONER-AQUI-LA-API-KEY'))\n",
	"print(\"Datos históricos para el ticker: \" + symbol)\n",
	"print('')\n",
	"print(df.head())"
	],
	"execution_count": 121,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Datos históricos para el ticker: TSLA\n",
	"\n",
	" Open High Low ... Adj. Low Adj. Close Adj. Volume\n",
	"Date ... \n",
	"2010-06-29 19.00 25.0000 17.54 ... 17.54 23.89 18766300.0\n",
	"2010-06-30 25.79 30.4192 23.30 ... 23.30 23.83 17187100.0\n",
	"2010-07-01 25.00 25.9200 20.27 ... 20.27 21.96 8218800.0\n",
	"2010-07-02 23.00 23.1000 18.71 ... 18.71 19.20 5139800.0\n",
	"2010-07-06 20.00 20.0000 15.83 ... 15.83 16.11 6866900.0\n",
	"\n",
	"[5 rows x 12 columns]\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "KP2HBwB7xqHL",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 306
	},
	"outputId": "5ab6b690-bdf5-40da-8153-c3af5e7a6b5c"
	},
	"source": [
	"# Keep only the adjusted closing price ('Adj. Close').\n",
	"# .copy() makes the result an independent DataFrame, so adding a column to it in a later\n",
	"# cell does not trigger pandas' SettingWithCopyWarning.\n",
	"df = df[['Adj. Close']].copy()\n",
	"\n",
	"# Show both ends of the series to check the date range that was loaded.\n",
	"print('Primeros valores del dataset')\n",
	"print(df.head())\n",
	"\n",
	"print('')\n",
	"print('Ultimos valores del dataset')\n",
	"print(df.tail())"
	],
	"execution_count": 122,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Primeros valores del dataset\n",
	" Adj. Close\n",
	"Date \n",
	"2010-06-29 23.89\n",
	"2010-06-30 23.83\n",
	"2010-07-01 21.96\n",
	"2010-07-02 19.20\n",
	"2010-07-06 16.11\n",
	"\n",
	"Ultimos valores del dataset\n",
	" Adj. Close\n",
	"Date \n",
	"2018-03-21 316.53\n",
	"2018-03-22 309.10\n",
	"2018-03-23 301.54\n",
	"2018-03-26 304.18\n",
	"2018-03-27 279.18\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "bdStwiq0x26Q",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 306
	},
	"outputId": "44eab847-2966-49f9-a223-459d84995e9c"
	},
	"source": [
	"# Number of rows (days) to look ahead, converted from the text typed by the user.\n",
	"forecast_out = int(num_days)\n",
	"# A horizon of 0 would make the later slices [:-forecast_out] empty, a negative one would\n",
	"# slice from the wrong end, and one as long as the series would leave no labelled rows,\n",
	"# so reject all of them up front.\n",
	"if not 0 < forecast_out < len(df):\n",
	"    raise ValueError('num_days must be between 1 and %d' % (len(df) - 1))\n",
	"\n",
	"# Target column: the adjusted close shifted up by forecast_out rows, i.e. the price that many\n",
	"# rows later. The last forecast_out rows have no future value and become NaN.\n",
	"df['Prediction'] = df['Adj. Close'].shift(-forecast_out)\n",
	"\n",
	"# Show both ends of the table to check the new column.\n",
	"print('Primeros valores del dataset')\n",
	"print(df.head())\n",
	"\n",
	"print('')\n",
	"print('Ultimos valores del dataset')\n",
	"print(df.tail())"
	],
	"execution_count": 123,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Primeros valores del dataset\n",
	" Adj. Close Prediction\n",
	"Date \n",
	"2010-06-29 23.89 19.84\n",
	"2010-06-30 23.83 19.89\n",
	"2010-07-01 21.96 20.64\n",
	"2010-07-02 19.20 21.91\n",
	"2010-07-06 16.11 20.30\n",
	"\n",
	"Ultimos valores del dataset\n",
	" Adj. Close Prediction\n",
	"Date \n",
	"2018-03-21 316.53 NaN\n",
	"2018-03-22 309.10 NaN\n",
	"2018-03-23 301.54 NaN\n",
	"2018-03-26 304.18 NaN\n",
	"2018-03-27 279.18 NaN\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "GX4pwoGl2uTW",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 136
	},
	"outputId": "132f0d49-c2de-4949-ea8f-21b689d4aa01"
	},
	"source": [
	"# Feature matrix: every column except the target, as a 2-D numpy array.\n",
	"# drop(columns=...) replaces the positional axis argument (drop([...], 1)), which pandas\n",
	"# deprecated in 1.3 and no longer accepts in 2.0.\n",
	"X = np.array(df.drop(columns=['Prediction']))\n",
	"# Remove the last forecast_out rows: their target value is NaN.\n",
	"X = X[:-forecast_out]\n",
	"print(X)"
	],
	"execution_count": 124,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[[ 23.89]\n",
	" [ 23.83]\n",
	" [ 21.96]\n",
	" ...\n",
	" [327.17]\n",
	" [345.51]\n",
	" [341.84]]\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "JOqDGDLa3hvL",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "26bd7528-5370-450e-e7af-fbc615a758e4"
	},
	"source": [
	"# Target vector (y): the 'Prediction' column as a numpy array, cut so that the trailing\n",
	"# forecast_out entries (NaN, because their future price is unknown) are left out.\n",
	"y = np.array(df['Prediction'])[:-forecast_out]\n",
	"print(y)"
	],
	"execution_count": 125,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[ 19.84 19.89 20.64 ... 301.54 304.18 279.18]\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "SiV2b4u74ZiW",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Split the samples into 80% for training and 20% for testing.\n",
	"# random_state fixes the shuffle so the split, and therefore the scores computed from it,\n",
	"# are the same on every run.\n",
	"# NOTE(review): rows are shuffled before splitting, so test samples are interleaved in time\n",
	"# with training samples; consider shuffle=False for a chronological hold-out.\n",
	"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
	],
	"execution_count": 126,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "IBTI_uCD44CM",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 51
	},
	"outputId": "a674e72c-c988-428c-d7ca-7cde2eb3cba4"
	},
	"source": [
	"# Support Vector Regression with an RBF kernel.\n",
	"# Background: https://en.wikipedia.org/wiki/Support_vector_machine\n",
	"# The estimator is built and fitted on the training split in one step (fit returns the\n",
	"# estimator itself).\n",
	"svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(x_train, y_train)\n",
	"# Leave the fitted estimator as the cell's last expression so its repr is displayed.\n",
	"svr_rbf"
	],
	"execution_count": 127,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,\n",
	" kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 127
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "5UUfe9fc5p7e",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "64a64584-469a-4832-ed73-db0fbc0976b8"
	},
	"source": [
	"# Evaluate the SVR on the held-out split.\n",
	"# score() returns the coefficient of determination (R^2); the best possible value is 1.0.\n",
	"svm_confidence = svr_rbf.score(x_test, y_test)\n",
	"print(\"SVM Confidence: \", svm_confidence)"
	],
	"execution_count": 128,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"SVM Confidence: 0.9755202671503753\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "BQ4wufDN5-KW",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "13ac9824-d3b2-405c-d4b4-18a14fcd5c69"
	},
	"source": [
	"# Ordinary least-squares Linear Regression.\n",
	"# Background: https://en.wikipedia.org/wiki/Linear_regression\n",
	"# The estimator is built and fitted on the training split in one step (fit returns the\n",
	"# estimator itself).\n",
	"lr = LinearRegression().fit(x_train, y_train)\n",
	"# Leave the fitted estimator as the cell's last expression so its repr is displayed.\n",
	"lr"
	],
	"execution_count": 129,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
	]
	},
	"metadata": {
	"tags": []
	},
	"execution_count": 129
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "PRs-S6Tz6LJF",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"outputId": "649f8cea-4651-4dd6-a19c-c21ef92dc4fe"
	},
	"source": [
	"# Evaluate the Linear Regression on the held-out split.\n",
	"# score() returns the coefficient of determination (R^2); the best possible value is 1.0.\n",
	"lr_confidence = lr.score(x_test, y_test)\n",
	"print(\"LR Confidence: \", lr_confidence)"
	],
	"execution_count": 130,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"LR Confidence: 0.9798808859371537\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "_RRHXPDJ6b11",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 187
	},
	"outputId": "472b82b3-3652-49fa-a203-67b3fdca06f5"
	},
	"source": [
	"# Inputs for the forecast: the last forecast_out rows of the feature column(s), i.e. the\n",
	"# rows whose target value is NaN.\n",
	"# drop(columns=...) is used because pandas 2.0 no longer accepts the positional axis argument.\n",
	"x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]\n",
	"print(x_forecast)"
	],
	"execution_count": 131,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[[326.63]\n",
	" [325.6 ]\n",
	" [321.35]\n",
	" [313.56]\n",
	" [310.55]\n",
	" [316.53]\n",
	" [309.1 ]\n",
	" [301.54]\n",
	" [304.18]\n",
	" [279.18]]\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "vjvieAws696c",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 51
	},
	"outputId": "a1730c02-05c8-4d20-ffa8-e63aa2d7f41f"
	},
	"source": [
	"# Linear Regression forecast: one predicted price per row of x_forecast.\n",
	"lr_prediction = lr.predict(x_forecast)\n",
	"print(lr_prediction)"
	],
	"execution_count": 132,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[326.53142815 325.51134492 321.30226364 313.58726524 310.60624532\n",
	" 316.52867028 309.17020582 301.68299299 304.29757525 279.53827358]\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "8srQBMKb7PFl",
	"colab_type": "code",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 51
	},
	"outputId": "370e382c-8da4-4d81-ca6d-af6d74aaf702"
	},
	"source": [
	"# Support Vector Regressor forecast: one predicted price per row of x_forecast.\n",
	"svm_prediction = svr_rbf.predict(x_forecast)\n",
	"print(svm_prediction)"
	],
	"execution_count": 133,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"[303.30585525 319.28318742 313.81326199 323.52528789 350.09919994\n",
	" 343.671505 334.25198917 321.38354181 335.62400205 258.43457551]\n"
	],
	"name": "stdout"
	}
	]
	}
	]
	}