Created
July 23, 2020 00:55
-
-
Save martingaido/4486104e9350b9ab1bbd0909904e50fd to your computer and use it in GitHub Desktop.
ticker-predict.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "ticker-predict.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyPOHdkDj8Hxn5YrAFAyLFc2", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/martingaido/4486104e9350b9ab1bbd0909904e50fd/ticker-predict.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VO_kyCueu6nH", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Descripción: \n", | |
"#\n", | |
"# Script para predecir el valor de una acción a futuro usando dos modelos muy\n", | |
"# conocidos de machine learning.\n", | |
"# \n", | |
"# Requisitos:\n", | |
"# \n", | |
"# 1) Obtener una llave para acceder a la API de Quandl y desbloquear las\n", | |
"# limitaciones en las consultas. Para obtener la llave de forma gratuita ir\n", | |
"# al siguiente enlace: https://www.quandl.com/\n", | |
"#\n", | |
"# 2) El dataset que provee Quandl contiene datos de ejemplo de años previos. \n", | |
"# Para obtener un dataset actualizado es necesario pagar una suscripción\n", | |
"# mensual en dólares.\n", | |
"#\n", | |
"# 3) Tener python instalado en nuestro ordenador y las librerías necesarias.\n", | |
"# pip install quandl scikit-learn numpy\n", | |
"\n", | |
"import os\n", | |
"\n", | |
"import numpy as np\n", | |
"import quandl\n", | |
"from sklearn.linear_model import LinearRegression\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn.svm import SVR" | |
], | |
"execution_count": 118, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "v9xE0YqYwE1K", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "70f564fb-c8bc-49e5-cfbe-b790433e9043" | |
}, | |
"source": [ | |
"# Ingresar un ticker o nombre de la acción\n", | |
"ticker = input(\"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): \")" | |
], | |
"execution_count": 119, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): TSLA\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WJbPaQDZwLKW", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "b7824cd8-310d-49a3-fe86-02da0e448dad" | |
}, | |
"source": [ | |
"# Ingresar la cantidad de días que queremos predecir\n", | |
"num_days = input(\"Ingresar la cantidad de días a predecir: \")" | |
], | |
"execution_count": 120, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Ingresar la cantidad de días a predecir: 10\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "oZqP0aVzwPZO", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"outputId": "b0292707-a07d-43d9-fc42-0f7e424db04c" | |
}, | |
"source": [ | |
"# Obtener una muestra del dataset requerido\n", | |
"df = quandl.get(\"WIKI/\" + ticker.upper(), authtoken='PONER-AQUI-LA-API-KEY')\n", | |
"print(\"Datos históricos para el ticker: \" + ticker.upper())\n", | |
"print('')\n", | |
"print(df.head())" | |
], | |
"execution_count": 121, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Datos históricos para el ticker: TSLA\n", | |
"\n", | |
" Open High Low ... Adj. Low Adj. Close Adj. Volume\n", | |
"Date ... \n", | |
"2010-06-29 19.00 25.0000 17.54 ... 17.54 23.89 18766300.0\n", | |
"2010-06-30 25.79 30.4192 23.30 ... 23.30 23.83 17187100.0\n", | |
"2010-07-01 25.00 25.9200 20.27 ... 20.27 21.96 8218800.0\n", | |
"2010-07-02 23.00 23.1000 18.71 ... 18.71 19.20 5139800.0\n", | |
"2010-07-06 20.00 20.0000 15.83 ... 15.83 16.11 6866900.0\n", | |
"\n", | |
"[5 rows x 12 columns]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KP2HBwB7xqHL", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 306 | |
}, | |
"outputId": "5ab6b690-bdf5-40da-8153-c3af5e7a6b5c" | |
}, | |
"source": [ | |
"# Obtener los datos de la columna 'Adj. Close'\n", | |
"df = df[['Adj. Close']]\n", | |
"\n", | |
"# Imprimir los valores para ver el resultado\n", | |
"print('Primeros valores del dataset')\n", | |
"print(df.head())\n", | |
"\n", | |
"print('')\n", | |
"print('Ultimos valores del dataset')\n", | |
"print(df.tail())" | |
], | |
"execution_count": 122, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Primeros valores del dataset\n", | |
" Adj. Close\n", | |
"Date \n", | |
"2010-06-29 23.89\n", | |
"2010-06-30 23.83\n", | |
"2010-07-01 21.96\n", | |
"2010-07-02 19.20\n", | |
"2010-07-06 16.11\n", | |
"\n", | |
"Ultimos valores del dataset\n", | |
" Adj. Close\n", | |
"Date \n", | |
"2018-03-21 316.53\n", | |
"2018-03-22 309.10\n", | |
"2018-03-23 301.54\n", | |
"2018-03-26 304.18\n", | |
"2018-03-27 279.18\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "bdStwiq0x26Q", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 306 | |
}, | |
"outputId": "44eab847-2966-49f9-a223-459d84995e9c" | |
}, | |
"source": [ | |
"# Declarar la variable que contiene la cantida de días\n", | |
"forecast_out = int(num_days)\n", | |
"\n", | |
"# Crear otra columna con la predicción en la cantidad de días especificados\n", | |
"df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)\n", | |
"\n", | |
"# Imprimir los valores para ver el resultado\n", | |
"print('Primeros valores del dataset')\n", | |
"print(df.head())\n", | |
"\n", | |
"print('')\n", | |
"print('Ultimos valores del dataset')\n", | |
"print(df.tail())" | |
], | |
"execution_count": 123, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Primeros valores del dataset\n", | |
" Adj. Close Prediction\n", | |
"Date \n", | |
"2010-06-29 23.89 19.84\n", | |
"2010-06-30 23.83 19.89\n", | |
"2010-07-01 21.96 20.64\n", | |
"2010-07-02 19.20 21.91\n", | |
"2010-07-06 16.11 20.30\n", | |
"\n", | |
"Ultimos valores del dataset\n", | |
" Adj. Close Prediction\n", | |
"Date \n", | |
"2018-03-21 316.53 NaN\n", | |
"2018-03-22 309.10 NaN\n", | |
"2018-03-23 301.54 NaN\n", | |
"2018-03-26 304.18 NaN\n", | |
"2018-03-27 279.18 NaN\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "GX4pwoGl2uTW", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 136 | |
}, | |
"outputId": "132f0d49-c2de-4949-ea8f-21b689d4aa01" | |
}, | |
"source": [ | |
"# Convertir los datos en un array de numpy\n", | |
"X = np.array(df.drop(['Prediction'],1))\n", | |
"X = X[:-forecast_out]\n", | |
"print(X)" | |
], | |
"execution_count": 124, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[ 23.89]\n", | |
" [ 23.83]\n", | |
" [ 21.96]\n", | |
" ...\n", | |
" [327.17]\n", | |
" [345.51]\n", | |
" [341.84]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "JOqDGDLa3hvL", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "26bd7528-5370-450e-e7af-fbc615a758e4" | |
}, | |
"source": [ | |
"# Crear un dataset (y)\n", | |
"# Convertir los datos en un array de numpy (incluyendo los NaNs)\n", | |
"y = np.array(df['Prediction'])\n", | |
"y = y[:-forecast_out]\n", | |
"print(y)" | |
], | |
"execution_count": 125, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[ 19.84 19.89 20.64 ... 301.54 304.18 279.18]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SiV2b4u74ZiW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Dividir la información en 80% de datos de entrenamiento y 20% de datos para testing\n", | |
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" | |
], | |
"execution_count": 126, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IBTI_uCD44CM", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "a674e72c-c988-428c-d7ca-7cde2eb3cba4" | |
}, | |
"source": [ | |
"# Crear y entrenar el modelo usando Support Vector Machine (Regressor)\n", | |
"# Más info: https://en.wikipedia.org/wiki/Support_vector_machine\n", | |
"svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)\n", | |
"# Entrenar el modelo\n", | |
"svr_rbf.fit(x_train, y_train)" | |
], | |
"execution_count": 127, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,\n", | |
" kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 127 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5UUfe9fc5p7e", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "64a64584-469a-4832-ed73-db0fbc0976b8" | |
}, | |
"source": [ | |
"# Probar el modelo y obtener un valor de confidencia.\n", | |
"# El valor mas preciso es el mas cercano a 1.0\n", | |
"svm_confidence = svr_rbf.score(x_test, y_test)\n", | |
"print(\"SVM Confidence: \", svm_confidence)" | |
], | |
"execution_count": 128, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"SVM Confidence: 0.9755202671503753\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BQ4wufDN5-KW", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "13ac9824-d3b2-405c-d4b4-18a14fcd5c69" | |
}, | |
"source": [ | |
"# Crear y entrenar el modelo usando Linear Regression\n", | |
"# Más info: https://en.wikipedia.org/wiki/Linear_regression\n", | |
"lr = LinearRegression()\n", | |
"# Entrenar el Modelo\n", | |
"lr.fit(x_train, y_train)" | |
], | |
"execution_count": 129, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 129 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PRs-S6Tz6LJF", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "649f8cea-4651-4dd6-a19c-c21ef92dc4fe" | |
}, | |
"source": [ | |
"# Probar el modelo y obtener un valor de confidencia.\n", | |
"# El valor mas preciso es el mas cercano a 1.0\n", | |
"lr_confidence = lr.score(x_test, y_test)\n", | |
"print(\"LR Confidence: \", lr_confidence)" | |
], | |
"execution_count": 130, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"LR Confidence: 0.9798808859371537\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_RRHXPDJ6b11", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 187 | |
}, | |
"outputId": "472b82b3-3652-49fa-a203-67b3fdca06f5" | |
}, | |
"source": [ | |
"x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]\n", | |
"print(x_forecast)" | |
], | |
"execution_count": 131, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[326.63]\n", | |
" [325.6 ]\n", | |
" [321.35]\n", | |
" [313.56]\n", | |
" [310.55]\n", | |
" [316.53]\n", | |
" [309.1 ]\n", | |
" [301.54]\n", | |
" [304.18]\n", | |
" [279.18]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vjvieAws696c", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "a1730c02-05c8-4d20-ffa8-e63aa2d7f41f" | |
}, | |
"source": [ | |
"# Imprimir el resultado para los próximos 'n' días usando el modelo \"Lineal Regression\" \n", | |
"lr_prediction = lr.predict(x_forecast)\n", | |
"print(lr_prediction)" | |
], | |
"execution_count": 132, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[326.53142815 325.51134492 321.30226364 313.58726524 310.60624532\n", | |
" 316.52867028 309.17020582 301.68299299 304.29757525 279.53827358]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8srQBMKb7PFl", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
}, | |
"outputId": "370e382c-8da4-4d81-ca6d-af6d74aaf702" | |
}, | |
"source": [ | |
"# Imprimir el resultado para los próximos 'n' días usando el modelo \"Support Vector Regressor\"\n", | |
"svm_prediction = svr_rbf.predict(x_forecast)\n", | |
"print(svm_prediction)" | |
], | |
"execution_count": 133, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[303.30585525 319.28318742 313.81326199 323.52528789 350.09919994\n", | |
" 343.671505 334.25198917 321.38354181 335.62400205 258.43457551]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment