Skip to content

Instantly share code, notes, and snippets.

@martingaido
Created July 23, 2020 00:55
Show Gist options
  • Save martingaido/4486104e9350b9ab1bbd0909904e50fd to your computer and use it in GitHub Desktop.
Save martingaido/4486104e9350b9ab1bbd0909904e50fd to your computer and use it in GitHub Desktop.
ticker-predict.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "ticker-predict.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyPOHdkDj8Hxn5YrAFAyLFc2",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/martingaido/4486104e9350b9ab1bbd0909904e50fd/ticker-predict.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "VO_kyCueu6nH",
"colab_type": "code",
"colab": {}
},
"source": [
"# Descripción: \n",
"#\n",
"# Script para predecir el valor de una acción a futuro usando dos modelos muy\n",
"# conocidos de machine learning.\n",
"# \n",
"# Requisitos:\n",
"# \n",
"# 1) Obtener una llave para acceder a la API de Quandl y desbloquear las\n",
"# limitaciones en las consultas. Para obtener la llave de forma gratuita ir\n",
"# al siguiente enlace: https://www.quandl.com/\n",
"#\n",
"# 2) El dataset que provee Quandl contiene datos de ejemplo de años previos. \n",
"# Para obtener un dataset actualizado es necesario pagar una suscripción\n",
"# mensual en dólares.\n",
"#\n",
"# 3) Tener python instalado en nuestro ordenador y las librerías necesarias.\n",
"# pip install quandl scikit-learn numpy\n",
"\n",
"import quandl\n",
"import numpy as np\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.svm import SVR\n",
"from sklearn.model_selection import train_test_split"
],
"execution_count": 118,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "v9xE0YqYwE1K",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "70f564fb-c8bc-49e5-cfbe-b790433e9043"
},
"source": [
"# Ingresar un ticker o nombre de la acción\n",
"ticker = input(\"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): \")"
],
"execution_count": 119,
"outputs": [
{
"output_type": "stream",
"text": [
"Ingresar el nombre del ticker (ej. TSLA, FB, GOOG): TSLA\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WJbPaQDZwLKW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "b7824cd8-310d-49a3-fe86-02da0e448dad"
},
"source": [
"# Ingresar la cantidad de días que queremos predecir\n",
"num_days = input(\"Ingresar la cantidad de días a predecir: \")"
],
"execution_count": 120,
"outputs": [
{
"output_type": "stream",
"text": [
"Ingresar la cantidad de días a predecir: 10\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oZqP0aVzwPZO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "b0292707-a07d-43d9-fc42-0f7e424db04c"
},
"source": [
"# Obtener una muestra del dataset requerido\n",
"df = quandl.get(\"WIKI/\" + ticker.upper(), authtoken='PONER-AQUI-LA-API-KEY')\n",
"print(\"Datos históricos para el ticker: \" + ticker.upper())\n",
"print('')\n",
"print(df.head())"
],
"execution_count": 121,
"outputs": [
{
"output_type": "stream",
"text": [
"Datos históricos para el ticker: TSLA\n",
"\n",
" Open High Low ... Adj. Low Adj. Close Adj. Volume\n",
"Date ... \n",
"2010-06-29 19.00 25.0000 17.54 ... 17.54 23.89 18766300.0\n",
"2010-06-30 25.79 30.4192 23.30 ... 23.30 23.83 17187100.0\n",
"2010-07-01 25.00 25.9200 20.27 ... 20.27 21.96 8218800.0\n",
"2010-07-02 23.00 23.1000 18.71 ... 18.71 19.20 5139800.0\n",
"2010-07-06 20.00 20.0000 15.83 ... 15.83 16.11 6866900.0\n",
"\n",
"[5 rows x 12 columns]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KP2HBwB7xqHL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 306
},
"outputId": "5ab6b690-bdf5-40da-8153-c3af5e7a6b5c"
},
"source": [
"# Obtener los datos de la columna 'Adj. Close'\n",
"df = df[['Adj. Close']]\n",
"\n",
"# Imprimir los valores para ver el resultado\n",
"print('Primeros valores del dataset')\n",
"print(df.head())\n",
"\n",
"print('')\n",
"print('Ultimos valores del dataset')\n",
"print(df.tail())"
],
"execution_count": 122,
"outputs": [
{
"output_type": "stream",
"text": [
"Primeros valores del dataset\n",
" Adj. Close\n",
"Date \n",
"2010-06-29 23.89\n",
"2010-06-30 23.83\n",
"2010-07-01 21.96\n",
"2010-07-02 19.20\n",
"2010-07-06 16.11\n",
"\n",
"Ultimos valores del dataset\n",
" Adj. Close\n",
"Date \n",
"2018-03-21 316.53\n",
"2018-03-22 309.10\n",
"2018-03-23 301.54\n",
"2018-03-26 304.18\n",
"2018-03-27 279.18\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bdStwiq0x26Q",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 306
},
"outputId": "44eab847-2966-49f9-a223-459d84995e9c"
},
"source": [
"# Declarar la variable que contiene la cantidad de días\n",
"forecast_out = int(num_days)\n",
"\n",
"# Crear otra columna con la predicción en la cantidad de días especificados\n",
"df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)\n",
"\n",
"# Imprimir los valores para ver el resultado\n",
"print('Primeros valores del dataset')\n",
"print(df.head())\n",
"\n",
"print('')\n",
"print('Ultimos valores del dataset')\n",
"print(df.tail())"
],
"execution_count": 123,
"outputs": [
{
"output_type": "stream",
"text": [
"Primeros valores del dataset\n",
" Adj. Close Prediction\n",
"Date \n",
"2010-06-29 23.89 19.84\n",
"2010-06-30 23.83 19.89\n",
"2010-07-01 21.96 20.64\n",
"2010-07-02 19.20 21.91\n",
"2010-07-06 16.11 20.30\n",
"\n",
"Ultimos valores del dataset\n",
" Adj. Close Prediction\n",
"Date \n",
"2018-03-21 316.53 NaN\n",
"2018-03-22 309.10 NaN\n",
"2018-03-23 301.54 NaN\n",
"2018-03-26 304.18 NaN\n",
"2018-03-27 279.18 NaN\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "GX4pwoGl2uTW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"outputId": "132f0d49-c2de-4949-ea8f-21b689d4aa01"
},
"source": [
"# Convertir los datos en un array de numpy\n",
"X = np.array(df.drop(['Prediction'],1))\n",
"X = X[:-forecast_out]\n",
"print(X)"
],
"execution_count": 124,
"outputs": [
{
"output_type": "stream",
"text": [
"[[ 23.89]\n",
" [ 23.83]\n",
" [ 21.96]\n",
" ...\n",
" [327.17]\n",
" [345.51]\n",
" [341.84]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "JOqDGDLa3hvL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "26bd7528-5370-450e-e7af-fbc615a758e4"
},
"source": [
"# Crear un dataset (y)\n",
"# Convertir los datos en un array de numpy y descartar los últimos 'n' valores (NaN)\n",
"y = np.array(df['Prediction'])\n",
"y = y[:-forecast_out]\n",
"print(y)"
],
"execution_count": 125,
"outputs": [
{
"output_type": "stream",
"text": [
"[ 19.84 19.89 20.64 ... 301.54 304.18 279.18]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "SiV2b4u74ZiW",
"colab_type": "code",
"colab": {}
},
"source": [
"# Dividir la información en 80% de datos de entrenamiento y 20% de datos para testing\n",
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
],
"execution_count": 126,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "IBTI_uCD44CM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "a674e72c-c988-428c-d7ca-7cde2eb3cba4"
},
"source": [
"# Crear y entrenar el modelo usando Support Vector Machine (Regressor)\n",
"# Más info: https://en.wikipedia.org/wiki/Support_vector_machine\n",
"svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)\n",
"# Entrenar el modelo\n",
"svr_rbf.fit(x_train, y_train)"
],
"execution_count": 127,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,\n",
" kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
]
},
"metadata": {
"tags": []
},
"execution_count": 127
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5UUfe9fc5p7e",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "64a64584-469a-4832-ed73-db0fbc0976b8"
},
"source": [
"# Probar el modelo y obtener un valor de confianza (coeficiente de determinación R²).\n",
"# El valor más preciso es el más cercano a 1.0\n",
"svm_confidence = svr_rbf.score(x_test, y_test)\n",
"print(\"SVM Confidence: \", svm_confidence)"
],
"execution_count": 128,
"outputs": [
{
"output_type": "stream",
"text": [
"SVM Confidence: 0.9755202671503753\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "BQ4wufDN5-KW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "13ac9824-d3b2-405c-d4b4-18a14fcd5c69"
},
"source": [
"# Crear y entrenar el modelo usando Linear Regression\n",
"# Más info: https://en.wikipedia.org/wiki/Linear_regression\n",
"lr = LinearRegression()\n",
"# Entrenar el Modelo\n",
"lr.fit(x_train, y_train)"
],
"execution_count": 129,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
]
},
"metadata": {
"tags": []
},
"execution_count": 129
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "PRs-S6Tz6LJF",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "649f8cea-4651-4dd6-a19c-c21ef92dc4fe"
},
"source": [
"# Probar el modelo y obtener un valor de confianza (coeficiente de determinación R²).\n",
"# El valor más preciso es el más cercano a 1.0\n",
"lr_confidence = lr.score(x_test, y_test)\n",
"print(\"LR Confidence: \", lr_confidence)"
],
"execution_count": 130,
"outputs": [
{
"output_type": "stream",
"text": [
"LR Confidence: 0.9798808859371537\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_RRHXPDJ6b11",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 187
},
"outputId": "472b82b3-3652-49fa-a203-67b3fdca06f5"
},
"source": [
"x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]\n",
"print(x_forecast)"
],
"execution_count": 131,
"outputs": [
{
"output_type": "stream",
"text": [
"[[326.63]\n",
" [325.6 ]\n",
" [321.35]\n",
" [313.56]\n",
" [310.55]\n",
" [316.53]\n",
" [309.1 ]\n",
" [301.54]\n",
" [304.18]\n",
" [279.18]]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vjvieAws696c",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "a1730c02-05c8-4d20-ffa8-e63aa2d7f41f"
},
"source": [
"# Imprimir el resultado para los próximos 'n' días usando el modelo \"Linear Regression\"\n",
"lr_prediction = lr.predict(x_forecast)\n",
"print(lr_prediction)"
],
"execution_count": 132,
"outputs": [
{
"output_type": "stream",
"text": [
"[326.53142815 325.51134492 321.30226364 313.58726524 310.60624532\n",
" 316.52867028 309.17020582 301.68299299 304.29757525 279.53827358]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "8srQBMKb7PFl",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "370e382c-8da4-4d81-ca6d-af6d74aaf702"
},
"source": [
"# Imprimir el resultado para los próximos 'n' días usando el modelo \"Support Vector Regressor\"\n",
"svm_prediction = svr_rbf.predict(x_forecast)\n",
"print(svm_prediction)"
],
"execution_count": 133,
"outputs": [
{
"output_type": "stream",
"text": [
"[303.30585525 319.28318742 313.81326199 323.52528789 350.09919994\n",
" 343.671505 334.25198917 321.38354181 335.62400205 258.43457551]\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment