Skip to content

Instantly share code, notes, and snippets.

@usametov
Last active June 4, 2023 19:44
Show Gist options
  • Save usametov/3684fea1bf3138d050521e4a13326fcd to your computer and use it in GitHub Desktop.
Save usametov/3684fea1bf3138d050521e4a13326fcd to your computer and use it in GitHub Desktop.
prophet
import pandas as pd
import matplotlib.pyplot as plt
from fbprophet import Prophet
from datetime import datetime
import psycopg2
from sqlalchemy import create_engine
# Forecast the per-bar "high" price of one trading pair with Prophet, using
# changepoints stored in the database, and save the forecast plot as a PNG.
from sqlalchemy import text  # needed for bound SQL parameters below

# Alternative data source: read the bars from a Parquet file instead of the DB.
# df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

# NOTE(review): the connection URL embeds credentials in source; consider
# moving them to an environment variable or a secrets store.
alchemyEngine = create_engine('postgresql+psycopg2://clj_user:bolzham_mojo@127.0.0.1/volumebars', pool_recycle=3600)
pair = 'BTCUSD'

# The pair is passed as a bound parameter (:pair) instead of being spliced
# into the SQL string, so quoting/injection problems cannot occur.
bars_query = text(
    'select sum(volume) as volume, min(price) as low, max(price) as high, '
    'max(time) as time from volume_bars where pair = :pair group by group_cumsum'
)
changepoints_query = text(
    'select time from changepoints where pair = :pair and spread > 777'
)

# The context manager returns the connection to the pool even if a query fails.
with alchemyEngine.connect() as dbConnection:
    df = pd.read_sql(bars_query, dbConnection, params={'pair': pair})
    changepoints_df = pd.read_sql(changepoints_query, dbConnection, params={'pair': pair})

# Build the two-column frame Prophet expects: 'ds' (timestamp) and 'y' (value).
# `time` is converted from epoch milliseconds. Constructing a new frame avoids
# assigning into a column subset, which triggers SettingWithCopyWarning.
df_reordered = pd.DataFrame({
    'ds': pd.to_datetime(df['time'], unit='ms'),
    'y': df['high'],
})
# The query has no ORDER BY, so sort chronologically to make tail() meaningful.
df_reordered = df_reordered.sort_values('ds').reset_index(drop=True)

# Show the most recent rows of the training frame
print(df_reordered.tail(5))

# Changepoint timestamps (epoch milliseconds in the DB) as a list of Timestamps.
changepoints = pd.to_datetime(changepoints_df['time'], unit='ms').tolist()

# NOTE(review): per the Prophet docs, changepoint_range is not used when
# explicit changepoints are supplied; it is kept here to preserve the call.
model = Prophet(
    seasonality_mode='multiplicative',
    changepoint_prior_scale=1,
    changepoints=changepoints,
    changepoint_range=.9,
)
model.fit(df_reordered)

# Extend the history by 8 steps of 30 minutes each ('h' would be hourly).
future = model.make_future_dataframe(periods=8, freq='30min')
forecast = model.predict(future)
print(forecast[['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']].tail(8))

fig = model.plot(forecast, xlabel='Date', ylabel=r'yhat')
plt.title('price prediction - high')
# Timestamped file name so repeated runs do not overwrite earlier plots.
plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
# Train an XGBoost binary classifier on volume bars: the target is whether a
# bar closes above its open, the features are one-bar-lagged indicators.

# Load the packages used below
library(arrow)    # read_parquet
library(TTR)      # RSI, ADX, SAR
library(xgboost)  # xgb.DMatrix, xgboost, xgb.cv, xgb.importance
#TODO: extract function from the code below
# Read data from a Parquet file
parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"
data <- arrow::read_parquet(parquet_file)
#make a data frame
df = as.data.frame(data)
colnames(df)
#build features, indicators and target class
rsi = RSI(df$close, n=14, maType="WMA")
adx = data.frame(ADX(df[,c("high","low","close")]))
sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
# Column names are case-sensitive: the column is `close` (as used above and
# below); `df$Close` would be NULL and leave the trend feature empty.
trend = df$close - sar
#create a lag: shift each indicator down by one bar so that row i only holds
#values computed from bar i-1
rsi = c(NA,head(rsi,-1))
adx$ADX = c(NA,head(adx$ADX,-1))
trend = c(NA,head(trend,-1))
price = df$close-df$open
#target variable: 1 when the bar closed above its open, else 0
class = ifelse(price > 0,1,0)
# Create a Matrix with one named column per variable, then drop every row
# that contains an NA (the lag row and the indicator warm-up rows)
model = cbind(class = class, rsi = rsi, adx = adx$ADX, trend = trend)
model = na.omit(model)
# Split data into train and test sets
train_size = 2/3
# floor() keeps the breakpoint an integer; with a fractional breakpoint the
# test index sequence is fractional too and silently skips the last row
breakpoint = floor(nrow(model) * train_size)
training_data = model[1:breakpoint,]
test_data = model[(breakpoint+1):nrow(model),]
# Split data training and test data into X and Y
X_train = training_data[,2:4] ; Y_train = training_data[,1]
class(X_train)[1]; class(Y_train)
X_test = test_data[,2:4] ; Y_test = test_data[,1]
class(X_test)[1]; class(Y_test)
# Train the xgboost model using the "xgboost" function
dtrain = xgb.DMatrix(data = X_train, label = Y_train)
xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")
# Using cross validation (reuses the same training DMatrix)
cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")
# Make the predictions on the test data
preds = predict(xgModel, X_test)
# Determine the size of the prediction vector
print(length(preds))
# Limit display of predictions to the first 6
print(head(preds))
# Measuring model performance: share of test rows whose thresholded
# prediction (cut at 0.5) differs from the true class
error_value = mean(as.numeric(preds > 0.5) != Y_test)
print(paste("test-error=", error_value))
prediction = as.numeric(preds > 0.5)
print(head(prediction))
# View feature importance from the learnt model
importance_matrix = xgb.importance(model = xgModel)
print(importance_matrix)
#TODO: set device to print plots to png
# View the trees from a model
# xgb.plot.tree(model = xgModel)
# View only the first tree in the XGBoost model
# xgb.plot.tree(model = xgModel, n_first_tree = 1)
@usametov
Copy link
Author

#how to save a checkpoint:
# The snippet uses json and the Prophet serialization helpers, so import them.
import json
from fbprophet.serialize import model_to_json, model_from_json

# model_to_json() returns a string; json.dump() writes it as one JSON string.
with open('volume-bars-model.json', 'w') as file_out:
    json.dump(model_to_json(model), file_out)

#how to load checkpoint
# json.load() recovers the string written above, model_from_json() rebuilds
# the model from it.
with open('volume-bars-model.json', 'r') as file_in:
    model = model_from_json(json.load(file_in))

Then we just use it:

# Predict with the restored model. No dataframe is passed to predict();
# presumably Prophet then predicts over the dates it was fitted on — confirm
# against the Prophet docs.
forecast = model.predict()
# Plot the forecast produced above.
fig = model.plot(forecast)

@usametov
Copy link
Author

#re-train Prophet from existing checkpoint:
import numpy as np
class StanInit:
    """Callable that supplies initial parameter values taken from a fitted model.

    The values are the means of the previous model's ``params`` entries, so a
    new fit can start from where the previous one ended.
    """

    def __init__(self, model):
        """Compute the initial values from ``model.params``.

        Args:
            model: object exposing a ``params`` mapping with the keys
                ``'k'``, ``'m'``, ``'sigma_obs'``, ``'delta'`` and ``'beta'``.
        """
        self.params = {
            # These three are averaged over every element.
            'k': np.mean(model.params['k']),
            'm': np.mean(model.params['m']),
            'sigma_obs': np.mean(model.params['sigma_obs']),
            # These two are averaged along axis 0 only, which keeps one value
            # per column.
            'delta': np.mean(model.params['delta'], axis=0),
            'beta': np.mean(model.params['beta'], axis=0),
        }

    def __call__(self):
        """Return the dictionary of initial parameter values."""
        return self.params

Fine-tuning (fit a new model starting from the previous model's parameters):

# Fit a new Prophet model, passing a StanInit built from the previous model
# as the `init` argument of fit().
# NOTE(review): `df` and `prev_model` are not defined in this snippet —
# presumably the training frame and the previously fitted model; confirm.
model2 = Prophet().fit(df, init=StanInit(prev_model))

@usametov
Copy link
Author

#use plotly

from fbprophet.plot import (
    plot_plotly,
    plot_components_plotly,
    plot_forecast_component_plotly,
    plot_seasonality_plotly
)
# `py` is used below for rendering but was never bound; this is the alias the
# Prophet docs use for plotly's offline plotting module.
# NOTE(review): in a classic Jupyter notebook, py.init_notebook_mode() may be
# needed before the first iplot() call — confirm for your environment.
import plotly.offline as py

# Interactive forecast plot, with the trend line drawn as well
fig = plot_plotly(model, forecast, trend=True)
py.iplot(fig)

#components
fig = plot_components_plotly(model, forecast ,
                             figsize=(800, 175))
py.iplot(fig)

#seasonality
fig = plot_seasonality_plotly(model, 'yearly')
py.iplot(fig)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment