usametov/prophet1.py

## prophet1.py
import pandas as pd
import matplotlib.pyplot as plt
from fbprophet import Prophet
from datetime import datetime
import psycopg2
from sqlalchemy import create_engine

#Read the Parquet file
#df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

# Open a connection to the local PostgreSQL "volumebars" database.
# NOTE(review): the credentials are embedded in the URL below; consider
# loading them from the environment or a secrets store instead.
alchemyEngine = create_engine(
    'postgresql+psycopg2://clj_user:bolzham_mojo@127.0.0.1/volumebars',
    pool_recycle=3600,
)
dbConnection = alchemyEngine.connect()

# One output row per group_cumsum value for the chosen pair: summed volume,
# lowest and highest price, and the latest time within the group.
pair = 'BTCUSD'
bars_query = f'select sum(volume) as volume, min(price) as low, max(price) as high, max(time) as time from volume_bars where pair=\'{pair}\' group by group_cumsum'
df = pd.read_sql(bars_query, dbConnection)

# Build the two-column frame Prophet expects: 'ds' (timestamp) and 'y' (value).
# Take an explicit copy of the 'high' and 'time' columns so that adding the
# 'ds' column below modifies an independent frame rather than a slice of df
# (assigning onto a slice raises SettingWithCopyWarning).
selected_columns = df[['high', 'time']].copy()

# Convert timestamp in milliseconds to pandas timestamp
selected_columns['ds'] = pd.to_datetime(selected_columns['time'], unit='ms')

# After dropping 'time' the remaining columns are ('high', 'ds'); rename
# 'high' to 'y'.
selected_columns = selected_columns.drop('time', axis=1)
selected_columns.columns = ['y', 'ds']

reordered_columns = ['ds', 'y']
df_reordered = selected_columns[reordered_columns]

# Show the last rows of the resulting DataFrame
print(df_reordered.tail(5))


# Load the times to pass to Prophet as explicit changepoints: rows of the
# changepoints table for this pair whose spread exceeds 777.
changepoints_df = pd.read_sql(f'select time from changepoints where pair =\'{pair}\' and spread > 777', dbConnection)

# Convert the millisecond epoch values to pandas timestamps.
changepoints_df['ds'] = pd.to_datetime(changepoints_df['time'], unit='ms')
changepoints_df = changepoints_df.drop('time', axis=1)

# Read the column directly so the list holds pandas Timestamps. Going through
# DataFrame.values.tolist() can hand back raw integer nanoseconds for
# datetime64[ns] data, which only works if the consumer re-parses them.
changepoints = changepoints_df['ds'].tolist()

# Fit Prophet on the (ds, y) frame, using the changepoints loaded above.
model = Prophet(
    seasonality_mode='multiplicative',
    changepoint_prior_scale=1,
    changepoints=changepoints,
    changepoint_range=.9,
)
model.fit(df_reordered)

# Extend the history by 8 periods of 30 minutes each ('h' would be hourly).
future = model.make_future_dataframe(periods=8, freq='30min')
forecast = model.predict(future)

# Print the last 8 rows of the forecast with its bounds and trend.
forecast_columns = ['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']
print(forecast[forecast_columns].tail(8))

fig = model.plot(forecast, xlabel='Date', ylabel=r'yhat')
plt.title('price prediction - high')

# The file name carries the current date and time of the run.
plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")

## xgboost.R
# Load the packages this script relies on:
#   arrow   - read_parquet()
#   TTR     - RSI(), ADX() and SAR() indicators used below
#   xgboost - xgb.DMatrix(), xgboost(), xgb.cv(), xgb.importance()
library(arrow)
library(TTR)
library(xgboost)


#TODO: extract function from the code below
# Read data from a Parquet file
parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"

data <- arrow::read_parquet(parquet_file)

# Convert to a plain data frame and show the available column names
df = as.data.frame(data)
colnames(df)

# Build features (indicators) and the target class
rsi = RSI(df$close, n=14, maType="WMA")
adx = data.frame(ADX(df[,c("high","low","close")]))
sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
# Column names are case-sensitive. Every other reference in this script uses
# "close"; df$Close is NULL unless a separate "Close" column exists, which
# would collapse trend to a single NA and corrupt the matrix via recycling.
trend = df$close - sar

# Lag each indicator by one row: row i receives the value from row i-1
rsi = c(NA,head(rsi,-1))
adx$ADX = c(NA,head(adx$ADX,-1))
trend = c(NA,head(trend,-1))
price = df$close-df$open

# Target variable: 1 when close is above open for the row, otherwise 0
class = ifelse(price > 0,1,0)

# Assemble the columns into a numeric matrix and drop rows containing NA
model_df = data.frame(class,rsi,adx$ADX,trend)
model = matrix(c(class,rsi,adx$ADX,trend), nrow=length(class))
model = na.omit(model)
colnames(model) = c("class","rsi","adx","trend")

# Split data into train and test sets: the first 2/3 of the rows train the
# model, the remainder is held out for testing.
train_size = 2/3
# Round down to a whole row index. With a fractional breakpoint,
# (breakpoint+1):nrow(model) produces fractional indices that are truncated
# when subsetting, so the final row could be left out of the test set.
breakpoint = floor(nrow(model) * train_size)

training_data = model[1:breakpoint,]
test_data = model[(breakpoint+1):nrow(model),]

# Split training and test data into X (columns 2-4) and Y (column 1)
X_train = training_data[,2:4] ; Y_train = training_data[,1]
class(X_train)[1]; class(Y_train)

X_test = test_data[,2:4] ; Y_test = test_data[,1]
class(X_test)[1]; class(Y_test)

# Train the xgboost model using the "xgboost" function.
# The argument is spelled out as nrounds rather than relying on R's partial
# argument matching of "nround".
dtrain = xgb.DMatrix(data = X_train, label = Y_train)
xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")

# Using cross validation (5 folds) on the same training matrix; dtrain is
# reused instead of being rebuilt from identical inputs.
cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")

# Run the trained model over the held-out feature rows
preds = predict(xgModel, X_test)

# Threshold the predictions at 0.5 to obtain 0/1 labels
prediction = as.numeric(preds > 0.5)

# Number of predictions produced
print(length(preds))

# First six raw predictions
print(head(preds))

# Model performance: share of test rows whose thresholded prediction
# differs from the actual label
error_value = mean(prediction != Y_test)
print(paste("test-error=", error_value))

print(head(prediction))

# Feature importance from the learnt model
importance_matrix = xgb.importance(model = xgModel)
print(importance_matrix)

#TODO: set device to print plots to png
# View the trees from a model
# xgb.plot.tree(model = xgModel)
# View only the first tree in the XGBoost model
# xgb.plot.tree(model = xgModel, n_first_tree = 1)
	import pandas as pd
	import matplotlib.pyplot as plt
	from fbprophet import Prophet
	from datetime import datetime
	import psycopg2
	from sqlalchemy import create_engine

	#Read the Parquet file
	#df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

	# Open a connection to the local PostgreSQL "volumebars" database.
	# NOTE(review): the credentials are embedded in the URL below; consider
	# loading them from the environment or a secrets store instead.
	alchemyEngine = create_engine(
		'postgresql+psycopg2://clj_user:bolzham_mojo@127.0.0.1/volumebars',
		pool_recycle=3600,
	)
	dbConnection = alchemyEngine.connect()

	# One output row per group_cumsum value for the chosen pair: summed volume,
	# lowest and highest price, and the latest time within the group.
	pair = 'BTCUSD'
	bars_query = f'select sum(volume) as volume, min(price) as low, max(price) as high, max(time) as time from volume_bars where pair=\'{pair}\' group by group_cumsum'
	df = pd.read_sql(bars_query, dbConnection)

	# Build the two-column frame Prophet expects: 'ds' (timestamp) and 'y' (value).
	# Take an explicit copy of the 'high' and 'time' columns so that adding the
	# 'ds' column below modifies an independent frame rather than a slice of df
	# (assigning onto a slice raises SettingWithCopyWarning).
	selected_columns = df[['high', 'time']].copy()

	# Convert timestamp in milliseconds to pandas timestamp
	selected_columns['ds'] = pd.to_datetime(selected_columns['time'], unit='ms')

	# After dropping 'time' the remaining columns are ('high', 'ds'); rename
	# 'high' to 'y'.
	selected_columns = selected_columns.drop('time', axis=1)
	selected_columns.columns = ['y', 'ds']

	reordered_columns = ['ds', 'y']
	df_reordered = selected_columns[reordered_columns]

	# Show the last rows of the resulting DataFrame
	print(df_reordered.tail(5))


	# Load the times to pass to Prophet as explicit changepoints: rows of the
	# changepoints table for this pair whose spread exceeds 777.
	changepoints_df = pd.read_sql(f'select time from changepoints where pair =\'{pair}\' and spread > 777', dbConnection)

	# Convert the millisecond epoch values to pandas timestamps.
	changepoints_df['ds'] = pd.to_datetime(changepoints_df['time'], unit='ms')
	changepoints_df = changepoints_df.drop('time', axis=1)

	# Read the column directly so the list holds pandas Timestamps. Going through
	# DataFrame.values.tolist() can hand back raw integer nanoseconds for
	# datetime64[ns] data, which only works if the consumer re-parses them.
	changepoints = changepoints_df['ds'].tolist()

	# Fit Prophet on the (ds, y) frame, using the changepoints loaded above.
	model = Prophet(
		seasonality_mode='multiplicative',
		changepoint_prior_scale=1,
		changepoints=changepoints,
		changepoint_range=.9,
	)
	model.fit(df_reordered)

	# Extend the history by 8 periods of 30 minutes each ('h' would be hourly).
	future = model.make_future_dataframe(periods=8, freq='30min')
	forecast = model.predict(future)

	# Print the last 8 rows of the forecast with its bounds and trend.
	forecast_columns = ['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']
	print(forecast[forecast_columns].tail(8))

	fig = model.plot(forecast, xlabel='Date', ylabel=r'yhat')
	plt.title('price prediction - high')

	# The file name carries the current date and time of the run.
	plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
	# Load the packages this script relies on:
	#   arrow   - read_parquet()
	#   TTR     - RSI(), ADX() and SAR() indicators used below
	#   xgboost - xgb.DMatrix(), xgboost(), xgb.cv(), xgb.importance()
	library(arrow)
	library(TTR)
	library(xgboost)


	#TODO: extract function from the code below
	# Read data from a Parquet file
	parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"

	data <- arrow::read_parquet(parquet_file)

	# Convert to a plain data frame and show the available column names
	df = as.data.frame(data)
	colnames(df)

	# Build features (indicators) and the target class
	rsi = RSI(df$close, n=14, maType="WMA")
	adx = data.frame(ADX(df[,c("high","low","close")]))
	sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
	# Column names are case-sensitive. Every other reference in this script uses
	# "close"; df$Close is NULL unless a separate "Close" column exists, which
	# would collapse trend to a single NA and corrupt the matrix via recycling.
	trend = df$close - sar

	# Lag each indicator by one row: row i receives the value from row i-1
	rsi = c(NA,head(rsi,-1))
	adx$ADX = c(NA,head(adx$ADX,-1))
	trend = c(NA,head(trend,-1))
	price = df$close-df$open

	# Target variable: 1 when close is above open for the row, otherwise 0
	class = ifelse(price > 0,1,0)

	# Assemble the columns into a numeric matrix and drop rows containing NA
	model_df = data.frame(class,rsi,adx$ADX,trend)
	model = matrix(c(class,rsi,adx$ADX,trend), nrow=length(class))
	model = na.omit(model)
	colnames(model) = c("class","rsi","adx","trend")

	# Split data into train and test sets: the first 2/3 of the rows train the
	# model, the remainder is held out for testing.
	train_size = 2/3
	# Round down to a whole row index. With a fractional breakpoint,
	# (breakpoint+1):nrow(model) produces fractional indices that are truncated
	# when subsetting, so the final row could be left out of the test set.
	breakpoint = floor(nrow(model) * train_size)

	training_data = model[1:breakpoint,]
	test_data = model[(breakpoint+1):nrow(model),]

	# Split training and test data into X (columns 2-4) and Y (column 1)
	X_train = training_data[,2:4] ; Y_train = training_data[,1]
	class(X_train)[1]; class(Y_train)

	X_test = test_data[,2:4] ; Y_test = test_data[,1]
	class(X_test)[1]; class(Y_test)

	# Train the xgboost model using the "xgboost" function.
	# The argument is spelled out as nrounds rather than relying on R's partial
	# argument matching of "nround".
	dtrain = xgb.DMatrix(data = X_train, label = Y_train)
	xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")

	# Using cross validation (5 folds) on the same training matrix; dtrain is
	# reused instead of being rebuilt from identical inputs.
	cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")

	# Run the trained model over the held-out feature rows
	preds = predict(xgModel, X_test)

	# Threshold the predictions at 0.5 to obtain 0/1 labels
	prediction = as.numeric(preds > 0.5)

	# Number of predictions produced
	print(length(preds))

	# First six raw predictions
	print(head(preds))

	# Model performance: share of test rows whose thresholded prediction
	# differs from the actual label
	error_value = mean(prediction != Y_test)
	print(paste("test-error=", error_value))

	print(head(prediction))

	# Feature importance from the learnt model
	importance_matrix = xgb.importance(model = xgModel)
	print(importance_matrix)

	#TODO: set device to print plots to png
	# View the trees from a model
	# xgb.plot.tree(model = xgModel)
	# View only the first tree in the XGBoost model
	# xgb.plot.tree(model = xgModel, n_first_tree = 1)