Skip to content

Instantly share code, notes, and snippets.

View finlytics-hub's full-sized avatar

Asad Mumtaz finlytics-hub

View GitHub Profile
@finlytics-hub
finlytics-hub / LR_stochastic_GD.py
Last active October 24, 2020 17:37
Linear Regression - Stochastic GD
'''
NORMAL EQUATION
'''
# single feature column: 500 random draws from np.random.rand, scaled by 5
X = np.random.rand(500, 1) * 5
# target follows y = 10 + 2x, with np.random.randn noise added on top
y = (2 * X + 10) + np.random.randn(500, 1)
# design matrix: a leading column of ones (X0) followed by the feature column
X_2d = np.hstack((np.ones_like(X), X))
# calculate theta that minimizes MSE through Normal Equation
@finlytics-hub
finlytics-hub / LR_batch_GD.py
Last active October 24, 2020 17:09
Linear Regression - Batch GD
'''
NORMAL EQUATION
'''
# independent variable: one column of 500 random values from np.random.rand, multiplied by 5
X = np.random.rand(500, 1) * 5
# dependent variable: the line y = 10 + 2x with np.random.randn noise added
y = (X * 2 + 10) + np.random.randn(500, 1)
# prepend a column of ones (X0) to X, one per observation
X_2d = np.column_stack((np.ones(X.shape), X))
# calculate theta that minimizes MSE through Normal Equation
@finlytics-hub
finlytics-hub / LR_norm_eq_pred.py
Last active October 24, 2020 05:56
Linear Regression - Normal Equation - Prediction
# evaluate the fitted model at the two ends of the X range (0 and 5); two points are enough to plot a straight line
X_new = np.array([[0], [5]])
# put the column of ones in front of the new inputs
X_new_2d = np.hstack((np.ones((2, 1)), X_new))
# prediction = design matrix times the fitted parameters (theta_best comes from the Normal Equation step)
y_predict = X_new_2d @ theta_best
y_predict
@finlytics-hub
finlytics-hub / LR_norm_eq.py
Last active October 24, 2020 14:10
Linear Regression - Normal Equation
# generate some random numbers (independent variable): 500 values from np.random.rand, scaled by 5
X = 5 * np.random.rand(500, 1)
# calculate linearly related (plus some noise) target variable in the form y = 10 + 2x + noise
y = 10 + 2 * X + np.random.randn(500, 1)
# add ones to X for each observation (X0); the row count is taken from X so the two cannot drift apart
X_2d = np.c_[np.ones((len(X), 1)), X]
# calculate theta that minimizes MSE through the Normal Equation (X^T X) theta = X^T y
# the linear system is solved directly instead of forming the explicit inverse of X^T X,
# which costs more and loses accuracy when X^T X is poorly conditioned
theta_best = np.linalg.solve(X_2d.T.dot(X_2d), X_2d.T.dot(y))
theta_best
@finlytics-hub
finlytics-hub / ODI_app.py
Created October 3, 2020 05:35
ODI: Flask web application
# import all the required libraries
from flask import Flask, request, url_for, render_template
from sklearn.linear_model import LogisticRegression
import pandas as pd
import pickle
import numpy as np
# instantiate a Flask app
app = Flask(__name__)
# load our saved model
@finlytics-hub
finlytics-hub / ODI_model_training_validation.py
Created October 2, 2020 18:26
ODI: Model Training & Validation
# remove every column that the Feature Selection steps ruled out
X = final_data.drop(
    columns=[
        'Ground',
        'Match Date',
        'Result',
        'Toss Won?',
        'Match Month',
        'Country Total Bowling Rank',
        'Country Total Batting Rank',
        'Opposition Total Bowling Rank',
        'Opposition Total Batting Rank',
        'Country Average Bowling Rank',
        'Country Average Batting Rank',
        'Opposition Average Bowling Rank',
        'Opposition Average Batting Rank',
        'Country Median Bowling Rank',
        'Country Median Batting Rank',
        'Opposition Median Bowling Rank',
        'Opposition Median Batting Rank',
    ]
)
# one-hot encode the remaining categorical features, dropping the first level of each
X = pd.get_dummies(
    X,
    columns=['Country', 'Opposition', 'Home/Away'],
    drop_first=True,
)
# target variable: 'Won' becomes 1 and 'Lost' becomes 0
y = final_data['Result'].replace(to_replace={'Won': 1, 'Lost': 0})
# Logistic Regression with class_weight set to 'balanced' and a generous iteration cap
reg = LogisticRegression(class_weight='balanced', max_iter=10000)
# define the cros
@finlytics-hub
finlytics-hub / ODI_feature_selection.py
Created October 2, 2020 18:16
ODI: Feature Selection
# the player names are not useful for the feature selection and model training steps, so drop every
# column from 'Country_Player_1' through 'Opposition_Player_12' (a label slice, so both ends are included)
# NOTE(review): the drop is done in place and no backup copy is made in this snippet - confirm one was taken earlier
final_data.drop(columns=final_data.loc[:, 'Country_Player_1':'Opposition_Player_12'].columns, inplace=True)
# Categorical Feature Selection
# predictors: everything except the target; 'columns=' already names the axis, so no 'axis' argument is passed
X = final_data.drop(columns=['Result'])
# target
y = final_data['Result']
# define an empty dictionary to store chi-test results
chi2_check = {}
@finlytics-hub
finlytics-hub / ODI_feature_engineering.py
Created October 2, 2020 18:06
ODI: Feature Engineering
# create a new Match Month column
final_data['Match Month'] = final_data['Match Date'].dt.month
## win/loss ratio
# instantiate an empty list
win_loss_ratio = []
# loop over the final_data DF for each playing Country in it
for c in final_data['Country'].unique():
# slice all matches with 'c' country in the 'Country' column
@finlytics-hub
finlytics-hub / ODI_data_munging_2.py
Last active October 2, 2020 17:25
ODI: Data Munging Part 2
# reshape ODI_matches from wide to long: the match-level columns stay as identifiers, every other column is
# unpivoted into 'Players', rows are ordered by date and country, and the helper 'variable' column is discarded
melted_data = (
    ODI_matches
    .melt(
        id_vars=['Country', 'Opposition', 'Home/Away', 'Ground', 'Match Date', 'Result'],
        value_name='Players',
    )
    .sort_values(by=['Match Date', 'Country'])
    .reset_index(drop=True)
    .drop(columns=['variable'])
)
# WEB SCRAPING FOR RANKINGS
# instantiate an empty list to store the date-wise DFs
df_list = []
for d in ODI_matches['Match Date'].unique():
# convert each unique date to a timestamp
date = pd.Timestamp(d)
@finlytics-hub
finlytics-hub / ODI_data_munging_1.py
Last active October 2, 2020 17:21
ODI: Data Munging Part 1
# ODI_results DF
# create a copy of the DF so the original ODI_results is not modified
ODI_results_clean = ODI_results.copy()
# split Match column into two separate columns on the standalone, whitespace-delimited 'v' separator
# a bare 'v' pattern would also split inside any team name containing that letter; n = 1 caps the result at two parts
# (a multi-character pattern is interpreted by pandas as a regular expression)
# NOTE(review): assumes Match is formatted like 'Team A v Team B' - confirm against the source data
ODI_results_clean[['Country 1', 'Country 2']] = ODI_results_clean['Match'].str.split(r'\s+v\s+', n = 1, expand = True)
# strip out all the leading and trailing whitespaces
ODI_results_clean['Country 1'] = ODI_results_clean['Country 1'].str.strip()
ODI_results_clean['Country 2'] = ODI_results_clean['Country 2'].str.strip()
# create the Opposition column using information from the 2 newly created columns:
# when 'Country 1' is the row's own Country the opposition is 'Country 2', otherwise it is 'Country 1'
ODI_results_clean['Opposition'] = np.where(ODI_results_clean['Country 1'] == ODI_results_clean['Country'], ODI_results_clean['Country 2'], ODI_results_clean['Country 1'])