Skip to content

Instantly share code, notes, and snippets.

View JLFDataScience's full-sized avatar

Jose Luis Fernández Nuevo JLFDataScience

  • FGCSIC
View GitHub Profile
@JLFDataScience
JLFDataScience / model_metrics_evaluation.py
Created September 25, 2020 18:18
Function to measure the performance of predictive models
from sklearn.metrics import accuracy_score, auc, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
def metrics_models(y_true, y_pred):
from sklearn.metrics import accuracy_score, auc, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
# Obtención de matriz de confusión
confusion_matrix = confusion_matrix(y_true, y_pred)
print("La matriz de confusión es ")
print(confusion_matrix)
@JLFDataScience
JLFDataScience / Join_dataset_dummies.py
Created September 25, 2020 18:03
Build separate datasets from different groups of variables, concatenating each with a dummy-encoded variable, for use in prediction models
# Build two modelling frames: each joins a set of categorical columns with the
# one-hot (dummy) encoding of a single variable, side by side (column-wise).
# NOTE(review): both encodings use the prefix 'pclass', so the generated dummy
# columns of both frames are named 'pclass_<level>' -- confirm this is intended.
data_model_1 = pd.concat(
    objs=[
        diagnostic[categorical_1],
        pd.get_dummies(data=diagnostic['a1_g1_b'], prefix='pclass'),
    ],
    axis='columns',
)
data_model_2 = pd.concat(
    objs=[
        diagnostic[categorical_2],
        pd.get_dummies(data=diagnostic['a1_g2'], prefix='pclass'),
    ],
    axis='columns',
)
@JLFDataScience
JLFDataScience / Function_calculate_IV.py
Created September 25, 2020 17:59
Function to calculate Information Value (IV)
#Information Value Function
def calculateIV(data, features, target):
result = pd.DataFrame(index = ['IV'], columns = features)
result = result.fillna(0)
var_target = array(data[target])
for cat in features:
var_values = array(data[cat])
var_levels = unique(var_values)
@JLFDataScience
JLFDataScience / Function_get_WoE.py
Created September 25, 2020 17:50
Functions to calculate Weight of Evidence (WOE)
def get_WoE(data, var, target):
crosstab = pd.crosstab(data[target], data[var])
print("Obteniendo el Woe para la variable", var, ":")
for col in crosstab.columns:
if crosstab[col][1] == 0:
print(" El WoE para", col, "[", sum(crosstab[col]), "] es infinito")
else:
print(" El WoE para", col, "[", sum(crosstab[col]), "] es", np.log(float(crosstab[col][0]) /
@JLFDataScience
JLFDataScience / Beta_function_all.py
Created July 8, 2020 17:17
Full Beta function to extract data from Yahoo Finance about a company and its index and automatically calculate its beta over a time window
#We generate a function with the above tests to calculate the Beta of each record
def BetaCalculated(ticker_symbol, ref_index, source_web, sdate, edate):
try:
df_stock = web.DataReader(ticker_symbol,source_web,sdate,edate)
df_index = web.DataReader(ref_index,source_web,sdate,edate)
#df_stock = pdr.get_data_yahoo(ticker_symbol, start=sdate, end=edate)
#df_index = pdr.get_data_yahoo(ref_index, start=sdate, end=edate)
# create a time-series of monthly data points
df_stock = df_stock.resample('M').last() #We group by Month, i.e. one record per month of each year
df_index = df_index.resample('M').last() #We group by Month
@JLFDataScience
JLFDataScience / beta_result.py
Created July 8, 2020 17:13
calculation of beta from covariance function
# Show the end date of the window being analysed.
print(edate)
# Numerator of beta: covariance between the stock returns and the index returns.
numerator = covariance(df['stock_returns'], df['index_returns'])
print(f"COVARIANCE(stock, benchmark) = COVARIANCE({ticker_symbol}, {ref_index}) = {numerator}")
# Denominator of beta: variance of the index, computed as its covariance with itself.
denominator = covariance(df['index_returns'], df['index_returns'])
print(f"VARIANCE(benchmark) = COVARIANCE(benchmark, benchmark) = COVARIANCE({ref_index}, {ref_index}) = {denominator}")
# BETA = Covariance(stock, index) / Variance(index) = Covariance(stock, index) / Covariance(index, index)
# Reuse the values computed above instead of calling covariance() two more times.
print(f"BETA = COVARIANCE(stock, benchmark) / VARIANCE(benchmark) = {numerator} / {denominator} = {numerator / denominator}")
@JLFDataScience
JLFDataScience / covariance_function.py
Created July 8, 2020 17:12
Covariance function in Python
#We define a function to calculate the covariance
def covariance(a, b):
    """Return the sample covariance of two equal-length numeric sequences.

    The result uses the unbiased estimator (division by ``n - 1``).

    Args:
        a: Sequence of numbers (list, NumPy array, pandas Series, ...).
        b: Sequence of numbers with the same length as ``a``.

    Returns:
        The sample covariance as a float. ``None`` when the two inputs have
        different lengths (kept for backward compatibility with existing
        callers). ``nan`` when fewer than two observations are supplied,
        because the sample covariance is undefined in that case.
    """
    if len(a) != len(b):
        return None

    # Convert to plain arrays so that element access is positional. Indexing a
    # pandas Series with ``a[i]`` is label-based and fails when the index is
    # not 0..n-1 (for example after ``dropna()`` or with a DatetimeIndex).
    x = np.asarray(a, dtype=float)
    y = np.asarray(b, dtype=float)

    n = len(x)
    if n < 2:
        # Avoids dividing by zero (n == 1) or by -1 (n == 0).
        return float("nan")

    # Sum of the products of the deviations from each mean, computed in one
    # vectorised step instead of a Python-level loop.
    return np.dot(x - x.mean(), y - y.mean()) / (n - 1)
@JLFDataScience
JLFDataScience / CalculatedReturn_stocks.py
Created July 8, 2020 17:09
Calculate returns from a company's daily stock price data or stock index
#Calculating returns
# -*- coding: utf-8 -*-
"""
reference: http://gouthamanbalaraman.com/blog/calculating-stock-beta.html
"""
# Simple period-over-period return for the stock: price_t / price_(t-1) - 1.
df_stock['returns'] = df_stock['Adj Close'].div(df_stock['Adj Close'].shift(1)).sub(1)
# The first row has no previous price, so its return is NaN; drop every row
# that contains a missing value in any column.
df_stock = df_stock.dropna(axis=0, how='any')
# Same computation for the reference index.
df_index['returns'] = df_index['Adj Close'].div(df_index['Adj Close'].shift(1)).sub(1)
df_index = df_index.dropna(axis=0, how='any')
@JLFDataScience
JLFDataScience / Final_Code_choroplet map.py
Last active May 19, 2020 10:22
Final code to make a choropleth map in Folium with geographic points in another layer
m = folium.Map(location=[40,-4], zoom_start=6, width=700, height=500, control_scale=True, tiles='CartoDB Positron')
folium.Choropleth(
geo_data=geojson_counties,
name='Comunidades Autónomas',
data=df_casos,
columns=['CCAA', 'Casos'],
key_on='feature.properties.texto', #'features.properties.comunidade_autonoma'
fill_color='YlGn',
fill_opacity=0.5,
@JLFDataScience
JLFDataScience / Map_Folium_with_label.py
Last active May 19, 2020 11:11
Put labels on a choropleth map in Folium
m = folium.Map(location=[40,-4], zoom_start=6, width=700, height=500, control_scale=True, tiles='CartoDB Positron')
folium.Choropleth(
geo_data=geojson_counties,
name='Comunidades Autónomas',
data=df_casos,
columns=['CCAA', 'Casos'],
key_on='feature.properties.texto', #'features.properties.comunidade_autonoma'
fill_color='YlGn',
fill_opacity=0.5,