Skip to content

Instantly share code, notes, and snippets.

View GermanCM's full-sized avatar

Germán Martínez GermanCM

View GitHub Profile
@GermanCM
GermanCM / gradient_descent_optimization.py
Created December 19, 2018 10:28
gradient descent regression snippets
import math
import numpy as np
# get the overall cost of the model
def compute_cost(X, y, coeff):
'''
inputs:
* 'X': features matrix (independent variables)
* 'y': target values (dependent variable)
* 'coeff': regression coefficients
@GermanCM
GermanCM / features_normalization.py
Last active December 20, 2018 23:34
Features normalization
def normalize_features(df):
"""
Normalize the features in the data set.
Returns the normalized values, mean and standard deviation for each feature
"""
mu = df.mean()
sigma = df.std()
if (sigma == 0).any():
@GermanCM
GermanCM / r2_score.py
Created December 19, 2018 10:36
r2 score via scikit-learn
def find_r2_score(labels_test, predicted_outputs):
    """Compute, print and return the R^2 score of a set of predictions.

    inputs:
    * labels_test: ground-truth target values
    * predicted_outputs: values predicted by the model for the same samples
    output:
    * the score computed by sklearn.metrics.r2_score (it is also printed)
    """
    from sklearn.metrics import r2_score

    score = r2_score(labels_test, predicted_outputs)
    print('the value of r2 is: ', score)
    # Return the score as well as printing it so callers can reuse the value;
    # previously the function implicitly returned None.
    return score
@GermanCM
GermanCM / normalize_and_predict.py
Created December 19, 2018 10:47
normalization and prediction
# returns the dependent variable (y axis) value which the model assigns to a certain independent variable (x axis) value
def predict_output(feature_matrix, coefficients):
'''
inputs:
* feature_matrix: two-dimensions array of the data points, where each columns is a feature and a row a point
* coefficients: one-dimension array of estimated feature coefficients
output:
* one-dimension array of predictions
'''
@GermanCM
GermanCM / scikit_linear_regression.py
Created December 19, 2018 10:53
linear regression via scikit-learn
def linear_regression_via_scikit(X_train, y_train):
    """Fit a scikit-learn LinearRegression on a single-feature training set.

    inputs:
    * X_train: object exposing `.values` (e.g. a pandas Series) with the
      feature values; reshaped to one column with len(X_train) rows
    * y_train: object exposing `.values` with the target values; reshaped to
      the same (len(X_train), 1) shape
    output:
    * the fitted LinearRegression estimator
    """
    from sklearn import linear_model

    model = linear_model.LinearRegression()

    # Both arrays are forced into column vectors of len(X_train) rows.
    column_shape = (len(X_train), 1)
    features = X_train.values.reshape(column_shape)
    targets = y_train.values.reshape(column_shape)

    model.fit(features, targets)
    return model
@GermanCM
GermanCM / correlation_coeff_matrix.py
Last active May 31, 2019 11:34
Pearson's correlation coefficients matrix
def get_corr_coeff(dataframe):
    """Plot a heatmap of the correlation coefficients between all columns.

    inputs:
    * dataframe: pandas DataFrame whose columns are the attributes to compare
    output:
    * nothing is returned; the annotated heatmap is displayed via plt.show()
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import seaborn as sns

    labels = dataframe.columns
    # np.corrcoef treats each row as a variable, hence the transpose so that
    # every dataframe column becomes one variable.
    corr_matrix = np.corrcoef(dataframe[labels].values.T)

    sns.set(font_scale=1.5)
    sns.heatmap(
        corr_matrix,
        cbar=True,
        annot=True,
        square=True,
        fmt='.2f',
        annot_kws={'size': 10},
        yticklabels=labels,
        xticklabels=labels,
    )
    plt.show()
@GermanCM
GermanCM / attributes_imputer.py
Created February 2, 2019 18:36
Impute attributes from a dataframe
def imputeMissingValues(dataframe, desired_strategy, attributes_to_impute):
    """Fill the NaN entries of the given columns using a scikit-learn imputer.

    inputs:
    * dataframe: pandas DataFrame holding the columns to impute; it is
      modified in place
    * desired_strategy: strategy name forwarded to the imputer
      (e.g. 'mean', 'median', 'most_frequent')
    * attributes_to_impute: iterable of column names to impute
    output:
    * the same dataframe object, with the selected columns imputed
    """
    attributes_to_impute = list(attributes_to_impute)
    if not attributes_to_impute:
        # Nothing to impute: hand the dataframe back untouched.
        return dataframe

    import numpy as np

    # `sklearn.preprocessing.Imputer` was removed in scikit-learn 0.22; its
    # replacement is `sklearn.impute.SimpleImputer` (always column-wise, so
    # it takes no `axis` argument). Fall back to the legacy class only on
    # installations too old to ship SimpleImputer.
    try:
        from sklearn.impute import SimpleImputer
    except ImportError:
        from sklearn.preprocessing import Imputer
        imputer = Imputer(missing_values=np.nan, strategy=desired_strategy, axis=0)
    else:
        imputer = SimpleImputer(missing_values=np.nan, strategy=desired_strategy)

    for attr in attributes_to_impute:
        # The imputer expects a 2-D array: one column, one row per sample.
        column = dataframe[attr].values.reshape(-1, 1)
        imputed = imputer.fit_transform(column)
        # Flatten the (n, 1) result so it is assigned back as a plain column.
        dataframe.loc[:, attr] = imputed.ravel()
    return dataframe
@GermanCM
GermanCM / one_hot_encoding_stratified.py
Created February 2, 2019 18:53
Get dummy variables stratifying by frequency threshold
# source: https://stackoverflow.com/questions/18016495/get-subset-of-most-frequent-dummy-variables-in-pandas
# func that returns a dummified DataFrame of significant dummies in a given column
def dum_sign(dummy_col, threshold=0.1):
import pandas as pd
import numpy as np
# removes the bind
dummy_col = dummy_col.copy()
# what is the ratio of a dummy in whole column
@GermanCM
GermanCM / profile_dataframe.py
Last active February 14, 2019 16:04
Creates and saves a dataframe profile in an HTML file
def profile_dataframe(dataframe):
    """Build a pandas-profiling report for `dataframe` and save it as HTML.

    inputs:
    * dataframe: pandas DataFrame to profile
    output:
    * nothing is returned; the report is written to the relative path
      'df_profiling_report.html'
    """
    import pandas_profiling as pp

    report = pp.ProfileReport(dataframe)
    # The path is passed positionally on purpose: the keyword was renamed
    # from `outputfile` to `output_file` in later pandas-profiling releases,
    # so the positional form works with both.
    report.to_file("df_profiling_report.html")
@GermanCM
GermanCM / get_drive_file_to_dataframe.py
Created February 4, 2019 12:34
Read a CSV file from Google Drive and download it as a dataframe
def loadDataFromDrive(dataLink, fileName):
'''
dataLink: link obtained from the right button option 'get shareable link' in drive
fileName: name of the file in frive
'''
# Code to read csv file into Colaboratory:
get_ipython().system('pip install -U -q PyDrive')
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth