Skip to content

Instantly share code, notes, and snippets.

View JLFDataScience's full-sized avatar

Jose Luis Fernández Nuevo JLFDataScience

  • FGCSIC
View GitHub Profile
@JLFDataScience
JLFDataScience / Logistic_regression_pipeline.py
Last active January 30, 2020 16:44
Logistic regression pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
# Two-step pipeline: univariate feature selection keeping the 2 best-scoring
# features (scored with f_classif), followed by a logistic regression.
pipe = Pipeline(steps=[
    ('kBest', SelectKBest(score_func=f_classif, k=2)),
    ('lr', LogisticRegression()),
])
# x_train / y_train are expected to exist already (they are not defined in
# this snippet).
pipe.fit(x_train, y_train)
@JLFDataScience
JLFDataScience / DummyClassifier.py
Created January 30, 2020 16:42
DummyClassifier
from sklearn.dummy import DummyClassifier
# Baseline model: fit a DummyClassifier on the training split and report its
# score on the test split. fit() returns the estimator itself, so the
# construction and the fit can be chained.
dc = DummyClassifier().fit(x_train, y_train)
print(u'The performance of the model is: %0.5f' % (dc.score(x_test, y_test),))
@JLFDataScience
JLFDataScience / train_test_df.py
Created January 30, 2020 16:39
Divide the dataset into a training set and a test set.
from sklearn.model_selection import train_test_split
# Name of the label column; every other column of transf_selection is used
# as a predictor.
target = 'Target'
features = [*transf_selection.columns]
features.remove(target)

# Split predictors and label into train/test parts; random_state is fixed so
# the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    transf_selection[features],
    transf_selection[target],
    random_state=0,
)
@JLFDataScience
JLFDataScience / VIF_aplication.py
Last active January 30, 2020 16:35
Application of the VIF function to the donation dataset
# Run the VIF helper on the full donation DataFrame. calculateVIF is defined
# outside this snippet and its return value is not captured here.
# NOTE(review): presumably this line exists to display the per-feature VIF
# values in a notebook cell - confirm against the helper's definition.
calculateVIF(transfusion)
# Generate the new dataset from the variables kept by the VIF-based selection.
# NOTE(review): the second argument (5) is presumably the VIF cut-off used by
# selectDataUsingVIF - confirm against the helper's definition.
transf_selection = selectDataUsingVIF(transfusion, 5)
@JLFDataScience
JLFDataScience / new_features_charts.py
Created January 30, 2020 16:24
New features and histograms
# Derived features added to the donation DataFrame.
# Volume donated divided by the 'Time' column.
transfusion['Average (c.c./months)'] = transfusion['Volume'].div(transfusion['Time'])
# NOTE(review): (Time - Recency) / Frequency is a number of months per
# donation, not donations per month as the column name suggests - confirm
# which one is intended before relying on this feature.
transfusion['Donations per Month'] = (
    transfusion['Time'].sub(transfusion['Recency']).div(transfusion['Frequency'])
)
# Boolean flag: donation count strictly above the median donation count.
transfusion['Frequent Donor'] = transfusion['Frequency'] > median(transfusion['Frequency'])

# Plot the distribution of the two new numeric features in a 2x2 grid.
plt.figure(1, figsize=(10, 10))
for subplot_code, column in (
    (221, 'Average (c.c./months)'),
    (222, 'Donations per Month'),
):
    plt.subplot(subplot_code)
    plot_distribution(transfusion, column, 'Target')
# Third cell of the grid is selected here; nothing is drawn into it within
# this snippet.
plt.subplot(223)
@JLFDataScience
JLFDataScience / histogram_charts.py
Created January 30, 2020 15:56
Histograms showing the relationship between the target and the other features
# One histogram per original feature, laid out on a 2x2 grid; subplot codes
# run from 221 to 224 in the same order as the feature names.
plt.figure(1, figsize=(10, 10))
for subplot_code, column in enumerate(
    ('Recency', 'Frequency', 'Volume', 'Time'), start=221
):
    plt.subplot(subplot_code)
    plot_distribution(transfusion, column, 'Target')
@JLFDataScience
JLFDataScience / histogram_function.py
Created January 30, 2020 11:14
Histogram function
from seaborn import distplot
def plot_distribution(data, feature, target):
min_value = floor(min(data[feature]) - 1)
max_value = ceil(max(data[feature]) + 1)
bins = range(int(min_value), int(max_value), int(max(1, round((max_value - min_value) / 20))))
distplot(transfusion[data[target] == 0][feature],
bins = bins,
@JLFDataScience
JLFDataScience / pairplot.py
Created January 30, 2020 11:11
donation analysis pairplot
from seaborn import pairplot
# Pairwise plots of the first four columns, with kernel density estimates on
# the diagonal. The trailing semicolon is kept from the original statement.
pairplot(data=transfusion.iloc[:, :4], diag_kind='kde');
@JLFDataScience
JLFDataScience / heatmap_corr.py
Created January 30, 2020 11:09
Heatmap correlation of seaborn
from seaborn import heatmap
# Annotated heatmap of the pairwise column correlations. The trailing
# semicolon is kept from the original statement.
heatmap(data=transfusion.corr(), annot=True);
@JLFDataScience
JLFDataScience / import_data_donation.py
Created January 30, 2020 11:05
Data import from the UCI repository URL
import pandas as pd
# Load the comma-separated blood-transfusion data straight from the UCI URL
# and shorten the verbose column headers in the same expression.
transfusion = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/blood-transfusion/transfusion.data',
    sep=',',
).rename(
    columns={
        'Recency (months)': 'Recency',
        'Frequency (times)': 'Frequency',
        'Monetary (c.c. blood)': 'Volume',
        'Time (months)': 'Time',
        'whether he/she donated blood in March 2007': 'Target',
    }
)
# Preview of the first rows (only displayed when run as the last expression
# of an interactive cell).
transfusion.head()