Skip to content

Instantly share code, notes, and snippets.

@astoeckl
Last active November 13, 2021 13:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save astoeckl/2c3832f8d78ddfc0148b7bf948bdad99 to your computer and use it in GitHub Desktop.
Save astoeckl/2c3832f8d78ddfc0148b7bf948bdad99 to your computer and use it in GitHub Desktop.
leadprediction
# Importing libraries
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
# Import data: load the leads table and separate the features from the target.
# 'Prospect ID' is dropped together with the target column 'Converted', so
# neither is passed to the model as a feature.
df_Leads = pd.read_csv('/Leads.csv')  # read_csv already returns a DataFrame
X = df_Leads.drop(['Prospect ID', 'Converted'], axis=1)
y = df_Leads['Converted']

# Train/test split: 70% train / 30% test, fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, test_size=0.3, random_state=100
)
# Work on an explicit copy so the in-place scaling below cannot raise pandas'
# SettingWithCopyWarning or write through to the frame the split came from.
X_train = X_train.copy()

# Columns that get standardised; named once so selection and assignment agree.
NUMERIC_COLUMNS = [
    'TotalVisits',
    'Total Time Spent on Website',
    'Page Views Per Visit',
]

# Instantiating the standard scaler.
scaler = StandardScaler()
num_cols = X_train[NUMERIC_COLUMNS]

# Scaling the numerical columns. The scaler is fitted on the training rows
# only. NOTE: X_test is left untouched here; apply scaler.transform() to the
# same columns of X_test before using it for prediction.
X_train[NUMERIC_COLUMNS] = scaler.fit_transform(num_cols)

# Logistic regression - RFE: keep the 15 features selected by recursive
# feature elimination. n_features_to_select must be passed by keyword;
# recent scikit-learn versions reject it as a positional argument.
logreg = LogisticRegression()
rfe = RFE(estimator=logreg, n_features_to_select=15)
rfe = rfe.fit(X_train, y_train)
col = X_train.columns[rfe.support_]

# Train model: fit a binomial GLM (logistic regression) on the selected
# columns. statsmodels does not add an intercept by itself, hence add_constant.
X_train_sm = sm.add_constant(X_train[col])
logm = sm.GLM(y_train, X_train_sm, family=sm.families.Binomial())
res = logm.fit()
# As the last expression of a notebook cell this renders the summary table;
# when running as a plain script, wrap it in print() to see the output.
res.summary()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment