Skip to content

Instantly share code, notes, and snippets.

@jo4x962k7JL
Created August 3, 2018 21:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jo4x962k7JL/06af77c0d82da5dfbc2d82788d42659b to your computer and use it in GitHub Desktop.
Save jo4x962k7JL/06af77c0d82da5dfbc2d82788d42659b to your computer and use it in GitHub Desktop.
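# Baseline benchmark: logistic regression on application_train.csv, predictions for application_test.csv written to benchmark.csv.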
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, Imputer
from sklearn.linear_model import LogisticRegression
# Load the training data and split it into the label column and the features.
# SK_ID_CURR is excluded from the features; it is only carried into the
# submission file below.
data = pd.read_csv('input/application_train.csv')
y_train = data['TARGET']
X_train = data.drop(columns=['SK_ID_CURR', 'TARGET'])
test = pd.read_csv('input/application_test.csv')
# Take an explicit copy so that adding the TARGET column later writes to an
# independent frame instead of a selection of `test` (avoids pandas'
# SettingWithCopyWarning).
submission = test[['SK_ID_CURR']].copy()
X_test = test.drop(columns=['SK_ID_CURR'])
# one-hot encoding
X_train = pd.get_dummies(X_train)
X_test = pd.get_dummies(X_test)
# align: keep only the columns present in both frames, so train and test end
# up with the same feature set in the same order.
X_train, X_test = X_train.align(X_test, join='inner', axis=1)
# missing values
# Fit the imputer on the training data only and reuse it for the test data,
# so both sets are filled with the same (training) medians.
# NOTE(review): `Imputer` was replaced by `sklearn.impute.SimpleImputer` in
# newer scikit-learn releases - confirm against the installed version.
imputer = Imputer(strategy='median').fit(X_train)
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)
# Scaling
# Same principle: the min/max are learned from the training data and applied
# unchanged to the test data. Test values outside the training range will
# therefore fall slightly outside [0, 1], which is expected.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# train and predict
clf = LogisticRegression().fit(X_train, y_train)
# Column 1 of predict_proba is the probability of the second class in
# clf.classes_ (assumes TARGET is a binary 0/1 label - TODO confirm).
y_pred = clf.predict_proba(X_test)[:, 1]
# submit
submission['TARGET'] = y_pred
print(submission.head())
submission.to_csv('benchmark.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment