sazio/block18.python Secret

## block18.python
# evaluation of a model using 5 features chosen with random forest importance
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# feature selection
def select_features(X_train, y_train, X_test):
	# configure to select a subset of features
	fs = SelectFromModel(RandomForestClassifier(n_estimators=1000), max_features=5)
	# learn relationship from training data
	fs.fit(X_train, y_train)
	# transform train input data
	X_train_fs = fs.transform(X_train)
	# transform test input data
	X_test_fs = fs.transform(X_test)
	return X_train_fs, X_test_fs, fs

# split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_clf, y_clf, test_size=0.33, random_state=1)
# feature selection
X_train_fs, X_test_fs, fs = select_features(X_train, y_train, X_test)
# fit the model
model = LogisticRegression(solver='liblinear')
model.fit(X_train_fs, y_train)
# evaluate the model
yhat = model.predict(X_test_fs)
# evaluate predictions
accuracy = accuracy_score(y_test, yhat)
print('Accuracy: %.2f' % (accuracy*100))
	# evaluation of a model using 5 features chosen with random forest importance
	from sklearn.model_selection import train_test_split
	from sklearn.feature_selection import SelectFromModel
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import accuracy_score

	# feature selection
	def select_features(X_train, y_train, X_test):
	# configure to select a subset of features
	fs = SelectFromModel(RandomForestClassifier(n_estimators=1000), max_features=5)
	# learn relationship from training data
	fs.fit(X_train, y_train)
	# transform train input data
	X_train_fs = fs.transform(X_train)
	# transform test input data
	X_test_fs = fs.transform(X_test)
	return X_train_fs, X_test_fs, fs

	# split into train and test sets
	X_train, X_test, y_train, y_test = train_test_split(X_clf, y_clf, test_size=0.33, random_state=1)
	# feature selection
	X_train_fs, X_test_fs, fs = select_features(X_train, y_train, X_test)
	# fit the model
	model = LogisticRegression(solver='liblinear')
	model.fit(X_train_fs, y_train)
	# evaluate the model
	yhat = model.predict(X_test_fs)
	# evaluate predictions
	accuracy = accuracy_score(y_test, yhat)
	print('Accuracy: %.2f' % (accuracy*100))