Skip to content

Instantly share code, notes, and snippets.

@ksv-muralidhar
Last active April 4, 2021 16:53
Show Gist options
  • Save ksv-muralidhar/d638983ed37318ab34a186aa57f20ee0 to your computer and use it in GitHub Desktop.
Save ksv-muralidhar/d638983ed37318ab34a186aa57f20ee0 to your computer and use it in GitHub Desktop.
streamlit_iris_demo
from functools import partial

import joblib
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import mutual_info_classif, SelectKBest
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
# Load the Iris dataset a single time and reuse the returned bunch for the
# feature matrix, the column names and the labels.
iris = load_iris()
X = pd.DataFrame(iris['data'], columns=iris['feature_names'])
y = iris['target']

# Hold out 10% of the rows as a test set. The split is stratified on the
# labels and seeded so that it is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.1, random_state=11
)
def model(X, y):
    """Grid-search a scale -> select-k-best -> logistic-regression pipeline.

    The search covers the number of features kept by ``SelectKBest``
    (1 up to ``X.shape[1] - 1``, so at least one feature is always dropped)
    and the logistic-regression regularisation constant ``C``. Candidates are
    scored by accuracy with ``cv=3``.

    Parameters
    ----------
    X : feature table exposing ``.copy()`` and ``.shape``
        (e.g. a pandas DataFrame).
    y : label vector exposing ``.copy()`` (e.g. a NumPy array).

    Returns
    -------
    GridSearchCV
        The fitted search object (``best_params_``, ``best_score_`` and the
        refitted best estimator are available on it).
    """
    # Work on copies so the caller's objects are never touched.
    X = X.copy()
    y = y.copy()

    # mutual_info_classif draws random noise unless random_state is given.
    # Seed it like every other component so feature selection, and therefore
    # the search result, is reproducible from run to run.
    seeded_mutual_info = partial(mutual_info_classif, random_state=11)

    pipeline = Pipeline(steps=[
        ('scaler', MinMaxScaler()),
        ('feature_selection', SelectKBest(score_func=seeded_mutual_info)),
        ('classifier', LogisticRegression(random_state=11, max_iter=1000)),
    ])
    param_grid = {
        'feature_selection__k': range(1, X.shape[1]),
        'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    }
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grid,
        scoring='accuracy',
        n_jobs=-1,
        cv=3,
    )
    grid_search.fit(X, y)
    return grid_search
# Stand-alone pipeline with fixed hyper-parameters (k=3, C=1000), fitted only
# to read the per-feature mutual-information scores.
# NOTE(review): k and C presumably mirror an earlier grid-search result - confirm.
# The scorer is seeded because mutual_info_classif is otherwise randomised,
# which would make the reported scores differ between runs.
pipeline = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('feature_selection',
     SelectKBest(score_func=partial(mutual_info_classif, random_state=11),
                 k=3)),
    ('classifier', LogisticRegression(random_state=11,
                                      max_iter=1000,
                                      C=1000)),
])

#Refitting the pipeline to the data to find features selected
pipeline.fit(X_train, y_train)
feature_selection = pipeline['feature_selection'].scores_
# Map each training column to its score, rounded to two decimals.
feature_scores = dict(zip(X_train.columns, np.round(feature_selection, 2)))

#Fitting the final model from GridSearchCV
iris_model = model(X_train, y_train)
# Persist the fitted search object to disk as 'iris_model.pkl'.
joblib.dump(iris_model, 'iris_model.pkl')
print(f'Best params: {iris_model.best_params_}\nBest score: {iris_model.best_score_}\nFeatures scores: {feature_scores}')
def model(X, y):
    """Grid-search ``C`` for a min-max-scaled logistic regression.

    No feature-selection step is used here: every column of ``X`` reaches the
    classifier. Candidates are scored by accuracy with ``cv=3``.

    Parameters
    ----------
    X : feature table exposing ``.copy()`` (e.g. a pandas DataFrame).
    y : label vector exposing ``.copy()`` (e.g. a NumPy array).

    Returns
    -------
    GridSearchCV
        The fitted search object.
    """
    # Fit on copies rather than on the caller's objects.
    features, labels = X.copy(), y.copy()

    scaled_logreg = Pipeline(steps=[
        ['scaler', MinMaxScaler()],
        ['classifier', LogisticRegression(random_state=11, max_iter=1000)],
    ])
    search = GridSearchCV(
        estimator=scaled_logreg,
        param_grid={'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
        scoring='accuracy',
        n_jobs=-1,
        cv=3,
    )
    # fit() returns the search object itself, so it can be returned directly.
    return search.fit(features, labels)
# Retrain without the sepal-width column and report the search outcome.
X_train = X_train.drop('sepal width (cm)', axis=1).copy()
iris_model1 = model(X_train, y_train)
print(f'Best params: {iris_model1.best_params_}\nBest score: {iris_model1.best_score_}')
import streamlit as st
import numpy as np
import joblib
#Interface
st.markdown('## Iris Species Prediction')
# One numeric input per feature, in the column order used for training.
sepal_length = st.number_input('sepal length (cm)')
sepal_width = st.number_input('sepal width (cm)')
petal_length = st.number_input('petal length (cm)')
petal_width = st.number_input('petal width (cm)')

#Predict button
if st.button('Predict'):
    X = np.array([sepal_length, sepal_width, petal_length, petal_width])
    # Validate first: invalid measurements should not cost a model load.
    if (X <= 0).any():
        st.markdown('### Inputs must be greater than 0')
    else:
        model = joblib.load('iris_model.pkl')
        # predict() expects a 2-D array: one row holding the four features.
        # The value shown is whatever label the stored model predicts; a model
        # fitted on load_iris()['target'] yields the integer class index.
        st.markdown(f'### Prediction is {model.predict(X.reshape(1, -1))[0]}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment