ksv-muralidhar/1.py

## 1.py
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import mutual_info_classif, SelectKBest
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
import joblib


X = pd.DataFrame(load_iris()['data'], columns=load_iris()['feature_names'])
y = load_iris()['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.1, random_state=11)

def model(X, y):
    X = X.copy()
    y = y.copy()
    pipeline = Pipeline(steps=[['scaler', MinMaxScaler()],
                               ['feature_selection', SelectKBest(score_func=mutual_info_classif)],
                               ['classifier', LogisticRegression(random_state=11, max_iter=1000)]])

    param_grid = {'feature_selection__k': range(1, X.shape[1]),
                  'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

    grid_search = GridSearchCV(estimator=pipeline,
                               param_grid=param_grid,
                               scoring='accuracy',
                               n_jobs=-1,
                               cv=3)

    grid_search.fit(X, y)

    return grid_search


pipeline = Pipeline(steps=[['scaler', MinMaxScaler()],
                           ['feature_selection', SelectKBest(score_func=mutual_info_classif,
                                                             k=3)],
                           ['classifier', LogisticRegression(random_state=11,
                                                             max_iter=1000,
                                                             C=1000)]])

#Refitting the pipeline to the data to find features selected
pipeline.fit(X_train, y_train)
feature_selection = (pipeline['feature_selection']).scores_
feature_scores = {key: value for key,value in zip(X_train.columns, (np.round(pipeline['feature_selection'].scores_,2)))}

#Fitting the final model from GridSearchCV
iris_model = model(X_train, y_train)
joblib.dump(iris_model, 'iris_model.pkl')

print(f'Best params: {iris_model.best_params_}\nBest score: {iris_model.best_score_}\nFeatures scores: {feature_scores}')

## 2.py
def model(X, y):
    X = X.copy()
    y = y.copy()
    pipeline = Pipeline(steps=[['scaler', MinMaxScaler()],
                               ['classifier', LogisticRegression(random_state=11, max_iter=1000)]])

    param_grid = {'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

    grid_search = GridSearchCV(estimator=pipeline,
                               param_grid=param_grid,
                               scoring='accuracy',
                               n_jobs=-1,
                               cv=3)

    grid_search.fit(X, y)

    return grid_search

#Dropping sepal width
X_train = X_train.drop(columns='sepal width (cm)').copy()
iris_model1 = model(X_train, y_train)
print(f'Best params: {iris_model1.best_params_}\nBest score: {iris_model1.best_score_}')

## 3.py
import streamlit as st
import numpy as np
import joblib

#Interface
st.markdown('## Iris Species Prediction')
sepal_length = st.number_input('sepal length (cm)')
sepal_width = st.number_input('sepal width (cm)')
petal_length = st.number_input('petal length (cm)')
petal_width = st.number_input('petal width (cm)')

#Predict button
if st.button('Predict'):
    model = joblib.load('iris_model.pkl')
    X = np.array([sepal_length, sepal_width, petal_length, petal_width])
    if any(X <= 0):
        st.markdown('### Inputs must be greater than 0')
    else:
        st.markdown(f'### Prediction is {model.predict([[sepal_length, sepal_width, petal_length, petal_width]])[0]}')
	import numpy as np
	import pandas as pd
	from sklearn.datasets import load_iris
	from sklearn.feature_selection import mutual_info_classif, SelectKBest
	from sklearn.model_selection import GridSearchCV, train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.linear_model import LogisticRegression
	from sklearn.preprocessing import MinMaxScaler
	import joblib


	X = pd.DataFrame(load_iris()['data'], columns=load_iris()['feature_names'])
	y = load_iris()['target']

	X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.1, random_state=11)

	def model(X, y):
	X = X.copy()
	y = y.copy()
	pipeline = Pipeline(steps=[['scaler', MinMaxScaler()],
	['feature_selection', SelectKBest(score_func=mutual_info_classif)],
	['classifier', LogisticRegression(random_state=11, max_iter=1000)]])

	param_grid = {'feature_selection__k': range(1, X.shape[1]),
	'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

	grid_search = GridSearchCV(estimator=pipeline,
	param_grid=param_grid,
	scoring='accuracy',
	n_jobs=-1,
	cv=3)

	grid_search.fit(X, y)

	return grid_search


	pipeline = Pipeline(steps=[['scaler', MinMaxScaler()],
	['feature_selection', SelectKBest(score_func=mutual_info_classif,
	k=3)],
	['classifier', LogisticRegression(random_state=11,
	max_iter=1000,
	C=1000)]])

	#Refitting the pipeline to the data to find features selected
	pipeline.fit(X_train, y_train)
	feature_selection = (pipeline['feature_selection']).scores_
	feature_scores = {key: value for key,value in zip(X_train.columns, (np.round(pipeline['feature_selection'].scores_,2)))}

	#Fitting the final model from GridSearchCV
	iris_model = model(X_train, y_train)
	joblib.dump(iris_model, 'iris_model.pkl')

	print(f'Best params: {iris_model.best_params_}\nBest score: {iris_model.best_score_}\nFeatures scores: {feature_scores}')
	import streamlit as st
	import numpy as np
	import joblib

	#Interface
	st.markdown('## Iris Species Prediction')
	sepal_length = st.number_input('sepal length (cm)')
	sepal_width = st.number_input('sepal width (cm)')
	petal_length = st.number_input('petal length (cm)')
	petal_width = st.number_input('petal width (cm)')

	#Predict button
	if st.button('Predict'):
	model = joblib.load('iris_model.pkl')
	X = np.array([sepal_length, sepal_width, petal_length, petal_width])
	if any(X <= 0):
	st.markdown('### Inputs must be greater than 0')
	else:
	st.markdown(f'### Prediction is {model.predict([[sepal_length, sepal_width, petal_length, petal_width]])[0]}')