Hrithik Gupta sgsg704

## code
# Split the frame into model inputs and label. 'Date', 'day' and 'month' are
# dropped together with the target so they are not used as features.
features = data.drop(['RainTomorrow', 'Date', 'day', 'month'], axis=1)
target = data['RainTomorrow']
# Set up a standard scaler for the features.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down, so test rows contribute to the scaling statistics.
col_names = list(features.columns)
s_scaler = preprocessing.StandardScaler()
features = s_scaler.fit_transform(features)
# Re-wrap the scaled values with the original column names AND the original row
# index. Without index=data.index the frame gets a fresh 0..n-1 index, and the
# later index-aligned `features["RainTomorrow"] = target` would pair rows with
# the wrong (or missing) labels whenever `data` carries a non-default index.
features = pd.DataFrame(features, columns=col_names, index=data.index)
# Per-column summary statistics of the scaled features (one row per column).
features.describe().T

#Detecting outliers

## code
# Re-attach the label so that every row removed below takes its target with it.
features["RainTomorrow"] = target
# Build one combined row mask instead of filtering step by step. The bounds are
# expressed in the units of the already-scaled feature columns.
within_bounds = (
    (features["MinTemp"] > -2.3) & (features["MinTemp"] < 2.3)
    & (features["MaxTemp"] > -2) & (features["MaxTemp"] < 2.3)
    & (features["Rainfall"] < 4.5)
    & (features["Evaporation"] < 2.8)
    & (features["Sunshine"] < 2.1)
    & (features["WindGustSpeed"] > -4) & (features["WindGustSpeed"] < 4)
    & (features["WindSpeed9am"] < 4)
)
# Keep only the rows that satisfy every bound.
features = features[within_bounds]

## code
# Label vector and feature matrix taken from the filtered frame.
y = features["RainTomorrow"]
X = features.drop(columns=["RainTomorrow"])
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# (rows, columns) of the full feature matrix.
X.shape

## code
# Early-stopping callback: training halts when the monitored quantity (the
# Keras default, since `monitor` is not set here) stops improving.
early_stopping = callbacks.EarlyStopping(
    min_delta=0.001,  # minimum amount of change to count as an improvement
    patience=20,  # how many epochs to wait before stopping
    restore_best_weights=True,
)
# Initialising the NN
model = Sequential()
# layers
# The input width is read from the training matrix instead of being hard-coded
# to 26, so the network keeps matching the data if feature columns change.
model.add(Dense(units=32, kernel_initializer='uniform', activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(units=32, kernel_initializer='uniform', activation='relu'))
# NOTE(review): no output layer, compile() or fit() call is visible in this
# cell, and `history` (used by the plotting cells) is not defined in view --
# confirm the remainder of this cell exists in the full notebook.

## code
# Tabulate the per-epoch metrics recorded in `history`.
history_df = pd.DataFrame(history.history)
# One entry per curve: (metric column, line colour, legend label).
loss_curves = (
    ('loss', "#BDE2E2", 'Training loss'),
    ('val_loss', "#C2C4E2", 'Validation loss'),
)
for metric_name, line_colour, legend_label in loss_curves:
    plt.plot(history_df.loc[:, [metric_name]], line_colour, label=legend_label)
# Figure decorations.
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc="best")
plt.show()

## code
# Tabulate the per-epoch metrics recorded in `history`.
history_df = pd.DataFrame(history.history)
# One entry per curve: (metric column, line colour, legend label).
accuracy_curves = (
    ('accuracy', "#BDE2E2", 'Training accuracy'),
    ('val_accuracy', "#C2C4E2", 'Validation accuracy'),
)
for metric_name, line_colour, legend_label in accuracy_curves:
    plt.plot(history_df.loc[:, [metric_name]], line_colour, label=legend_label)
# Figure decorations.
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

## code
# Score the held-out rows and turn the model outputs into boolean class
# predictions using a 0.5 cut-off.
y_pred = model.predict(X_test) > 0.5
# Confusion matrix of true vs. predicted labels.
cf_matrix = confusion_matrix(y_test, y_pred)
# Diverging colour map used for the heatmap.
cmap1 = sns.diverging_palette(260, -10, s=50, l=75, n=5, as_cmap=True)
# Open a 12x8 figure; the heatmap below draws onto its axes.
plt.subplots(figsize=(12, 8))
# Show each cell as a fraction of all test samples rather than a raw count.
sns.heatmap(
    cf_matrix / cf_matrix.sum(),
    cmap=cmap1,
    annot=True,
    annot_kws={'size': 15},
)

## code
# Write the model to the file 'rain.h5' in the current working directory.
model.save('rain.h5')
# IPython shell escape (only valid inside a notebook / IPython session, not in a
# plain .py script): runs the external `deepCC` command on the saved file.
!deepCC rain.h5

## code
# Print the model's summary (relies on `model` from the model-building cell).
model.summary()

## code
# Imports and global settings for the notebook.
# NOTE(review): the cells above also use `preprocessing`, `train_test_split`,
# `confusion_matrix`, `callbacks`, `Sequential` and `Dense`, which are not
# imported in the visible source -- presumably this cell is truncated; confirm.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): `os` is not referenced anywhere in the visible source.
import os
# Default figure size for all matplotlib plots.
plt.rcParams['figure.figsize']=(12,5)
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")
	# Split the frame into model inputs and label. 'Date', 'day' and 'month' are
	# dropped together with the target so they are not used as features.
	features = data.drop(['RainTomorrow', 'Date', 'day', 'month'], axis=1)
	target = data['RainTomorrow']
	# Set up a standard scaler for the features.
	# NOTE(review): the scaler is fitted on every row, before the train/test split
	# further down, so test rows contribute to the scaling statistics.
	col_names = list(features.columns)
	s_scaler = preprocessing.StandardScaler()
	features = s_scaler.fit_transform(features)
	# Re-wrap the scaled values with the original column names AND the original row
	# index. Without index=data.index the frame gets a fresh 0..n-1 index, and the
	# later index-aligned `features["RainTomorrow"] = target` would pair rows with
	# the wrong (or missing) labels whenever `data` carries a non-default index.
	features = pd.DataFrame(features, columns=col_names, index=data.index)
	# Per-column summary statistics of the scaled features (one row per column).
	features.describe().T

	# Detecting outliers
	# Re-attach the label so that every row removed below takes its target with it.
	features["RainTomorrow"] = target
	# Build one combined row mask instead of filtering step by step. The bounds are
	# expressed in the units of the already-scaled feature columns.
	within_bounds = (
	    (features["MinTemp"] > -2.3) & (features["MinTemp"] < 2.3)
	    & (features["MaxTemp"] > -2) & (features["MaxTemp"] < 2.3)
	    & (features["Rainfall"] < 4.5)
	    & (features["Evaporation"] < 2.8)
	    & (features["Sunshine"] < 2.1)
	    & (features["WindGustSpeed"] > -4) & (features["WindGustSpeed"] < 4)
	    & (features["WindSpeed9am"] < 4)
	)
	# Keep only the rows that satisfy every bound.
	features = features[within_bounds]
	# Label vector and feature matrix taken from the filtered frame.
	y = features["RainTomorrow"]
	X = features.drop(columns=["RainTomorrow"])
	# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.2,
	    random_state=42,
	)
	# (rows, columns) of the full feature matrix.
	X.shape
	# Early-stopping callback: training halts when the monitored quantity (the
	# Keras default, since `monitor` is not set here) stops improving.
	early_stopping = callbacks.EarlyStopping(
	    min_delta=0.001,  # minimum amount of change to count as an improvement
	    patience=20,  # how many epochs to wait before stopping
	    restore_best_weights=True,
	)
	# Initialising the NN
	model = Sequential()
	# layers
	# The input width is read from the training matrix instead of being hard-coded
	# to 26, so the network keeps matching the data if feature columns change.
	model.add(Dense(units=32, kernel_initializer='uniform', activation='relu', input_dim=X_train.shape[1]))
	model.add(Dense(units=32, kernel_initializer='uniform', activation='relu'))
	# NOTE(review): no output layer, compile() or fit() call is visible here, and
	# `history` (used by the plotting code below) is not defined in view -- confirm
	# the remainder exists in the full notebook.
	# Tabulate the per-epoch metrics recorded in `history`.
	history_df = pd.DataFrame(history.history)
	# One entry per curve: (metric column, line colour, legend label).
	loss_curves = (
	    ('loss', "#BDE2E2", 'Training loss'),
	    ('val_loss', "#C2C4E2", 'Validation loss'),
	)
	for metric_name, line_colour, legend_label in loss_curves:
		plt.plot(history_df.loc[:, [metric_name]], line_colour, label=legend_label)
	# Figure decorations.
	plt.title('Training and Validation loss')
	plt.xlabel('Epochs')
	plt.ylabel('Loss')
	plt.legend(loc="best")
	plt.show()
	# Tabulate the per-epoch metrics recorded in `history`.
	history_df = pd.DataFrame(history.history)
	# One entry per curve: (metric column, line colour, legend label).
	accuracy_curves = (
	    ('accuracy', "#BDE2E2", 'Training accuracy'),
	    ('val_accuracy', "#C2C4E2", 'Validation accuracy'),
	)
	for metric_name, line_colour, legend_label in accuracy_curves:
		plt.plot(history_df.loc[:, [metric_name]], line_colour, label=legend_label)
	# Figure decorations.
	plt.title('Training and Validation accuracy')
	plt.xlabel('Epochs')
	plt.ylabel('Accuracy')
	plt.legend()
	plt.show()
	# Score the held-out rows and turn the model outputs into boolean class
	# predictions using a 0.5 cut-off.
	y_pred = model.predict(X_test) > 0.5
	# Confusion matrix of true vs. predicted labels.
	cf_matrix = confusion_matrix(y_test, y_pred)
	# Diverging colour map used for the heatmap.
	cmap1 = sns.diverging_palette(260, -10, s=50, l=75, n=5, as_cmap=True)
	# Open a 12x8 figure; the heatmap below draws onto its axes.
	plt.subplots(figsize=(12, 8))
	# Show each cell as a fraction of all test samples rather than a raw count.
	sns.heatmap(
	    cf_matrix / cf_matrix.sum(),
	    cmap=cmap1,
	    annot=True,
	    annot_kws={'size': 15},
	)
	# Imports and global settings (repeated from the column-0 import cell).
	import numpy as np # linear algebra
	import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
	import matplotlib.pyplot as plt
	import seaborn as sns
	# NOTE(review): `os` is not referenced anywhere in the visible source.
	import os
	# Default figure size for all matplotlib plots.
	plt.rcParams['figure.figsize']=(12,5)
	import warnings
	# Suppress every warning for the rest of the session.
	warnings.filterwarnings("ignore")