# Hrithik Gupta sgsg704

## IMPORTING LIBRARIES
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, LSTM
from tensorflow.keras.models import Sequential

## code
# Read the weatherAUS CSV from its hosted location into a DataFrame and
# preview the first rows.
data = pd.read_csv(
    "https://cainvas-static.s3.amazonaws.com/media/user_data/hrithikgupta/weatherAUS.csv"
)
data.head()

## code
# Start with the target: count the rows per RainTomorrow value to see
# whether the data is imbalanced.
cols = ["#C2C4E2", "#EED4E5"]
sns.countplot(
    x=data["RainTomorrow"],
    palette=cols,
)

## code
# Correlation amongst numeric attributes.
# Restrict to numeric columns explicitly: pandas >= 2.0 no longer drops
# non-numeric columns (such as the text Date column) silently and raises
# from DataFrame.corr() instead.
corrmat = data.select_dtypes(include="number").corr()
cmap = sns.diverging_palette(260, -10, s=50, l=75, n=6, as_cmap=True)
# One 18x18 canvas is enough; opening a separate plt.figure() first would
# only leave an empty figure behind.
plt.subplots(figsize=(18, 18))
# NOTE(review): fmt="%" renders every coefficient as a percentage with six
# decimals -- confirm that this annotation format is intended.
sns.heatmap(corrmat, cmap=cmap, annot=True, square=True, fmt="%")

## code
# Parsing datetime
# Explore how long the raw date strings are before converting them.
lengths = data["Date"].str.len()
lengths.value_counts()
# Turn the text column into real timestamps so the .dt accessor can be used.
data["Date"] = pd.to_datetime(data["Date"])
# Creating a column of year
data["year"] = data["Date"].dt.year
# Function to encode datetime parts into cyclic parameters.
# As this data is meant for a neural network, months and days are kept as
# cyclic continuous features.
def encode(data, col, max_val):
    """Add sine/cosine encodings of a periodic column to ``data``.

    The value in ``col`` is mapped onto the unit circle so that the last
    value of a cycle (e.g. month 12) sits next to the first one.

    Args:
        data: DataFrame holding the column; it is modified in place.
        col: Name of the periodic column to encode.
        max_val: Length of one full cycle (e.g. 12 for months).

    Returns:
        The same ``data`` object with ``<col>_sin`` and ``<col>_cos`` added.
    """
    angle = 2 * np.pi * data[col] / max_val
    data[col + "_sin"] = np.sin(angle)
    data[col + "_cos"] = np.cos(angle)
    return data

## code
# Derive the month number and its sine/cosine encoding so that the
# month_sin / month_cos columns plotted below exist.
data["month"] = data["Date"].dt.month
data = encode(data, "month", 12)
# Plotting sine against cosine shows the months laid out on a circle.
cyclic_month = sns.scatterplot(x="month_sin", y="month_cos", data=data, color="#C2C4E2")
cyclic_month.set_title("Cyclic Encoding of Month")
cyclic_month.set_ylabel("Cosine Encoded Months")
cyclic_month.set_xlabel("Sine Encoded Months")

## code
# Derive the day of month and its sine/cosine encoding so that the
# day_sin / day_cos columns plotted below exist (a month has at most 31 days).
data["day"] = data["Date"].dt.day
data = encode(data, "day", 31)
# Plotting sine against cosine shows the days laid out on a circle.
cyclic_day = sns.scatterplot(x="day_sin", y="day_cos", data=data, color="#C2C4E2")
cyclic_day.set_title("Cyclic Encoding of Day")
cyclic_day.set_ylabel("Cosine Encoded Day")
cyclic_day.set_xlabel("Sine Encoded Day")

## code
# Collect the names of all columns whose dtype is "object" (the categorical
# variables) and print them.
s = data.dtypes == "object"
object_cols = s[s].index.tolist()
print("Categorical variables:", object_cols, sep="\n")

## code
# Get list of categorical variables: every column whose dtype is "object".
s = data.dtypes.eq("object")
object_cols = [name for name, is_object in s.items() if is_object]
print("Categorical variables:")
print(object_cols)

## code
# Replace every categorical column by integer labels.
# The single encoder is refit on each column in turn, so afterwards it only
# remembers the classes of the last column it saw.
label_encoder = LabelEncoder()
for column in object_cols:
    encoded = label_encoder.fit_transform(data[column])
    data[column] = encoded

# Print the resulting column dtypes and non-null counts.
data.info()
	import matplotlib.pyplot as plt
	import seaborn as sns
	import datetime
	from sklearn.preprocessing import LabelEncoder
	from sklearn import preprocessing
	from sklearn.preprocessing import StandardScaler
	from sklearn.model_selection import train_test_split
	import seaborn as sns
	from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, LSTM
	from tensorflow.keras.models import Sequential
	# Read the weatherAUS CSV from its hosted location into a DataFrame and
	# preview the first rows.
	data = pd.read_csv(
		"https://cainvas-static.s3.amazonaws.com/media/user_data/hrithikgupta/weatherAUS.csv"
	)
	data.head()
	# Start with the target: count the rows per RainTomorrow value to see
	# whether the data is imbalanced.
	cols = ["#C2C4E2", "#EED4E5"]
	sns.countplot(
		x=data["RainTomorrow"],
		palette=cols,
	)
	# Correlation amongst numeric attributes.
	# Restrict to numeric columns explicitly: pandas >= 2.0 no longer drops
	# non-numeric columns (such as the text Date column) silently and raises
	# from DataFrame.corr() instead.
	corrmat = data.select_dtypes(include="number").corr()
	cmap = sns.diverging_palette(260, -10, s=50, l=75, n=6, as_cmap=True)
	# One 18x18 canvas is enough; opening a separate plt.figure() first would
	# only leave an empty figure behind.
	plt.subplots(figsize=(18, 18))
	# NOTE(review): fmt="%" renders every coefficient as a percentage with six
	# decimals -- confirm that this annotation format is intended.
	sns.heatmap(corrmat, cmap=cmap, annot=True, square=True, fmt="%")
	# Parsing datetime
	# Explore how long the raw date strings are before converting them.
	lengths = data["Date"].str.len()
	lengths.value_counts()
	# Turn the text column into real timestamps so the .dt accessor can be used.
	data["Date"] = pd.to_datetime(data["Date"])
	# Creating a column of year
	data["year"] = data["Date"].dt.year
	# Function to encode datetime parts into cyclic parameters.
	# As this data is meant for a neural network, months and days are kept as
	# cyclic continuous features.
	def encode(data, col, max_val):
		"""Add sine/cosine encodings of a periodic column to ``data``.

		The value in ``col`` is mapped onto the unit circle so that the last
		value of a cycle (e.g. month 12) sits next to the first one.

		Args:
			data: DataFrame holding the column; it is modified in place.
			col: Name of the periodic column to encode.
			max_val: Length of one full cycle (e.g. 12 for months).

		Returns:
			The same ``data`` object with ``<col>_sin`` and ``<col>_cos`` added.
		"""
		angle = 2 * np.pi * data[col] / max_val
		data[col + "_sin"] = np.sin(angle)
		data[col + "_cos"] = np.cos(angle)
		return data
	# Derive the month number and its sine/cosine encoding so that the
	# month_sin / month_cos columns plotted below exist.
	data["month"] = data["Date"].dt.month
	data = encode(data, "month", 12)
	# Plotting sine against cosine shows the months laid out on a circle.
	cyclic_month = sns.scatterplot(x="month_sin", y="month_cos", data=data, color="#C2C4E2")
	cyclic_month.set_title("Cyclic Encoding of Month")
	cyclic_month.set_ylabel("Cosine Encoded Months")
	cyclic_month.set_xlabel("Sine Encoded Months")
	# Derive the day of month and its sine/cosine encoding so that the
	# day_sin / day_cos columns plotted below exist (a month has at most 31 days).
	data["day"] = data["Date"].dt.day
	data = encode(data, "day", 31)
	# Plotting sine against cosine shows the days laid out on a circle.
	cyclic_day = sns.scatterplot(x="day_sin", y="day_cos", data=data, color="#C2C4E2")
	cyclic_day.set_title("Cyclic Encoding of Day")
	cyclic_day.set_ylabel("Cosine Encoded Day")
	cyclic_day.set_xlabel("Sine Encoded Day")
	# Collect the names of all columns whose dtype is "object" (the categorical
	# variables) and print them.
	s = data.dtypes == "object"
	object_cols = s[s].index.tolist()
	print("Categorical variables:", object_cols, sep="\n")
	# Apply label encoder to each column with categorical data.
	# The single encoder is refit on each column in turn, so afterwards it only
	# remembers the classes of the last column it saw.
	label_encoder = LabelEncoder()
	for i in object_cols:
		# The assignment must be nested under the loop; at the loop's own
		# indentation level the for statement has no body.
		data[i] = label_encoder.fit_transform(data[i])

	# Print the resulting column dtypes and non-null counts.
	data.info()