AI HUB aihubprojects

## n1.py
import pandas as pd
import numpy as np

# Build the sample table in a single step. Its only column so far is the
# target label: the gender recorded for each of the eight people.
data = pd.DataFrame({
    'Gender': ['male', 'male', 'male', 'male',
               'female', 'female', 'female', 'female'],
})

# Create our feature variables

## n2.py
# One-row table holding the feature values of a single person
person = pd.DataFrame({'Height': [6], 'Weight': [130], 'Foot_Size': [8]})

## n3.py
# Count the rows whose label is 'male'
n_male = data.loc[data['Gender'] == 'male', 'Gender'].count()

# Count the rows whose label is 'female'
n_female = data.loc[data['Gender'] == 'female', 'Gender'].count()

# Count every row that carries a (non-missing) gender label
total_ppl = data.Gender.count()

# Number of males divided by the total rows

## n4.py
# Per-gender mean of each feature column (one result row per gender)
data_means = data.groupby(by='Gender').mean()

# Per-gender variance of each feature column (pandas' default sample variance)
data_variance = data.groupby(by='Gender').var()

## n5.py
# Likelihood of a feature value under a class-conditional normal distribution
def p_x_given_y(x, mean_y, variance_y):
    """Evaluate the Gaussian density p(x | y).

    Args:
        x: Observed feature value.
        mean_y: Mean of the feature for class y.
        variance_y: Variance of the feature for class y.

    Returns:
        The normal probability density at ``x`` for the given mean and
        variance.
    """
    # Normalizing constant: 1 / sqrt(2 * pi * variance)
    scale = 1/(np.sqrt(2*np.pi*variance_y))

    # Exponent: -(x - mean)^2 / (2 * variance)
    exponent = (-(x-mean_y)**2)/(2*variance_y)

    return scale * np.exp(exponent)


## ns1.py
import pandas as pd
import numpy as np

# Assemble the training table in one step: the target label next to the
# height feature, one entry per person.
data = pd.DataFrame({
    'Gender': [1, 1, 1, 1, 0, 0, 0, 0],   # 1 is male
    'Height': [6, 5.92, 5.58, 5.92, 5, 5.5, 5.42, 5.75],
})

## ns2.py
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Features are every column except the label; the label is the Gender column
X = data.drop(['Gender'], axis=1)
y = data.Gender

# splitting X and y into training and testing sets
# (40% of the rows are held out; random_state makes the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

# training the model on training set
# The fitted estimator is bound to `gnb`, the name the later snippets
# call `gnb.predict(...)` on.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

## ns3.py
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix of the model's predictions over the full feature table
cm = confusion_matrix(y_true=y, y_pred=gnb.predict(X))

# Draw the matrix as an image on a single 8x8-inch axes, without grid lines
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
ax.imshow(cm)
ax.grid(False)

# Label the two columns (predictions) and the two rows (actual classes)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted 0s', 'Predicted 1s'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual 0s', 'Actual 1s'))

# Explicit y-limits running from 1.5 down to -0.5: row 0 is drawn on top
# and both rows are fully inside the view
ax.set_ylim(bottom=1.5, top=-0.5)

## ns4.py
# One-row table with the feature values of the new sample to classify
data1 = pd.DataFrame({'Height': [6], 'Weight': [130], 'Foot_Size': [8]})

# Class label(s) the fitted model assigns to that sample
y_pred = gnb.predict(data1)
if y_pred==0:

## dy.py
# Hold out 30% of the rows for testing; random_state makes the split repeatable
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Decision tree using the entropy criterion, limited to a depth of 3
dtree = tree.DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
)

# Fit the tree on the training split
dtree.fit(X_train, y_train)
	import pandas as pd
	import numpy as np

	# Build the sample table in a single step. Its only column so far is the
	# target label: the gender recorded for each of the eight people.
	data = pd.DataFrame({
		'Gender': ['male', 'male', 'male', 'male',
		           'female', 'female', 'female', 'female'],
	})

	# Create our feature variables
	# One-row table holding the feature values of a single person
	person = pd.DataFrame({'Height': [6], 'Weight': [130], 'Foot_Size': [8]})
	# Count the rows whose label is 'male'
	n_male = data.loc[data['Gender'] == 'male', 'Gender'].count()

	# Count the rows whose label is 'female'
	n_female = data.loc[data['Gender'] == 'female', 'Gender'].count()

	# Count every row that carries a (non-missing) gender label
	total_ppl = data.Gender.count()

	# Number of males divided by the total rows
	# Per-gender mean of each feature column (one result row per gender)
	data_means = data.groupby(by='Gender').mean()

	# Per-gender variance of each feature column (pandas' default sample variance)
	data_variance = data.groupby(by='Gender').var()
	# Create a function that calculates p(x | y):
	def p_x_given_y(x, mean_y, variance_y):
		"""Evaluate the Gaussian density p(x | y).

		Args:
			x: Observed feature value.
			mean_y: Mean of the feature for class y.
			variance_y: Variance of the feature for class y.

		Returns:
			The normal probability density at ``x`` for the given mean
			and variance.
		"""
		# Normal density: 1/sqrt(2*pi*var) * exp(-(x - mean)^2 / (2*var)).
		# The multiplication and power operators had been lost from this
		# copy of the formula, which left it syntactically invalid.
		p = 1/(np.sqrt(2*np.pi*variance_y)) * np.exp((-(x-mean_y)**2)/(2*variance_y))

		# return p
		return p
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import GaussianNB

	# Features are every column except the label; the label is the Gender column
	X = data.drop(['Gender'], axis=1)
	y = data.Gender

	# splitting X and y into training and testing sets
	# (40% of the rows are held out; random_state makes the split repeatable)
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

	# training the model on training set
	# The fitted estimator is bound to `gnb`, the name the following
	# statements call `gnb.predict(...)` on.
	gnb = GaussianNB()
	gnb.fit(X_train, y_train)
	from sklearn.metrics import classification_report, confusion_matrix

	# Confusion matrix of the model's predictions over the full feature table
	cm = confusion_matrix(y_true=y, y_pred=gnb.predict(X))

	# Draw the matrix as an image on a single 8x8-inch axes, without grid lines
	fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
	ax.imshow(cm)
	ax.grid(False)

	# Label the two columns (predictions) and the two rows (actual classes)
	ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted 0s', 'Predicted 1s'))
	ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual 0s', 'Actual 1s'))

	# Explicit y-limits running from 1.5 down to -0.5: row 0 is drawn on top
	# and both rows are fully inside the view
	ax.set_ylim(bottom=1.5, top=-0.5)
	# One-row table with the feature values of the new sample to classify
	data1 = pd.DataFrame({'Height': [6], 'Weight': [130], 'Foot_Size': [8]})

	# Class label(s) the fitted model assigns to that sample
	y_pred = gnb.predict(data1)
	if y_pred==0:
	# Hold out 30% of the rows for testing; random_state makes the split repeatable
	X_train, X_test, y_train, y_test = model_selection.train_test_split(
		X,
		y,
		test_size=0.3,
		random_state=1,
	)

	# Decision tree using the entropy criterion, limited to a depth of 3
	dtree = tree.DecisionTreeClassifier(
		criterion='entropy',
		max_depth=3,
		random_state=0,
	)

	# Fit the tree on the training split
	dtree.fit(X_train, y_train)