This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| # Create an empty dataframe | |
| data = pd.DataFrame() | |
| # Create our target variable | |
| data['Gender'] = ['male','male','male','male','female','female','female','female'] | |
| # Create our feature variables |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the single-row observation in one constructor call instead of
# growing an empty frame column by column; the resulting columns, values
# and default integer index are the same.
person = pd.DataFrame(
    {
        'Height': [6],
        'Weight': [130],
        'Foot_Size': [8],
    }
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Number of males | |
| n_male = data['Gender'][data['Gender'] == 'male'].count() | |
| # Number of females | |
| n_female = data['Gender'][data['Gender'] == 'female'].count() | |
| # Total rows | |
| total_ppl = data['Gender'].count() | |
| # Number of males divided by the total rows |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Group the data by gender and calculate the means of each feature | |
| data_means = data.groupby('Gender').mean() | |
| # Group the data by gender and calculate the variance of each feature | |
| data_variance = data.groupby('Gender').var() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def p_x_given_y(x, mean_y, variance_y):
    """Return the Gaussian likelihood p(x | y).

    Evaluates the normal probability density function with the given mean
    and variance at ``x``::

        1 / sqrt(2 * pi * variance) * exp(-(x - mean)**2 / (2 * variance))

    Args:
        x: Value(s) at which to evaluate the density.
        mean_y: Mean of the distribution.
        variance_y: Variance of the distribution. The formula divides by
            this value and takes its square root, so it must be positive
            for a finite, real result.

    Returns:
        The density at ``x``. Only NumPy functions and arithmetic are
        used, so array-like arguments are evaluated element-wise.
    """
    # Normalizing constant in front of the exponential.
    normalizer = 1 / np.sqrt(2 * np.pi * variance_y)
    # Negative squared distance from the mean, scaled by twice the variance.
    exponent = -((x - mean_y) ** 2) / (2 * variance_y)
    return normalizer * np.exp(exponent)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| # Create an empty dataframe | |
| data = pd.DataFrame() | |
| # Create our target variable | |
| data['Gender'] = [1,1,1,1,0,0,0,0] #1 is male | |
| # Create our feature variables | |
| data['Height'] = [6,5.92,5.58,5.92,5,5.5,5.42,5.75] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| X = data.drop(['Gender'],axis=1) | |
| y=data.Gender | |
| # splitting X and y into training and testing sets | |
| from sklearn.model_selection import train_test_split | |
| X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) | |
| # training the model on training set | |
| from sklearn.naive_bayes import GaussianNB |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sklearn.metrics import classification_report, confusion_matrix | |
| cm = confusion_matrix(y, gnb.predict(X)) | |
| fig, ax = plt.subplots(figsize=(8, 8)) | |
| ax.imshow(cm) | |
| ax.grid(False) | |
| ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted 0s', 'Predicted 1s')) | |
| ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual 0s', 'Actual 1s')) | |
| ax.set_ylim(1.5, -0.5) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the single-row feature frame for the new sample to be classified.
# (This holds feature values only; there is no target column here.)
data1 = pd.DataFrame(
    {
        'Height': [6],
        'Weight': [130],
        'Foot_Size': [8],
    }
)
| y_pred = gnb.predict(data1) | |
| if y_pred==0: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| X_train,X_test,y_train,y_test = model_selection.train_test_split(X, y, test_size=0.3, random_state=1) | |
| dtree = tree.DecisionTreeClassifier(criterion='entropy' , max_depth=3, random_state = 0) | |
| dtree.fit(X_train, y_train) |
OlderNewer