Skip to content

Instantly share code, notes, and snippets.

@soumikghosal
Created February 13, 2017 08:53
Show Gist options
  • Save soumikghosal/34ce215b047056e258b31e17f7dd05b2 to your computer and use it in GitHub Desktop.
Save soumikghosal/34ce215b047056e258b31e17f7dd05b2 to your computer and use it in GitHub Desktop.
Implementing Naive Bayes without using sklearn.naive_bayes
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data=pd.read_csv(\"C:\\\\Users\\\\COM\\\\Desktop\\\\Test\\\\Q2-tennis.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df=pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Outlook</th>\n",
" <th>Temp.</th>\n",
" <th>Humidity</th>\n",
" <th>Windy</th>\n",
" <th>Play</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>sunny</td>\n",
" <td>hot</td>\n",
" <td>high</td>\n",
" <td>false</td>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>sunny</td>\n",
" <td>hot</td>\n",
" <td>high</td>\n",
" <td>true</td>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>overcast</td>\n",
" <td>hot</td>\n",
" <td>high</td>\n",
" <td>false</td>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>rainy</td>\n",
" <td>mild</td>\n",
" <td>high</td>\n",
" <td>false</td>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>rainy</td>\n",
" <td>cool</td>\n",
" <td>normal</td>\n",
" <td>false</td>\n",
" <td>yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Outlook Temp. Humidity Windy Play\n",
"0 sunny hot high false no\n",
"1 sunny hot high true no\n",
"2 overcast hot high false yes\n",
"3 rainy mild high false yes\n",
"4 rainy cool normal false yes"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=14, step=1)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Outlook', 'Temp.', 'Humidity', 'Windy', 'Play'], dtype='object')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"yes 0.642857\n",
"no 0.357143\n",
"Name: Play, dtype: float64"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class priors P(Play): the share of rows carrying each label.\n",
"# Series.value_counts(normalize=True) replaces the deprecated top-level\n",
"# pd.value_counts helper and returns proportions directly.\n",
"g=df.Play.value_counts(normalize=True)\n",
"g"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Frequency Table"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Outlook</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>overcast</th>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rainy</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sunny</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes All\n",
"Outlook \n",
"overcast 0 4 4\n",
"rainy 2 3 5\n",
"sunny 3 2 5\n",
"All 5 9 14"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Outlook x Play contingency table with 'All' totals. margins expects a bool;\n",
"# the string 'TRUE' was only accepted because any non-empty string is truthy.\n",
"Outlook_play=pd.crosstab(df.Outlook,df.Play,margins=True)\n",
"Outlook_play"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Smoothing: replace zero counts with 1 so that no conditional probability is exactly zero"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Smoothing: raise every zero count to 1 so that no conditional probability\n",
"# is exactly zero, then rebuild the 'All' margins from the adjusted counts.\n",
"# Positional .iloc replaces the removed .ix indexer; the last row and the\n",
"# last column of the crosstab are the 'All' totals.\n",
"counts = Outlook_play.iloc[:-1, :-1].values.copy()\n",
"counts[counts == 0] = 1\n",
"\n",
"Outlook_play.iloc[:-1, :-1] = counts\n",
"Outlook_play.iloc[:-1, -1] = counts.sum(axis=1)   # per-outlook totals\n",
"Outlook_play.iloc[-1, :-1] = counts.sum(axis=0)   # per-class totals\n",
"Outlook_play.iloc[-1, -1] = counts.sum()          # grand total"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Outlook</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>overcast</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rainy</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sunny</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes All\n",
"Outlook \n",
"overcast 1 4 5\n",
"rainy 2 3 5\n",
"sunny 3 2 5\n",
"All 6 9 15"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Outlook_play"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Temp.</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>cool</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hot</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mild</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes All\n",
"Temp. \n",
"cool 1 3 4\n",
"hot 2 2 4\n",
"mild 2 4 6\n",
"All 5 9 14"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Temp. x Play contingency table with 'All' totals. margins expects a bool;\n",
"# the string 'TRUE' was only accepted because any non-empty string is truthy.\n",
"Temp_play=pd.crosstab(df['Temp.'],df.Play,margins=True)\n",
"Temp_play"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Humidity</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>high</th>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>normal</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes All\n",
"Humidity \n",
"high 4 3 7\n",
"normal 1 6 7\n",
"All 5 9 14"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Humidity x Play contingency table with 'All' totals. margins expects a bool;\n",
"# the string 'TRUE' was only accepted because any non-empty string is truthy.\n",
"Humidity_play=pd.crosstab(df.Humidity,df.Play,margins=True)\n",
"Humidity_play"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Windy</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>false</th>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>true</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes All\n",
"Windy \n",
"false 2 6 8\n",
"true 3 3 6\n",
"All 5 9 14"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Windy x Play contingency table with 'All' totals. margins expects a bool;\n",
"# the string 'TRUE' was only accepted because any non-empty string is truthy.\n",
"Windy_play=pd.crosstab(df.Windy,df.Play,margins=True)\n",
"Windy_play"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Likelihood tables: convert the frequency counts into probabilities"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert the Outlook counts into probabilities (positional .iloc replaces\n",
"# the removed .ix indexer). Cast to float first so the fractions are not\n",
"# written into integer columns.\n",
"Outlook_play = Outlook_play.astype(float)\n",
"totals = Outlook_play.iloc[-1].values.copy()   # 'All' row: no, yes, grand total\n",
"\n",
"# Every value row is divided column-wise by its total: P(x|no), P(x|yes), P(x).\n",
"Outlook_play.iloc[:-1] = Outlook_play.iloc[:-1].values / totals\n",
"# The totals row becomes the class distribution P(c); its last cell is 1.\n",
"Outlook_play.iloc[-1] = totals / totals[-1]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>P(x)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Outlook</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>overcast</th>\n",
" <td>0.166667</td>\n",
" <td>0.444444</td>\n",
" <td>0.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rainy</th>\n",
" <td>0.333333</td>\n",
" <td>0.333333</td>\n",
" <td>0.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sunny</th>\n",
" <td>0.500000</td>\n",
" <td>0.222222</td>\n",
" <td>0.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P(c)</th>\n",
" <td>0.400000</td>\n",
" <td>0.600000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes P(x)\n",
"Outlook \n",
"overcast 0.166667 0.444444 0.333333\n",
"rainy 0.333333 0.333333 0.333333\n",
"sunny 0.500000 0.222222 0.333333\n",
"P(c) 0.400000 0.600000 1.000000"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Outlook_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert the Temp. counts into probabilities (positional .iloc replaces\n",
"# the removed .ix indexer). Cast to float first so the fractions are not\n",
"# written into integer columns.\n",
"Temp_play = Temp_play.astype(float)\n",
"totals = Temp_play.iloc[-1].values.copy()   # 'All' row: no, yes, grand total\n",
"\n",
"# Every value row is divided column-wise by its total: P(x|no), P(x|yes), P(x).\n",
"Temp_play.iloc[:-1] = Temp_play.iloc[:-1].values / totals\n",
"# The totals row becomes the class distribution P(c); its last cell is 1.\n",
"Temp_play.iloc[-1] = totals / totals[-1]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>P(x)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Temp.</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>cool</th>\n",
" <td>0.200000</td>\n",
" <td>0.333333</td>\n",
" <td>0.285714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hot</th>\n",
" <td>0.400000</td>\n",
" <td>0.222222</td>\n",
" <td>0.285714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mild</th>\n",
" <td>0.400000</td>\n",
" <td>0.444444</td>\n",
" <td>0.428571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P(c)</th>\n",
" <td>0.357143</td>\n",
" <td>0.642857</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes P(x)\n",
"Temp. \n",
"cool 0.200000 0.333333 0.285714\n",
"hot 0.400000 0.222222 0.285714\n",
"mild 0.400000 0.444444 0.428571\n",
"P(c) 0.357143 0.642857 1.000000"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Temp_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert the Humidity counts into probabilities (positional .iloc replaces\n",
"# the removed .ix indexer). Cast to float first so the fractions are not\n",
"# written into integer columns.\n",
"Humidity_play = Humidity_play.astype(float)\n",
"totals = Humidity_play.iloc[-1].values.copy()   # 'All' row: no, yes, grand total\n",
"\n",
"# Every value row is divided column-wise by its total: P(x|no), P(x|yes), P(x).\n",
"Humidity_play.iloc[:-1] = Humidity_play.iloc[:-1].values / totals\n",
"# The totals row becomes the class distribution P(c); its last cell is 1.\n",
"Humidity_play.iloc[-1] = totals / totals[-1]"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>P(x)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Humidity</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>high</th>\n",
" <td>0.800000</td>\n",
" <td>0.333333</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>normal</th>\n",
" <td>0.200000</td>\n",
" <td>0.666667</td>\n",
" <td>0.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P(c)</th>\n",
" <td>0.357143</td>\n",
" <td>0.642857</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes P(x)\n",
"Humidity \n",
"high 0.800000 0.333333 0.5\n",
"normal 0.200000 0.666667 0.5\n",
"P(c) 0.357143 0.642857 1.0"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Humidity_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert the Windy counts into probabilities (positional .iloc replaces\n",
"# the removed .ix indexer). Cast to float first so the fractions are not\n",
"# written into integer columns.\n",
"Windy_play = Windy_play.astype(float)\n",
"totals = Windy_play.iloc[-1].values.copy()   # 'All' row: no, yes, grand total\n",
"\n",
"# Every value row is divided column-wise by its total: P(x|no), P(x|yes), P(x).\n",
"Windy_play.iloc[:-1] = Windy_play.iloc[:-1].values / totals\n",
"# The totals row becomes the class distribution P(c); its last cell is 1.\n",
"Windy_play.iloc[-1] = totals / totals[-1]"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Play</th>\n",
" <th>no</th>\n",
" <th>yes</th>\n",
" <th>P(x)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Windy</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>false</th>\n",
" <td>0.400000</td>\n",
" <td>0.666667</td>\n",
" <td>0.571429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>true</th>\n",
" <td>0.600000</td>\n",
" <td>0.333333</td>\n",
" <td>0.428571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>P(c)</th>\n",
" <td>0.357143</td>\n",
" <td>0.642857</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Play no yes P(x)\n",
"Windy \n",
"false 0.400000 0.666667 0.571429\n",
"true 0.600000 0.333333 0.428571\n",
"P(c) 0.357143 0.642857 1.000000"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Windy_play.rename(columns={'All':'P(x)'} , index={'All':'P(c)'})"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Prediction"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Classify every row with the Naive Bayes rule:\n",
"#   score(c) = P(c) * product over features of P(feature value | c)\n",
"# Label-based .loc lookups replace the removed .ix indexer.\n",
"likelihoods = [\n",
"    ('Outlook', Outlook_play),\n",
"    ('Temp.', Temp_play),\n",
"    ('Humidity', Humidity_play),\n",
"    ('Windy', Windy_play),\n",
"]\n",
"\n",
"pred_play=[]\n",
"for _, row in df.iterrows():\n",
"    pcxy = 1.0\n",
"    pcxn = 1.0\n",
"    for column, table in likelihoods:\n",
"        pcxy *= table.loc[row[column], 'yes']\n",
"        pcxn *= table.loc[row[column], 'no']\n",
"    pcxy *= g['yes']\n",
"    pcxn *= g['no']\n",
"    # Both scores share the same normalising constant, so comparing them\n",
"    # directly gives the same decision and avoids a 0/0 division. A tie goes\n",
"    # to \"yes\" so that every row receives exactly one prediction and\n",
"    # pred_play stays aligned with df.\n",
"    pred_play.append(\"yes\" if pcxy >= pcxn else \"no\")"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>no</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"0 no\n",
"1 no\n",
"2 yes\n",
"3 yes\n",
"4 yes\n",
"5 yes\n",
"6 yes\n",
"7 no\n",
"8 yes\n",
"9 yes\n",
"10 yes\n",
"11 yes\n",
"12 yes\n",
"13 no"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred_play=pd.DataFrame(pred_play)\n",
"pred_play"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Play</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>no</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>no</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Play\n",
"0 no\n",
"1 no\n",
"2 yes\n",
"3 yes\n",
"4 yes\n",
"5 no\n",
"6 yes\n",
"7 no\n",
"8 yes\n",
"9 yes\n",
"10 yes\n",
"11 yes\n",
"12 yes\n",
"13 no"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Expected=df[['Play']]\n",
"Expected"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Calculating the Accuracy"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn import metrics"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.928571428571\n"
]
}
],
"source": [
"print(metrics.accuracy_score(Expected,pred_play))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
@saksham164
Copy link

• Implement k-fold cross-validation (e.g., 5-fold) for the Naïve Bayesian classifier on a given dataset. Calculate the average accuracy of the classifier over the k folds and report the results.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment