lasershow/classify_junk_mail.ipynb

## classify_junk_mail.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_table(\n",
    "             'smsspamcollection/SMSSpamCollection', \n",
    "              names=['label', 'message']\n",
    "              )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ham</td>\n",
       "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ham</td>\n",
       "      <td>Ok lar... Joking wif u oni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>spam</td>\n",
       "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ham</td>\n",
       "      <td>U dun say so early hor... U c already then say...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ham</td>\n",
       "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  label                                            message\n",
       "0   ham  Go until jurong point, crazy.. Available only ...\n",
       "1   ham                      Ok lar... Joking wif u oni...\n",
       "2  spam  Free entry in 2 a wkly comp to win FA Cup fina...\n",
       "3   ham  U dun say so early hor... U c already then say...\n",
       "4   ham  Nah I don't think he goes to usf, he lives aro..."
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5572 entries, 0 to 5571\n",
      "Data columns (total 2 columns):\n",
      "label      5572 non-null object\n",
      "message    5572 non-null object\n",
      "dtypes: object(2)\n",
      "memory usage: 87.1+ KB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5572, 2)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>Ok lar... Joking wif u oni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>U dun say so early hor... U c already then say...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   label                                            message\n",
       "0      0  Go until jurong point, crazy.. Available only ...\n",
       "1      0                      Ok lar... Joking wif u oni...\n",
       "2      1  Free entry in 2 a wkly comp to win FA Cup fina...\n",
       "3      0  U dun say so early hor... U c already then say...\n",
       "4      0  Nah I don't think he goes to usf, he lives aro..."
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['label'] = df.label.map({'ham':0, 'spam':1})\n",
    "print(df.shape)\n",
    "df.head() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "count_vec_sample = CountVectorizer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "messages = ['Thank you for calling.',\n",
    "                      'Thank you for your inquiry.',\n",
    "                      'Thanks for keeping in touch.',\n",
    "                      'Thanks for getting in touch with me?']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
       "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
       "        lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
       "        ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
       "        strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
       "        tokenizer=None, vocabulary=None)"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count_vec_sample.fit(messages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "data = count_vec_sample.transform(messages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],\n",
       "        [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1],\n",
       "        [0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0],\n",
       "        [0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0]])"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.todense()   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "count_vec = CountVectorizer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
       "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
       "        lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
       "        ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
       "        strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
       "        tokenizer=None, vocabulary=None)"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count_vec.fit(df.message)\n",
    "\n",
    "#出力する場合はコメントをはずしてください。\n",
    "# count_vec.vocabulary_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(df['message'], \n",
    "                                                    df['label'], \n",
    "                                                    random_state=1)\n",
    "\n",
    "count_vector = CountVectorizer()\n",
    "count_vector.fit(X_train)\n",
    "X_train = count_vector.transform(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# count_vector.vocabulary_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_test = count_vector.transform(X_test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.naive_bayes import MultinomialNB\n",
    "naive_bayes = MultinomialNB()\n",
    "naive_bayes.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = naive_bayes.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy score:  0.9885139985642498\n",
      "Precision score:  0.9720670391061452\n",
      "Recall score:  0.9405405405405406\n",
      "F1 score:  0.9560439560439562\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
    "print('Accuracy score: ', format(accuracy_score(y_test, predictions)))\n",
    "print('Precision score: ', format(precision_score(y_test, predictions)))\n",
    "print('Recall score: ', format(recall_score(y_test, predictions)))\n",
    "print('F1 score: ', format(f1_score(y_test, predictions)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 評価関数について\n",
    "Accuracy score、Recallなどを見ていますが、実際にはどの評価関数を重要視するかを決定する必要があります。\n",
    "例えば、Recallの場合、\n",
    "\n",
    "\n",
    "recall = (スパムであると正しく判定できたメール数)/(迷惑メールの全体の数)\n",
    "\n",
    "\n",
    "であるので、迷惑メールをより検出することに、重きを置いているといえます。\n",
    "\n",
    "\n",
    "precision = (スパムと判定したメールのうち実際にスパムであった数）/（フィルターがスパムと判定したメール数）\n",
    "\n",
    "\n",
    "逆にPrecisionは迷惑メールがきちんと迷惑メールであるかということに重きを置いています。\n",
    "迷惑メールの場合、スパムで無いメールをスパムと認識しては、まずいのでPrecisionを重要視するなどの評価関数の使い分けが必要です。\n"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 54,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"df = pd.read_table(\n",
	" 'smsspamcollection/SMSSpamCollection', \n",
	" names=['label', 'message']\n",
	" )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 55,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>label</th>\n",
	" <th>message</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>ham</td>\n",
	" <td>Go until jurong point, crazy.. Available only ...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>ham</td>\n",
	" <td>Ok lar... Joking wif u oni...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>spam</td>\n",
	" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>ham</td>\n",
	" <td>U dun say so early hor... U c already then say...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>ham</td>\n",
	" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" label message\n",
	"0 ham Go until jurong point, crazy.. Available only ...\n",
	"1 ham Ok lar... Joking wif u oni...\n",
	"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
	"3 ham U dun say so early hor... U c already then say...\n",
	"4 ham Nah I don't think he goes to usf, he lives aro..."
	]
	},
	"execution_count": 55,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 56,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"<class 'pandas.core.frame.DataFrame'>\n",
	"RangeIndex: 5572 entries, 0 to 5571\n",
	"Data columns (total 2 columns):\n",
	"label 5572 non-null object\n",
	"message 5572 non-null object\n",
	"dtypes: object(2)\n",
	"memory usage: 87.1+ KB\n"
	]
	}
	],
	"source": [
	"df.info()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 57,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(5572, 2)\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>label</th>\n",
	" <th>message</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>0</td>\n",
	" <td>Go until jurong point, crazy.. Available only ...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>0</td>\n",
	" <td>Ok lar... Joking wif u oni...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1</td>\n",
	" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>0</td>\n",
	" <td>U dun say so early hor... U c already then say...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0</td>\n",
	" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" label message\n",
	"0 0 Go until jurong point, crazy.. Available only ...\n",
	"1 0 Ok lar... Joking wif u oni...\n",
	"2 1 Free entry in 2 a wkly comp to win FA Cup fina...\n",
	"3 0 U dun say so early hor... U c already then say...\n",
	"4 0 Nah I don't think he goes to usf, he lives aro..."
	]
	},
	"execution_count": 57,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df['label'] = df.label.map({'ham':0, 'spam':1})\n",
	"print(df.shape)\n",
	"df.head() "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 58,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from sklearn.feature_extraction.text import CountVectorizer\n",
	"count_vec_sample = CountVectorizer()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 59,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"messages = ['Thank you for calling.',\n",
	" 'Thank you for your inquiry.',\n",
	" 'Thanks for keeping in touch.',\n",
	" 'Thanks for getting in touch with me?']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 60,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
	" dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
	" lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
	" ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
	" strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
	" tokenizer=None, vocabulary=None)"
	]
	},
	"execution_count": 60,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"count_vec_sample.fit(messages)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 61,
	"metadata": {
	"collapsed": true,
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"data = count_vec_sample.transform(messages)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 62,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"matrix([[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],\n",
	" [0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1],\n",
	" [0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0],\n",
	" [0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0]])"
	]
	},
	"execution_count": 62,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"data.todense() "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 63,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"count_vec = CountVectorizer()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 64,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
	" dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
	" lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
	" ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
	" strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
	" tokenizer=None, vocabulary=None)"
	]
	},
	"execution_count": 64,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"count_vec.fit(df.message)\n",
	"\n",
	"#出力する場合はコメントをはずしてください。\n",
	"# count_vec.vocabulary_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 65,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"\n",
	"X_train, X_test, y_train, y_test = train_test_split(df['message'], \n",
	" df['label'], \n",
	" random_state=1)\n",
	"\n",
	"count_vector = CountVectorizer()\n",
	"count_vector.fit(X_train)\n",
	"X_train = count_vector.transform(X_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 66,
	"metadata": {},
	"outputs": [],
	"source": [
	"# count_vector.vocabulary_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 67,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"X_test = count_vector.transform(X_test)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
	]
	},
	"execution_count": 68,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from sklearn.naive_bayes import MultinomialNB\n",
	"naive_bayes = MultinomialNB()\n",
	"naive_bayes.fit(X_train, y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"metadata": {},
	"outputs": [],
	"source": [
	"predictions = naive_bayes.predict(X_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 70,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Accuracy score: 0.9885139985642498\n",
	"Precision score: 0.9720670391061452\n",
	"Recall score: 0.9405405405405406\n",
	"F1 score: 0.9560439560439562\n"
	]
	}
	],
	"source": [
	"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
	"print('Accuracy score: ', format(accuracy_score(y_test, predictions)))\n",
	"print('Precision score: ', format(precision_score(y_test, predictions)))\n",
	"print('Recall score: ', format(recall_score(y_test, predictions)))\n",
	"print('F1 score: ', format(f1_score(y_test, predictions)))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## 評価関数について\n",
	"Accuracy score、Recallなどを見ていますが、実際にはどの評価関数を重要視するかを決定する必要があります。\n",
	"例えば、Recallの場合、\n",
	"\n",
	"\n",
	"recall = (スパムであると正しく判定できたメール数)/(迷惑メールの全体の数)\n",
	"\n",
	"\n",
	"であるので、迷惑メールをより検出することに、重きを置いているといえます。\n",
	"\n",
	"\n",
	"precision = (スパムと判定したメールのうち実際にスパムであった数）/（フィルターがスパムと判定したメール数）\n",
	"\n",
	"\n",
	"逆にPrecisionは迷惑メールがきちんと迷惑メールであるかということに重きを置いています。\n",
	"迷惑メールの場合、スパムで無いメールをスパムと認識しては、まずいのでPrecisionを重要視するなどの評価関数の使い分けが必要です。\n"
	]
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}