Created
September 18, 2017 12:55
-
-
Save riow1983/ef54194aeb7548ff9a8715358dee8eb6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 110, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.naive_bayes import GaussianNB\n", | |
"gnb = GaussianNB()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn import preprocessing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.feature_extraction.text import CountVectorizer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame({\"患者氏名\":[\"Adam Smith\",\n", | |
"\"Napoleon Bonaparte\",\n", | |
"\"Adolf Hitler\",\n", | |
"\"Gabriel Lippmann\",\n", | |
"\"トーマス ベイズ\",\n", | |
"\"カール ハイド\",\n", | |
"\"マーク ザッカーバーグ\",\n", | |
"\"リー クワンユー\",\n", | |
"\"湯川 秀樹\",\n", | |
"\"朝永 振一郎\",\n", | |
"\"小林 誠\",\n", | |
"\"益川 敏英\",\n", | |
"\"毛 沢東\",\n", | |
"\"習 近平\",\n", | |
"\"金 日成\",\n", | |
"\"江 沢民\"]})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df[\"患者姓\"] = df[\"患者氏名\"].apply(lambda x: x.split(\" \")[0])\n", | |
"df[\"患者名\"] = df[\"患者氏名\"].apply(lambda x: x.split(\" \")[1])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 116, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Adam Smith</td>\n", | |
" <td>Adam</td>\n", | |
" <td>Smith</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Napoleon Bonaparte</td>\n", | |
" <td>Napoleon</td>\n", | |
" <td>Bonaparte</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Adolf Hitler</td>\n", | |
" <td>Adolf</td>\n", | |
" <td>Hitler</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Gabriel Lippmann</td>\n", | |
" <td>Gabriel</td>\n", | |
" <td>Lippmann</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>トーマス ベイズ</td>\n", | |
" <td>トーマス</td>\n", | |
" <td>ベイズ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>カール ハイド</td>\n", | |
" <td>カール</td>\n", | |
" <td>ハイド</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>マーク ザッカーバーグ</td>\n", | |
" <td>マーク</td>\n", | |
" <td>ザッカーバーグ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>リー クワンユー</td>\n", | |
" <td>リー</td>\n", | |
" <td>クワンユー</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>湯川 秀樹</td>\n", | |
" <td>湯川</td>\n", | |
" <td>秀樹</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>朝永 振一郎</td>\n", | |
" <td>朝永</td>\n", | |
" <td>振一郎</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>小林 誠</td>\n", | |
" <td>小林</td>\n", | |
" <td>誠</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>益川 敏英</td>\n", | |
" <td>益川</td>\n", | |
" <td>敏英</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>毛 沢東</td>\n", | |
" <td>毛</td>\n", | |
" <td>沢東</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>習 近平</td>\n", | |
" <td>習</td>\n", | |
" <td>近平</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>金 日成</td>\n", | |
" <td>金</td>\n", | |
" <td>日成</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>江 沢民</td>\n", | |
" <td>江</td>\n", | |
" <td>沢民</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名\n", | |
"0 Adam Smith Adam Smith\n", | |
"1 Napoleon Bonaparte Napoleon Bonaparte\n", | |
"2 Adolf Hitler Adolf Hitler\n", | |
"3 Gabriel Lippmann Gabriel Lippmann\n", | |
"4 トーマス ベイズ トーマス ベイズ\n", | |
"5 カール ハイド カール ハイド\n", | |
"6 マーク ザッカーバーグ マーク ザッカーバーグ\n", | |
"7 リー クワンユー リー クワンユー\n", | |
"8 湯川 秀樹 湯川 秀樹\n", | |
"9 朝永 振一郎 朝永 振一郎\n", | |
"10 小林 誠 小林 誠\n", | |
"11 益川 敏英 益川 敏英\n", | |
"12 毛 沢東 毛 沢東\n", | |
"13 習 近平 習 近平\n", | |
"14 金 日成 金 日成\n", | |
"15 江 沢民 江 沢民" | |
] | |
}, | |
"execution_count": 116, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"cv = CountVectorizer()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 118, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"counts = cv.fit_transform(df[\"患者氏名\"].values)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 119, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"counts_array = counts.toarray()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 120, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#df[\"患者氏名_array\"] = df[\"患者氏名_cv\"].apply(lambda x: x.toarray())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 121, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df[\"患者氏名文字数\"] = df[\"患者氏名\"].apply(lambda x: len(x))\n", | |
"df[\"患者姓文字数\"] = df[\"患者姓\"].apply(lambda x: len(x))\n", | |
"df[\"患者名文字数\"] = df[\"患者名\"].apply(lambda x: len(x))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 122, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Adam Smith</td>\n", | |
" <td>Adam</td>\n", | |
" <td>Smith</td>\n", | |
" <td>10</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Napoleon Bonaparte</td>\n", | |
" <td>Napoleon</td>\n", | |
" <td>Bonaparte</td>\n", | |
" <td>18</td>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Adolf Hitler</td>\n", | |
" <td>Adolf</td>\n", | |
" <td>Hitler</td>\n", | |
" <td>12</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Gabriel Lippmann</td>\n", | |
" <td>Gabriel</td>\n", | |
" <td>Lippmann</td>\n", | |
" <td>16</td>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>トーマス ベイズ</td>\n", | |
" <td>トーマス</td>\n", | |
" <td>ベイズ</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>カール ハイド</td>\n", | |
" <td>カール</td>\n", | |
" <td>ハイド</td>\n", | |
" <td>7</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>マーク ザッカーバーグ</td>\n", | |
" <td>マーク</td>\n", | |
" <td>ザッカーバーグ</td>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>リー クワンユー</td>\n", | |
" <td>リー</td>\n", | |
" <td>クワンユー</td>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>湯川 秀樹</td>\n", | |
" <td>湯川</td>\n", | |
" <td>秀樹</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>朝永 振一郎</td>\n", | |
" <td>朝永</td>\n", | |
" <td>振一郎</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>小林 誠</td>\n", | |
" <td>小林</td>\n", | |
" <td>誠</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>益川 敏英</td>\n", | |
" <td>益川</td>\n", | |
" <td>敏英</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>毛 沢東</td>\n", | |
" <td>毛</td>\n", | |
" <td>沢東</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>習 近平</td>\n", | |
" <td>習</td>\n", | |
" <td>近平</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>金 日成</td>\n", | |
" <td>金</td>\n", | |
" <td>日成</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>江 沢民</td>\n", | |
" <td>江</td>\n", | |
" <td>沢民</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名 患者氏名文字数 患者姓文字数 患者名文字数\n", | |
"0 Adam Smith Adam Smith 10 4 5\n", | |
"1 Napoleon Bonaparte Napoleon Bonaparte 18 8 9\n", | |
"2 Adolf Hitler Adolf Hitler 12 5 6\n", | |
"3 Gabriel Lippmann Gabriel Lippmann 16 7 8\n", | |
"4 トーマス ベイズ トーマス ベイズ 8 4 3\n", | |
"5 カール ハイド カール ハイド 7 3 3\n", | |
"6 マーク ザッカーバーグ マーク ザッカーバーグ 11 3 7\n", | |
"7 リー クワンユー リー クワンユー 8 2 5\n", | |
"8 湯川 秀樹 湯川 秀樹 5 2 2\n", | |
"9 朝永 振一郎 朝永 振一郎 6 2 3\n", | |
"10 小林 誠 小林 誠 4 2 1\n", | |
"11 益川 敏英 益川 敏英 5 2 2\n", | |
"12 毛 沢東 毛 沢東 4 1 2\n", | |
"13 習 近平 習 近平 4 1 2\n", | |
"14 金 日成 金 日成 4 1 2\n", | |
"15 江 沢民 江 沢民 4 1 2" | |
] | |
}, | |
"execution_count": 122, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df[\"判定\"] = \"\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 124, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df.loc[:4, \"判定\"] = \"アルファベット外国人\"\n", | |
"df.loc[4:8, \"判定\"] = \"カタカナ外国人\"\n", | |
"df.loc[8:12, \"判定\"] = \"日本人\"\n", | |
"df.loc[12:, \"判定\"] = \"漢字外国人\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 125, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" <th>判定</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Adam Smith</td>\n", | |
" <td>Adam</td>\n", | |
" <td>Smith</td>\n", | |
" <td>10</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Napoleon Bonaparte</td>\n", | |
" <td>Napoleon</td>\n", | |
" <td>Bonaparte</td>\n", | |
" <td>18</td>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Adolf Hitler</td>\n", | |
" <td>Adolf</td>\n", | |
" <td>Hitler</td>\n", | |
" <td>12</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Gabriel Lippmann</td>\n", | |
" <td>Gabriel</td>\n", | |
" <td>Lippmann</td>\n", | |
" <td>16</td>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>トーマス ベイズ</td>\n", | |
" <td>トーマス</td>\n", | |
" <td>ベイズ</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>カール ハイド</td>\n", | |
" <td>カール</td>\n", | |
" <td>ハイド</td>\n", | |
" <td>7</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>マーク ザッカーバーグ</td>\n", | |
" <td>マーク</td>\n", | |
" <td>ザッカーバーグ</td>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>リー クワンユー</td>\n", | |
" <td>リー</td>\n", | |
" <td>クワンユー</td>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>湯川 秀樹</td>\n", | |
" <td>湯川</td>\n", | |
" <td>秀樹</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>朝永 振一郎</td>\n", | |
" <td>朝永</td>\n", | |
" <td>振一郎</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>小林 誠</td>\n", | |
" <td>小林</td>\n", | |
" <td>誠</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>益川 敏英</td>\n", | |
" <td>益川</td>\n", | |
" <td>敏英</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>毛 沢東</td>\n", | |
" <td>毛</td>\n", | |
" <td>沢東</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>習 近平</td>\n", | |
" <td>習</td>\n", | |
" <td>近平</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>金 日成</td>\n", | |
" <td>金</td>\n", | |
" <td>日成</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>江 沢民</td>\n", | |
" <td>江</td>\n", | |
" <td>沢民</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名 患者氏名文字数 患者姓文字数 患者名文字数 \\\n", | |
"0 Adam Smith Adam Smith 10 4 5 \n", | |
"1 Napoleon Bonaparte Napoleon Bonaparte 18 8 9 \n", | |
"2 Adolf Hitler Adolf Hitler 12 5 6 \n", | |
"3 Gabriel Lippmann Gabriel Lippmann 16 7 8 \n", | |
"4 トーマス ベイズ トーマス ベイズ 8 4 3 \n", | |
"5 カール ハイド カール ハイド 7 3 3 \n", | |
"6 マーク ザッカーバーグ マーク ザッカーバーグ 11 3 7 \n", | |
"7 リー クワンユー リー クワンユー 8 2 5 \n", | |
"8 湯川 秀樹 湯川 秀樹 5 2 2 \n", | |
"9 朝永 振一郎 朝永 振一郎 6 2 3 \n", | |
"10 小林 誠 小林 誠 4 2 1 \n", | |
"11 益川 敏英 益川 敏英 5 2 2 \n", | |
"12 毛 沢東 毛 沢東 4 1 2 \n", | |
"13 習 近平 習 近平 4 1 2 \n", | |
"14 金 日成 金 日成 4 1 2 \n", | |
"15 江 沢民 江 沢民 4 1 2 \n", | |
"\n", | |
" 判定 \n", | |
"0 アルファベット外国人 \n", | |
"1 アルファベット外国人 \n", | |
"2 アルファベット外国人 \n", | |
"3 アルファベット外国人 \n", | |
"4 カタカナ外国人 \n", | |
"5 カタカナ外国人 \n", | |
"6 カタカナ外国人 \n", | |
"7 カタカナ外国人 \n", | |
"8 日本人 \n", | |
"9 日本人 \n", | |
"10 日本人 \n", | |
"11 日本人 \n", | |
"12 漢字外国人 \n", | |
"13 漢字外国人 \n", | |
"14 漢字外国人 \n", | |
"15 漢字外国人 " | |
] | |
}, | |
"execution_count": 125, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 126, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"le = preprocessing.LabelEncoder()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#df[\"患者氏名_cat\"] = le.fit_transform(df[\"患者氏名\"])\n", | |
"#df[\"患者姓_cat\"] = le.fit_transform(df[\"患者姓\"])\n", | |
"#df[\"患者名_cat\"] = le.fit_transform(df[\"患者名\"])\n", | |
"df[\"判定_cat\"] = le.fit_transform(df[\"判定\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 128, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" <th>判定</th>\n", | |
" <th>判定_cat</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Adam Smith</td>\n", | |
" <td>Adam</td>\n", | |
" <td>Smith</td>\n", | |
" <td>10</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Napoleon Bonaparte</td>\n", | |
" <td>Napoleon</td>\n", | |
" <td>Bonaparte</td>\n", | |
" <td>18</td>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Adolf Hitler</td>\n", | |
" <td>Adolf</td>\n", | |
" <td>Hitler</td>\n", | |
" <td>12</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Gabriel Lippmann</td>\n", | |
" <td>Gabriel</td>\n", | |
" <td>Lippmann</td>\n", | |
" <td>16</td>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>トーマス ベイズ</td>\n", | |
" <td>トーマス</td>\n", | |
" <td>ベイズ</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>カール ハイド</td>\n", | |
" <td>カール</td>\n", | |
" <td>ハイド</td>\n", | |
" <td>7</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>マーク ザッカーバーグ</td>\n", | |
" <td>マーク</td>\n", | |
" <td>ザッカーバーグ</td>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>リー クワンユー</td>\n", | |
" <td>リー</td>\n", | |
" <td>クワンユー</td>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>湯川 秀樹</td>\n", | |
" <td>湯川</td>\n", | |
" <td>秀樹</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>朝永 振一郎</td>\n", | |
" <td>朝永</td>\n", | |
" <td>振一郎</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>小林 誠</td>\n", | |
" <td>小林</td>\n", | |
" <td>誠</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>益川 敏英</td>\n", | |
" <td>益川</td>\n", | |
" <td>敏英</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>毛 沢東</td>\n", | |
" <td>毛</td>\n", | |
" <td>沢東</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>習 近平</td>\n", | |
" <td>習</td>\n", | |
" <td>近平</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>金 日成</td>\n", | |
" <td>金</td>\n", | |
" <td>日成</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>江 沢民</td>\n", | |
" <td>江</td>\n", | |
" <td>沢民</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名 患者氏名文字数 患者姓文字数 患者名文字数 \\\n", | |
"0 Adam Smith Adam Smith 10 4 5 \n", | |
"1 Napoleon Bonaparte Napoleon Bonaparte 18 8 9 \n", | |
"2 Adolf Hitler Adolf Hitler 12 5 6 \n", | |
"3 Gabriel Lippmann Gabriel Lippmann 16 7 8 \n", | |
"4 トーマス ベイズ トーマス ベイズ 8 4 3 \n", | |
"5 カール ハイド カール ハイド 7 3 3 \n", | |
"6 マーク ザッカーバーグ マーク ザッカーバーグ 11 3 7 \n", | |
"7 リー クワンユー リー クワンユー 8 2 5 \n", | |
"8 湯川 秀樹 湯川 秀樹 5 2 2 \n", | |
"9 朝永 振一郎 朝永 振一郎 6 2 3 \n", | |
"10 小林 誠 小林 誠 4 2 1 \n", | |
"11 益川 敏英 益川 敏英 5 2 2 \n", | |
"12 毛 沢東 毛 沢東 4 1 2 \n", | |
"13 習 近平 習 近平 4 1 2 \n", | |
"14 金 日成 金 日成 4 1 2 \n", | |
"15 江 沢民 江 沢民 4 1 2 \n", | |
"\n", | |
" 判定 判定_cat \n", | |
"0 アルファベット外国人 0 \n", | |
"1 アルファベット外国人 0 \n", | |
"2 アルファベット外国人 0 \n", | |
"3 アルファベット外国人 0 \n", | |
"4 カタカナ外国人 1 \n", | |
"5 カタカナ外国人 1 \n", | |
"6 カタカナ外国人 1 \n", | |
"7 カタカナ外国人 1 \n", | |
"8 日本人 2 \n", | |
"9 日本人 2 \n", | |
"10 日本人 2 \n", | |
"11 日本人 2 \n", | |
"12 漢字外国人 3 \n", | |
"13 漢字外国人 3 \n", | |
"14 漢字外国人 3 \n", | |
"15 漢字外国人 3 " | |
] | |
}, | |
"execution_count": 128, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 129, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df[df.columns[df.columns.str.contains(\"cat\")|\n", | |
" df.columns.str.contains(\"文字数\")|\n", | |
" df.columns.str.contains(\"array\")]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 130, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" <th>判定_cat</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>10</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>18</td>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>12</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>16</td>\n", | |
" <td>7</td>\n", | |
" <td>8</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>7</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名文字数 患者姓文字数 患者名文字数 判定_cat\n", | |
"0 10 4 5 0\n", | |
"1 18 8 9 0\n", | |
"2 12 5 6 0\n", | |
"3 16 7 8 0\n", | |
"4 8 4 3 1\n", | |
"5 7 3 3 1\n", | |
"6 11 3 7 1\n", | |
"7 8 2 5 1\n", | |
"8 5 2 2 2\n", | |
"9 6 2 3 2\n", | |
"10 4 2 1 2\n", | |
"11 5 2 2 2\n", | |
"12 4 1 2 3\n", | |
"13 4 1 2 3\n", | |
"14 4 1 2 3\n", | |
"15 4 1 2 3" | |
] | |
}, | |
"execution_count": 130, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 162, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tmp = df.iloc[:, :-1].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[10, 4, 5],\n", | |
" [18, 8, 9],\n", | |
" [12, 5, 6],\n", | |
" [16, 7, 8],\n", | |
" [ 8, 4, 3],\n", | |
" [ 7, 3, 3],\n", | |
" [11, 3, 7],\n", | |
" [ 8, 2, 5],\n", | |
" [ 5, 2, 2],\n", | |
" [ 6, 2, 3],\n", | |
" [ 4, 2, 1],\n", | |
" [ 5, 2, 2],\n", | |
" [ 4, 1, 2],\n", | |
" [ 4, 1, 2],\n", | |
" [ 4, 1, 2],\n", | |
" [ 4, 1, 2]])" | |
] | |
}, | |
"execution_count": 164, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tmp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 166, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"X = np.array([np.append(x,y) for x,y in zip(counts_array, tmp)])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 167, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#X = np.append(df.iloc[:, :-1].values, counts_array)\n", | |
"y = df.iloc[:, -1].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 168, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4, 5],\n", | |
" [ 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 8, 9],\n", | |
" [ 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 5, 6],\n", | |
" [ 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 7, 8],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 4, 3],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 3, 3],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 3, 7],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 2, 5],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 6, 2, 3],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 1],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 1, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 1, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 1, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 2]], dtype=int64)" | |
] | |
}, | |
"execution_count": 168, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 169, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])" | |
] | |
}, | |
"execution_count": 169, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 170, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"GaussianNB(priors=None)" | |
] | |
}, | |
"execution_count": 170, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gnb.fit(X, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 171, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tf = pd.DataFrame({\"患者氏名\": [\n", | |
"\"Tom Hanks\",\n", | |
"\"Robert De Niro\",\n", | |
"\"Gen Hoshino\",\n", | |
"\"金 正男\",\n", | |
"\"朴 璐美\",\n", | |
"\"李 小龍\",\n", | |
"\"林 彪\",\n", | |
"\"古歩道 ベンジャミン\",\n", | |
"\"キム イルソン\",\n", | |
"\"山下 奉文\",\n", | |
"\"宮沢 賢治\",\n", | |
"\"徳川 家康\",\n", | |
"\"井浦 新\",\n", | |
"\"窪塚 洋介\",\n", | |
"\"伊藤 博文\",\n", | |
"\"近衛 文麿\"\n", | |
"]\n", | |
"})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 172, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Tom Hanks</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Robert De Niro</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Gen Hoshino</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>金 正男</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>朴 璐美</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>李 小龍</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>林 彪</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>古歩道 ベンジャミン</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>キム イルソン</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>山下 奉文</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>宮沢 賢治</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>徳川 家康</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>井浦 新</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>窪塚 洋介</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>伊藤 博文</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>近衛 文麿</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名\n", | |
"0 Tom Hanks\n", | |
"1 Robert De Niro\n", | |
"2 Gen Hoshino\n", | |
"3 金 正男\n", | |
"4 朴 璐美\n", | |
"5 李 小龍\n", | |
"6 林 彪\n", | |
"7 古歩道 ベンジャミン\n", | |
"8 キム イルソン\n", | |
"9 山下 奉文\n", | |
"10 宮沢 賢治\n", | |
"11 徳川 家康\n", | |
"12 井浦 新\n", | |
"13 窪塚 洋介\n", | |
"14 伊藤 博文\n", | |
"15 近衛 文麿" | |
] | |
}, | |
"execution_count": 172, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 173, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"counts2 = cv.fit_transform(tf[\"患者氏名\"].values)\n", | |
"counts2_array = counts2.toarray()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 174, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 1, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 1, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 1, 0, 1, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0],\n", | |
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", | |
" 0, 0, 0, 0, 1]], dtype=int64)" | |
] | |
}, | |
"execution_count": 174, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"counts2_array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 175, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tf[\"患者姓\"] = tf[\"患者氏名\"].apply(lambda x: x.split(\" \")[0])\n", | |
"tf[\"患者名\"] = tf[\"患者氏名\"].apply(lambda x: x.split(\" \")[1])\n", | |
"\n", | |
"tf[\"患者氏名文字数\"] = tf[\"患者氏名\"].apply(lambda x: len(x))\n", | |
"tf[\"患者姓文字数\"] = tf[\"患者姓\"].apply(lambda x: len(x))\n", | |
"tf[\"患者名文字数\"] = tf[\"患者名\"].apply(lambda x: len(x))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 176, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Tom Hanks</td>\n", | |
" <td>Tom</td>\n", | |
" <td>Hanks</td>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Robert De Niro</td>\n", | |
" <td>Robert</td>\n", | |
" <td>De</td>\n", | |
" <td>14</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Gen Hoshino</td>\n", | |
" <td>Gen</td>\n", | |
" <td>Hoshino</td>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>金 正男</td>\n", | |
" <td>金</td>\n", | |
" <td>正男</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>朴 璐美</td>\n", | |
" <td>朴</td>\n", | |
" <td>璐美</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>李 小龍</td>\n", | |
" <td>李</td>\n", | |
" <td>小龍</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>林 彪</td>\n", | |
" <td>林</td>\n", | |
" <td>彪</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>古歩道 ベンジャミン</td>\n", | |
" <td>古歩道</td>\n", | |
" <td>ベンジャミン</td>\n", | |
" <td>10</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>キム イルソン</td>\n", | |
" <td>キム</td>\n", | |
" <td>イルソン</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>山下 奉文</td>\n", | |
" <td>山下</td>\n", | |
" <td>奉文</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>宮沢 賢治</td>\n", | |
" <td>宮沢</td>\n", | |
" <td>賢治</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>徳川 家康</td>\n", | |
" <td>徳川</td>\n", | |
" <td>家康</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>井浦 新</td>\n", | |
" <td>井浦</td>\n", | |
" <td>新</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>窪塚 洋介</td>\n", | |
" <td>窪塚</td>\n", | |
" <td>洋介</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>伊藤 博文</td>\n", | |
" <td>伊藤</td>\n", | |
" <td>博文</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>近衛 文麿</td>\n", | |
" <td>近衛</td>\n", | |
" <td>文麿</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名 患者氏名文字数 患者姓文字数 患者名文字数\n", | |
"0 Tom Hanks Tom Hanks 9 3 5\n", | |
"1 Robert De Niro Robert De 14 6 2\n", | |
"2 Gen Hoshino Gen Hoshino 11 3 7\n", | |
"3 金 正男 金 正男 4 1 2\n", | |
"4 朴 璐美 朴 璐美 4 1 2\n", | |
"5 李 小龍 李 小龍 4 1 2\n", | |
"6 林 彪 林 彪 3 1 1\n", | |
"7 古歩道 ベンジャミン 古歩道 ベンジャミン 10 3 6\n", | |
"8 キム イルソン キム イルソン 7 2 4\n", | |
"9 山下 奉文 山下 奉文 5 2 2\n", | |
"10 宮沢 賢治 宮沢 賢治 5 2 2\n", | |
"11 徳川 家康 徳川 家康 5 2 2\n", | |
"12 井浦 新 井浦 新 4 2 1\n", | |
"13 窪塚 洋介 窪塚 洋介 5 2 2\n", | |
"14 伊藤 博文 伊藤 博文 5 2 2\n", | |
"15 近衛 文麿 近衛 文麿 5 2 2" | |
] | |
}, | |
"execution_count": 176, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 177, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tf.loc[:3, \"判定\"] = \"アルファベット外国人\"\n", | |
"tf.loc[3:7, \"判定\"] = \"漢字外国人\"\n", | |
"tf.loc[7:9, \"判定\"] = \"カタカナ外国人\"\n", | |
"tf.loc[9:, \"判定\"] = \"日本人\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 178, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" <th>判定</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Tom Hanks</td>\n", | |
" <td>Tom</td>\n", | |
" <td>Hanks</td>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Robert De Niro</td>\n", | |
" <td>Robert</td>\n", | |
" <td>De</td>\n", | |
" <td>14</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Gen Hoshino</td>\n", | |
" <td>Gen</td>\n", | |
" <td>Hoshino</td>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>金 正男</td>\n", | |
" <td>金</td>\n", | |
" <td>正男</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>朴 璐美</td>\n", | |
" <td>朴</td>\n", | |
" <td>璐美</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>李 小龍</td>\n", | |
" <td>李</td>\n", | |
" <td>小龍</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>林 彪</td>\n", | |
" <td>林</td>\n", | |
" <td>彪</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>漢字外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>古歩道 ベンジャミン</td>\n", | |
" <td>古歩道</td>\n", | |
" <td>ベンジャミン</td>\n", | |
" <td>10</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>キム イルソン</td>\n", | |
" <td>キム</td>\n", | |
" <td>イルソン</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>山下 奉文</td>\n", | |
" <td>山下</td>\n", | |
" <td>奉文</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>宮沢 賢治</td>\n", | |
" <td>宮沢</td>\n", | |
" <td>賢治</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>徳川 家康</td>\n", | |
" <td>徳川</td>\n", | |
" <td>家康</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>井浦 新</td>\n", | |
" <td>井浦</td>\n", | |
" <td>新</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>窪塚 洋介</td>\n", | |
" <td>窪塚</td>\n", | |
" <td>洋介</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>伊藤 博文</td>\n", | |
" <td>伊藤</td>\n", | |
" <td>博文</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>近衛 文麿</td>\n", | |
" <td>近衛</td>\n", | |
" <td>文麿</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名 患者氏名文字数 患者姓文字数 患者名文字数 判定\n", | |
"0 Tom Hanks Tom Hanks 9 3 5 アルファベット外国人\n", | |
"1 Robert De Niro Robert De 14 6 2 アルファベット外国人\n", | |
"2 Gen Hoshino Gen Hoshino 11 3 7 アルファベット外国人\n", | |
"3 金 正男 金 正男 4 1 2 漢字外国人\n", | |
"4 朴 璐美 朴 璐美 4 1 2 漢字外国人\n", | |
"5 李 小龍 李 小龍 4 1 2 漢字外国人\n", | |
"6 林 彪 林 彪 3 1 1 漢字外国人\n", | |
"7 古歩道 ベンジャミン 古歩道 ベンジャミン 10 3 6 カタカナ外国人\n", | |
"8 キム イルソン キム イルソン 7 2 4 カタカナ外国人\n", | |
"9 山下 奉文 山下 奉文 5 2 2 日本人\n", | |
"10 宮沢 賢治 宮沢 賢治 5 2 2 日本人\n", | |
"11 徳川 家康 徳川 家康 5 2 2 日本人\n", | |
"12 井浦 新 井浦 新 4 2 1 日本人\n", | |
"13 窪塚 洋介 窪塚 洋介 5 2 2 日本人\n", | |
"14 伊藤 博文 伊藤 博文 5 2 2 日本人\n", | |
"15 近衛 文麿 近衛 文麿 5 2 2 日本人" | |
] | |
}, | |
"execution_count": 178, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 179, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#tf[\"患者氏名_cat\"] = le.fit_transform(tf[\"患者氏名\"])\n", | |
"#tf[\"患者姓_cat\"] = le.fit_transform(tf[\"患者姓\"])\n", | |
"#tf[\"患者名_cat\"] = le.fit_transform(tf[\"患者名\"])\n", | |
"tf[\"判定_cat\"] = le.fit_transform(tf[\"判定\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 180, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名</th>\n", | |
" <th>患者姓</th>\n", | |
" <th>患者名</th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" <th>判定</th>\n", | |
" <th>判定_cat</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Tom Hanks</td>\n", | |
" <td>Tom</td>\n", | |
" <td>Hanks</td>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Robert De Niro</td>\n", | |
" <td>Robert</td>\n", | |
" <td>De</td>\n", | |
" <td>14</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Gen Hoshino</td>\n", | |
" <td>Gen</td>\n", | |
" <td>Hoshino</td>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>アルファベット外国人</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>金 正男</td>\n", | |
" <td>金</td>\n", | |
" <td>正男</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>朴 璐美</td>\n", | |
" <td>朴</td>\n", | |
" <td>璐美</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>李 小龍</td>\n", | |
" <td>李</td>\n", | |
" <td>小龍</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>林 彪</td>\n", | |
" <td>林</td>\n", | |
" <td>彪</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>漢字外国人</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>古歩道 ベンジャミン</td>\n", | |
" <td>古歩道</td>\n", | |
" <td>ベンジャミン</td>\n", | |
" <td>10</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>キム イルソン</td>\n", | |
" <td>キム</td>\n", | |
" <td>イルソン</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>カタカナ外国人</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>山下 奉文</td>\n", | |
" <td>山下</td>\n", | |
" <td>奉文</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>宮沢 賢治</td>\n", | |
" <td>宮沢</td>\n", | |
" <td>賢治</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>徳川 家康</td>\n", | |
" <td>徳川</td>\n", | |
" <td>家康</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>井浦 新</td>\n", | |
" <td>井浦</td>\n", | |
" <td>新</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>窪塚 洋介</td>\n", | |
" <td>窪塚</td>\n", | |
" <td>洋介</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>伊藤 博文</td>\n", | |
" <td>伊藤</td>\n", | |
" <td>博文</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>近衛 文麿</td>\n", | |
" <td>近衛</td>\n", | |
" <td>文麿</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>日本人</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名 患者姓 患者名 患者氏名文字数 患者姓文字数 患者名文字数 判定 \\\n", | |
"0 Tom Hanks Tom Hanks 9 3 5 アルファベット外国人 \n", | |
"1 Robert De Niro Robert De 14 6 2 アルファベット外国人 \n", | |
"2 Gen Hoshino Gen Hoshino 11 3 7 アルファベット外国人 \n", | |
"3 金 正男 金 正男 4 1 2 漢字外国人 \n", | |
"4 朴 璐美 朴 璐美 4 1 2 漢字外国人 \n", | |
"5 李 小龍 李 小龍 4 1 2 漢字外国人 \n", | |
"6 林 彪 林 彪 3 1 1 漢字外国人 \n", | |
"7 古歩道 ベンジャミン 古歩道 ベンジャミン 10 3 6 カタカナ外国人 \n", | |
"8 キム イルソン キム イルソン 7 2 4 カタカナ外国人 \n", | |
"9 山下 奉文 山下 奉文 5 2 2 日本人 \n", | |
"10 宮沢 賢治 宮沢 賢治 5 2 2 日本人 \n", | |
"11 徳川 家康 徳川 家康 5 2 2 日本人 \n", | |
"12 井浦 新 井浦 新 4 2 1 日本人 \n", | |
"13 窪塚 洋介 窪塚 洋介 5 2 2 日本人 \n", | |
"14 伊藤 博文 伊藤 博文 5 2 2 日本人 \n", | |
"15 近衛 文麿 近衛 文麿 5 2 2 日本人 \n", | |
"\n", | |
" 判定_cat \n", | |
"0 0 \n", | |
"1 0 \n", | |
"2 0 \n", | |
"3 3 \n", | |
"4 3 \n", | |
"5 3 \n", | |
"6 3 \n", | |
"7 1 \n", | |
"8 1 \n", | |
"9 2 \n", | |
"10 2 \n", | |
"11 2 \n", | |
"12 2 \n", | |
"13 2 \n", | |
"14 2 \n", | |
"15 2 " | |
] | |
}, | |
"execution_count": 180, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 181, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tf = tf[tf.columns[tf.columns.str.contains(\"cat\")|\n", | |
" tf.columns.str.contains(\"文字数\")]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 182, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>患者氏名文字数</th>\n", | |
" <th>患者姓文字数</th>\n", | |
" <th>患者名文字数</th>\n", | |
" <th>判定_cat</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>9</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>14</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>11</td>\n", | |
" <td>3</td>\n", | |
" <td>7</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>10</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 患者氏名文字数 患者姓文字数 患者名文字数 判定_cat\n", | |
"0 9 3 5 0\n", | |
"1 14 6 2 0\n", | |
"2 11 3 7 0\n", | |
"3 4 1 2 3\n", | |
"4 4 1 2 3\n", | |
"5 4 1 2 3\n", | |
"6 3 1 1 3\n", | |
"7 10 3 6 1\n", | |
"8 7 2 4 1\n", | |
"9 5 2 2 2\n", | |
"10 5 2 2 2\n", | |
"11 5 2 2 2\n", | |
"12 4 2 1 2\n", | |
"13 5 2 2 2\n", | |
"14 5 2 2 2\n", | |
"15 5 2 2 2" | |
] | |
}, | |
"execution_count": 182, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 183, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"tmp_test = tf.iloc[:, :-1].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 184, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 9, 3, 5],\n", | |
" [14, 6, 2],\n", | |
" [11, 3, 7],\n", | |
" [ 4, 1, 2],\n", | |
" [ 4, 1, 2],\n", | |
" [ 4, 1, 2],\n", | |
" [ 3, 1, 1],\n", | |
" [10, 3, 6],\n", | |
" [ 7, 2, 4],\n", | |
" [ 5, 2, 2],\n", | |
" [ 5, 2, 2],\n", | |
" [ 5, 2, 2],\n", | |
" [ 4, 2, 1],\n", | |
" [ 5, 2, 2],\n", | |
" [ 5, 2, 2],\n", | |
" [ 5, 2, 2]])" | |
] | |
}, | |
"execution_count": 184, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tmp_test" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 185, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"X_test = np.array([np.append(x,y) for x,y in zip(counts2_array, tmp_test)])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 186, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#X_test = tf.iloc[:, :-1].values\n", | |
"#X_test = counts2_array\n", | |
"y_test = tf.iloc[:, -1].values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 187, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 3, 5],\n", | |
" [ 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 6, 2],\n", | |
" [ 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 3, 7],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 1, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 1, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 1],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 3, 6],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 2, 4],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,\n", | |
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", | |
" 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 1],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 2, 2],\n", | |
" [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
" 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 5, 2, 2]], dtype=int64)" | |
] | |
}, | |
"execution_count": 187, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_test" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 188, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0, 0, 0, 3, 3, 3, 3, 1, 1, 2, 2, 2, 2, 2, 2, 2])" | |
] | |
}, | |
"execution_count": 188, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_test" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 189, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0, 0, 0, 3, 3, 3, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2])" | |
] | |
}, | |
"execution_count": 189, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gnb.predict(X_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 190, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.8125" | |
] | |
}, | |
"execution_count": 190, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gnb.score(X_test, y_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment