Created
September 19, 2022 14:53
-
-
Save yssymmt/c55dc8db4e659b38f4f07608a0b449d3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "28c8df96", | |
"metadata": {}, | |
"source": [ | |
"#06: bow2nb" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "cecfa7a5", | |
"metadata": {}, | |
"source": [ | |
"#### パッケージの読み込み" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "0d6727ff", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from sqlalchemy import create_engine\n", | |
"import teradatasql\n", | |
"import teradatasqlalchemy\n", | |
"from sklearn.feature_extraction.text import CountVectorizer\n", | |
"from sklearn.naive_bayes import MultinomialNB" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7ef55e42", | |
"metadata": {}, | |
"source": [ | |
"#### Teradataへの接続、SQLAlchemy エンジンを作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "a79a73d7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"host = \"192.168.999.999\"\n", | |
"user = \"jumbo\"\n", | |
"password = \"mambo\"\n", | |
"connstr = \"teradatasql://{user}:{password}@{host}\".format(host=host, user=user, password=password)\n", | |
"engine = create_engine(connstr)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7436a0a9", | |
"metadata": {}, | |
"source": [ | |
"#### 学習データの取得" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "51ddb7ac", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cat</th>\n", | |
" <th>word</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>若林</td>\n", | |
" <td>若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>若林</td>\n", | |
" <td>漫才 ツッコミ 担当 たりないふたり ボケ 担当</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>若林</td>\n", | |
" <td>ナナメ 夕暮れ 他 本 出す</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>若林</td>\n", | |
" <td>深夜 一人 バスケットボール スリーポイント 練習</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>若林</td>\n", | |
" <td>プライベート バスケットボール 足 怪我</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>若林</td>\n", | |
" <td>星野源 日本 テレビ 界 希望 思う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>若林</td>\n", | |
" <td>藤井青銅 ピンク ベスト じゃない方 しゃべれる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>若林</td>\n", | |
" <td>mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>春日</td>\n", | |
" <td>茶々 名前 チワワ 犬 飼う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>春日</td>\n", | |
" <td>結婚 直前 浮気 ばれる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>春日</td>\n", | |
" <td>六本木 社長 モンクレール ダウン もらう</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>春日</td>\n", | |
" <td>ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>春日</td>\n", | |
" <td>ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>春日</td>\n", | |
" <td>漫才 ボケ 担当 ラジオ テレビ ボケ ない</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>春日</td>\n", | |
" <td>普段 靴下 履く ない 足 裏 象 よう</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>春日</td>\n", | |
" <td>バカリズム 存在 面白い ウケる スベる ない</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cat word\n", | |
"0 若林 若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない\n", | |
"1 若林 漫才 ツッコミ 担当 たりないふたり ボケ 担当\n", | |
"2 若林 ナナメ 夕暮れ 他 本 出す\n", | |
"3 若林 深夜 一人 バスケットボール スリーポイント 練習\n", | |
"4 若林 プライベート バスケットボール 足 怪我\n", | |
"5 若林 星野源 日本 テレビ 界 希望 思う\n", | |
"6 若林 藤井青銅 ピンク ベスト じゃない方 しゃべれる\n", | |
"7 若林 mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる\n", | |
"8 春日 茶々 名前 チワワ 犬 飼う\n", | |
"9 春日 結婚 直前 浮気 ばれる\n", | |
"10 春日 六本木 社長 モンクレール ダウン もらう\n", | |
"11 春日 ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ\n", | |
"12 春日 ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶\n", | |
"13 春日 漫才 ボケ 担当 ラジオ テレビ ボケ ない\n", | |
"14 春日 普段 靴下 履く ない 足 裏 象 よう\n", | |
"15 春日 バカリズム 存在 面白い ウケる スベる ない" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" train = pd.read_sql(\"\"\"\n", | |
" select \n", | |
" cat, word \n", | |
" from jumbo.aud11_denorm \n", | |
" where docid <=16 \n", | |
" order by docid \n", | |
" \"\"\", conn)\n", | |
"train" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "86f646b0", | |
"metadata": {}, | |
"source": [ | |
"#### 結果変数の定義(若林=1、それ以外=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "21b73330", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 1\n", | |
"1 1\n", | |
"2 1\n", | |
"3 1\n", | |
"4 1\n", | |
"5 1\n", | |
"6 1\n", | |
"7 1\n", | |
"8 0\n", | |
"9 0\n", | |
"10 0\n", | |
"11 0\n", | |
"12 0\n", | |
"13 0\n", | |
"14 0\n", | |
"15 0\n", | |
"Name: cat, dtype: int64" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = train['cat'].apply(lambda s: 1 if s == '若林' else 0)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e4ed88b6", | |
"metadata": {}, | |
"source": [ | |
"#### CountVectorizer(最小出現文書数1件、一文字の単語も対象に含める)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "bc544be4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"CountVectorizer(token_pattern='(?u)\\\\b\\\\w+\\\\b')" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"vectorizer = CountVectorizer(min_df=1, token_pattern='(?u)\\\\b\\\\w+\\\\b')\n", | |
"vectorizer.fit(train['word'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "4ac51f8d", | |
"metadata": {}, | |
"source": [ | |
"#### 抽出単語の確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "39275fd5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['mc', 'waka', 'しゃべれる', 'じゃない方', 'すいません', 'たりないふたり', 'ない', 'ばれる',\n", | |
" 'もらう', 'よう', 'ウケる', 'スベる', 'スリーポイント', 'セーター', 'ダウン', 'チワワ', 'ツッコミ',\n", | |
" 'テレビ', 'トゥース', 'ナナメ', 'バカリズム', 'バスケットボール', 'ピンク', 'プライベート', 'ベスト',\n", | |
" 'ボケ', 'モンクレール', 'ラジオ', 'ラップ', '一人', '人', '人見知り', '他', '以外', '入れる',\n", | |
" '六本木', '出す', '務める', '叫ぶ', '司会', '名前', '夕暮れ', '大声', '存在', '履く',\n", | |
" '希望', '幾つ', '張る', '後輩', '思う', '怪我', '担当', '挨拶', '日本', '日本武道館',\n", | |
" '星野源', '普段', '本', '本番', '横浜アリーナ', '歌', '浮気', '深夜', '漫才', '犬', '界',\n", | |
" '番組', '直前', '着', '着る', '社長', '結婚', '練習', '胸', '芸人', '若槻千夏', '茶々',\n", | |
" '藤井青銅', '裏', '話す', '象', '足', '面白い', '靴下', '飼う'], dtype=object)" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"vectorizer.get_feature_names_out()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1b363815", | |
"metadata": {}, | |
"source": [ | |
"#### ベクトルに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "2b12f6d1", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<16x85 sparse matrix of type '<class 'numpy.int64'>'\n", | |
"\twith 101 stored elements in Compressed Sparse Row format>" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_bow = vectorizer.transform(train['word'])\n", | |
"train_bow" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "5abb7cae", | |
"metadata": {}, | |
"source": [ | |
"#### データの確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "8d911945", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mc</th>\n", | |
" <th>waka</th>\n", | |
" <th>しゃべれる</th>\n", | |
" <th>じゃない方</th>\n", | |
" <th>すいません</th>\n", | |
" <th>たりないふたり</th>\n", | |
" <th>ない</th>\n", | |
" <th>ばれる</th>\n", | |
" <th>もらう</th>\n", | |
" <th>よう</th>\n", | |
" <th>...</th>\n", | |
" <th>若槻千夏</th>\n", | |
" <th>茶々</th>\n", | |
" <th>藤井青銅</th>\n", | |
" <th>裏</th>\n", | |
" <th>話す</th>\n", | |
" <th>象</th>\n", | |
" <th>足</th>\n", | |
" <th>面白い</th>\n", | |
" <th>靴下</th>\n", | |
" <th>飼う</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>16 rows × 85 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mc waka しゃべれる じゃない方 すいません たりないふたり ない ばれる もらう よう ... 若槻千夏 茶々 \\\n", | |
"0 0 0 0 0 0 0 1 0 0 0 ... 1 0 \n", | |
"1 0 0 0 0 0 1 0 0 0 0 ... 0 0 \n", | |
"2 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"3 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"4 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"5 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"6 0 0 1 1 0 0 0 0 0 0 ... 0 0 \n", | |
"7 1 1 0 0 0 0 0 0 0 0 ... 0 1 \n", | |
"8 0 0 0 0 0 0 0 0 0 0 ... 0 1 \n", | |
"9 0 0 0 0 0 0 0 1 0 0 ... 0 0 \n", | |
"10 0 0 0 0 0 0 0 0 1 0 ... 0 0 \n", | |
"11 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"12 0 0 0 0 1 0 0 0 1 0 ... 0 0 \n", | |
"13 0 0 0 0 0 0 1 0 0 0 ... 0 0 \n", | |
"14 0 0 0 0 0 0 1 0 0 1 ... 0 0 \n", | |
"15 0 0 0 0 0 0 1 0 0 0 ... 0 0 \n", | |
"\n", | |
" 藤井青銅 裏 話す 象 足 面白い 靴下 飼う \n", | |
"0 0 0 1 0 0 0 0 0 \n", | |
"1 0 0 0 0 0 0 0 0 \n", | |
"2 0 0 0 0 0 0 0 0 \n", | |
"3 0 0 0 0 0 0 0 0 \n", | |
"4 0 0 0 0 1 0 0 0 \n", | |
"5 0 0 0 0 0 0 0 0 \n", | |
"6 1 0 0 0 0 0 0 0 \n", | |
"7 0 0 0 0 0 0 0 0 \n", | |
"8 0 0 0 0 0 0 0 1 \n", | |
"9 0 0 0 0 0 0 0 0 \n", | |
"10 0 0 0 0 0 0 0 0 \n", | |
"11 0 0 0 0 0 0 0 0 \n", | |
"12 0 0 0 0 0 0 0 0 \n", | |
"13 0 0 0 0 0 0 0 0 \n", | |
"14 0 1 0 1 1 0 1 0 \n", | |
"15 0 0 0 0 0 1 0 0 \n", | |
"\n", | |
"[16 rows x 85 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_bow_df = pd.DataFrame(train_bow.toarray(),columns=vectorizer.get_feature_names_out())\n", | |
"train_bow_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "b2afc2dc", | |
"metadata": {}, | |
"source": [ | |
"#### CSVに出力して確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "89866ace", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"train_bow_df.to_csv('train_bow.csv')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f55495a0", | |
"metadata": {}, | |
"source": [ | |
"#### モデル作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "01e003cd", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"MultinomialNB()" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = MultinomialNB()\n", | |
"model.fit(train_bow, y)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ab26f3f2", | |
"metadata": {}, | |
"source": [ | |
"#### 学習用データに対するスコアリング" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "cc346468", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1.0" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.score(train_bow, y)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3b221d72", | |
"metadata": {}, | |
"source": [ | |
"#### 評価データの取得" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "c138556e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cat</th>\n", | |
" <th>word</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>若林</td>\n", | |
" <td>山里亮太 ツッコミ 敵わ ない 思う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>若林</td>\n", | |
" <td>入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>春日</td>\n", | |
" <td>ぼる塾 人 トゥース 掛け合い 面白い</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>春日</td>\n", | |
" <td>スベる 芸風 スベる 怖い 思う</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cat word\n", | |
"0 若林 山里亮太 ツッコミ 敵わ ない 思う\n", | |
"1 若林 入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ\n", | |
"2 春日 ぼる塾 人 トゥース 掛け合い 面白い\n", | |
"3 春日 スベる 芸風 スベる 怖い 思う" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" test = pd.read_sql(\"\"\"\n", | |
" select \n", | |
" cat, word \n", | |
" from jumbo.aud11_denorm \n", | |
" where docid>=17\n", | |
" order by docid \n", | |
" \"\"\", conn)\n", | |
"test" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "597c8261", | |
"metadata": {}, | |
"source": [ | |
"#### 評価データ結果変数の用意" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "84d44612", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ys = test['cat'].apply(lambda s: 1 if s == '若林' else 0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9acec78a", | |
"metadata": {}, | |
"source": [ | |
"#### ベクトルに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "da94f261", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<4x85 sparse matrix of type '<class 'numpy.int64'>'\n", | |
"\twith 10 stored elements in Compressed Sparse Row format>" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"test_bow = vectorizer.transform(test['word'])\n", | |
"test_bow" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "48ce3df1", | |
"metadata": {}, | |
"source": [ | |
"#### 評価用データに対するスコアリング" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "3cb51eab", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1.0" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.score(test_bow, ys)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "97d0103f", | |
"metadata": {}, | |
"source": [ | |
"#### 個別の判定結果を取得" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "80c2f73a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([1, 1, 0, 0], dtype=int64)" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfy = model.predict(test_bow)\n", | |
"dfy" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f5f4d9ba", | |
"metadata": {}, | |
"source": [ | |
"#### 配列をデータフレームに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "f82cc21b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0\n", | |
"0 1\n", | |
"1 1\n", | |
"2 0\n", | |
"3 0" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfys = pd.DataFrame(dfy)\n", | |
"dfys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "38a64447", | |
"metadata": {}, | |
"source": [ | |
"#### 列名を変更" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "57c351a8", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>predict</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" predict\n", | |
"0 1\n", | |
"1 1\n", | |
"2 0\n", | |
"3 0" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfys.columns = ['predict']\n", | |
"dfys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f198f023", | |
"metadata": {}, | |
"source": [ | |
"#### 予測結果(1/0)を元のラベル名(若林/春日)に戻す" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "5ff711cb", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>predict</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" predict\n", | |
"0 若林\n", | |
"1 若林\n", | |
"2 春日\n", | |
"3 春日" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfys['predict'] = dfys['predict'].apply(lambda s: '若林' if s == 1 else ('春日' if s == 0 else 'ヘップバーン'))\n", | |
"dfys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "a7a35bf5", | |
"metadata": {}, | |
"source": [ | |
"#### 結合先のデータを取得" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "26cb43c0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>cat</th>\n", | |
" <th>word</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>17</td>\n", | |
" <td>若林</td>\n", | |
" <td>山里亮太 ツッコミ 敵わ ない 思う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>18</td>\n", | |
" <td>若林</td>\n", | |
" <td>入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>19</td>\n", | |
" <td>春日</td>\n", | |
" <td>ぼる塾 人 トゥース 掛け合い 面白い</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>20</td>\n", | |
" <td>春日</td>\n", | |
" <td>スベる 芸風 スベる 怖い 思う</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid cat word\n", | |
"0 17 若林 山里亮太 ツッコミ 敵わ ない 思う\n", | |
"1 18 若林 入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ\n", | |
"2 19 春日 ぼる塾 人 トゥース 掛け合い 面白い\n", | |
"3 20 春日 スベる 芸風 スベる 怖い 思う" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" bringer = pd.read_sql(\"\"\"\n", | |
" select \n", | |
" docid, cat, word \n", | |
" from jumbo.aud11_denorm \n", | |
" where docid>=17\n", | |
" order by docid \n", | |
" \"\"\", conn)\n", | |
"bringer" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ecff062a", | |
"metadata": {}, | |
"source": [ | |
"#### 結合する" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "f8538ee3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>cat</th>\n", | |
" <th>word</th>\n", | |
" <th>predict</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>17</td>\n", | |
" <td>若林</td>\n", | |
" <td>山里亮太 ツッコミ 敵わ ない 思う</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>18</td>\n", | |
" <td>若林</td>\n", | |
" <td>入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>19</td>\n", | |
" <td>春日</td>\n", | |
" <td>ぼる塾 人 トゥース 掛け合い 面白い</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>20</td>\n", | |
" <td>春日</td>\n", | |
" <td>スベる 芸風 スベる 怖い 思う</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid cat word predict\n", | |
"0 17 若林 山里亮太 ツッコミ 敵わ ない 思う 若林\n", | |
"1 18 若林 入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ 若林\n", | |
"2 19 春日 ぼる塾 人 トゥース 掛け合い 面白い 春日\n", | |
"3 20 春日 スベる 芸風 スベる 怖い 思う 春日" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bring = bringer.join(dfys, how='inner') \n", | |
"bring" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment