Created
September 19, 2022 14:55
-
-
Save yssymmt/c396cdd35b2879adc98dee4c42522f1b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "5cfde877", | |
"metadata": {}, | |
"source": [ | |
"# 08: LDA" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "81f019c7", | |
"metadata": {}, | |
"source": [ | |
"#### パッケージの読み込み" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "ee9cd190", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import getpass\n", | |
"import os\n", | |
"from urllib.parse import quote\n", | |
"\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import teradatasql\n", | |
"import teradatasqlalchemy\n", | |
"from sklearn.decomposition import LatentDirichletAllocation\n", | |
"from sklearn.feature_extraction.text import CountVectorizer\n", | |
"from sqlalchemy import create_engine" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ecae56d0", | |
"metadata": {}, | |
"source": [ | |
"#### Teradataへの接続、SQLAlchemy エンジンを作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "8f9ac0f7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Connection settings come from the environment so that no secret is stored in\n", | |
"# the notebook. TD_HOST / TD_USER fall back to the previous placeholder values;\n", | |
"# the password is taken from TD_PASSWORD, or prompted for when that is unset\n", | |
"# (set TD_PASSWORD for non-interactive runs, where prompting is not possible).\n", | |
"host = os.environ.get(\"TD_HOST\", \"192.168.999.999\")\n", | |
"user = os.environ.get(\"TD_USER\", \"jumbo\")\n", | |
"password = os.environ.get(\"TD_PASSWORD\") or getpass.getpass(\"Teradata password: \")\n", | |
"\n", | |
"# Percent-encode user and password: characters such as '@', ':' or '/' would\n", | |
"# otherwise corrupt the URL that create_engine() has to parse.\n", | |
"connstr = \"teradatasql://{user}:{password}@{host}\".format(\n", | |
"    host=host, user=quote(user, safe=\"\"), password=quote(password, safe=\"\")\n", | |
")\n", | |
"engine = create_engine(connstr)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "80e06bc8", | |
"metadata": {}, | |
"source": [ | |
"#### 学習データの取得" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "674ec215", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>word</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>漫才 ツッコミ 担当 たりないふたり ボケ 担当</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>ナナメ 夕暮れ 他 本 出す</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>深夜 一人 バスケットボール スリーポイント 練習</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>プライベート バスケットボール 足 怪我</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>星野源 日本 テレビ 界 希望 思う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" <td>藤井青銅 ピンク ベスト じゃない方 しゃべれる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>9</td>\n", | |
" <td>茶々 名前 チワワ 犬 飼う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>10</td>\n", | |
" <td>結婚 直前 浮気 ばれる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>11</td>\n", | |
" <td>六本木 社長 モンクレール ダウン もらう</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>12</td>\n", | |
" <td>ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>13</td>\n", | |
" <td>ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>14</td>\n", | |
" <td>漫才 ボケ 担当 ラジオ テレビ ボケ ない</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>15</td>\n", | |
" <td>普段 靴下 履く ない 足 裏 象 よう</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>16</td>\n", | |
" <td>バカリズム 存在 面白い ウケる スベる ない</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>17</td>\n", | |
" <td>山里亮太 ツッコミ 敵わ ない 思う</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>18</td>\n", | |
" <td>入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>19</td>\n", | |
" <td>ぼる塾 人 トゥース 掛け合い 面白い</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>20</td>\n", | |
" <td>スベる 芸風 スベる 怖い 思う</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid word\n", | |
"0 1 若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない\n", | |
"1 2 漫才 ツッコミ 担当 たりないふたり ボケ 担当\n", | |
"2 3 ナナメ 夕暮れ 他 本 出す\n", | |
"3 4 深夜 一人 バスケットボール スリーポイント 練習\n", | |
"4 5 プライベート バスケットボール 足 怪我\n", | |
"5 6 星野源 日本 テレビ 界 希望 思う\n", | |
"6 7 藤井青銅 ピンク ベスト じゃない方 しゃべれる\n", | |
"7 8 mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる\n", | |
"8 9 茶々 名前 チワワ 犬 飼う\n", | |
"9 10 結婚 直前 浮気 ばれる\n", | |
"10 11 六本木 社長 モンクレール ダウン もらう\n", | |
"11 12 ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ\n", | |
"12 13 ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶\n", | |
"13 14 漫才 ボケ 担当 ラジオ テレビ ボケ ない\n", | |
"14 15 普段 靴下 履く ない 足 裏 象 よう\n", | |
"15 16 バカリズム 存在 面白い ウケる スベる ない\n", | |
"16 17 山里亮太 ツッコミ 敵わ ない 思う\n", | |
"17 18 入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ\n", | |
"18 19 ぼる塾 人 トゥース 掛け合い 面白い\n", | |
"19 20 スベる 芸風 スベる 怖い 思う" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Query docid and word from jumbo.aud11_denorm, ordered by docid.\n", | |
"train_query = \"\"\"\n", | |
" select \n", | |
" docid, word \n", | |
" from jumbo.aud11_denorm \n", | |
" order by docid \n", | |
" \"\"\"\n", | |
"\n", | |
"# Run the query on a connection that is closed as soon as the frame is loaded.\n", | |
"with engine.connect() as conn:\n", | |
"    train = pd.read_sql(train_query, conn)\n", | |
"train" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "4b14ddc6", | |
"metadata": {}, | |
"source": [ | |
"#### CountVectorizer(最小出現文書数 1、一文字の単語も対象に含める)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"id": "ddac5b9c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"CountVectorizer(token_pattern='(?u)\\\\b\\\\w+\\\\b')" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Custom token_pattern so that single-character words are kept as tokens.\n", | |
"vectorizer = CountVectorizer(\n", | |
"    min_df=1,\n", | |
"    token_pattern='(?u)\\\\b\\\\w+\\\\b',\n", | |
")\n", | |
"vectorizer.fit(train['word'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e9569380", | |
"metadata": {}, | |
"source": [ | |
"#### 抽出単語の確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"id": "4bb9a147", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['mc', 'waka', 'お前', 'しゃべれる', 'じゃない方', 'すいません', 'たりないふたり', 'ない',\n", | |
" 'ばれる', 'ぼる塾', 'もらう', 'よう', 'ウケる', 'スベる', 'スリーポイント', 'セーター', 'ダウン',\n", | |
" 'チワワ', 'ツッコミ', 'テレビ', 'トゥース', 'ナナメ', 'バカリズム', 'バスケットボール', 'ピンク',\n", | |
" 'プライベート', 'ベスト', 'ボケ', 'モンクレール', 'ラジオ', 'ラップ', '一人', '人', '人見知り',\n", | |
" '他', '以外', '入れる', '入船', '六本木', '出す', '出身', '務める', '叫ぶ', '司会', '名前',\n", | |
" '嘘', '地元', '夕暮れ', '大声', '存在', '履く', '山里亮太', '希望', '幾つ', '張る', '後輩',\n", | |
" '怖い', '思う', '怪我', '担当', '挨拶', '掛け合い', '敵わ', '日本', '日本武道館', '星野源',\n", | |
" '普段', '本', '本番', '横浜アリーナ', '歌', '浮気', '深夜', '漫才', '犬', '界', '番組',\n", | |
" '直前', '着', '着る', '社長', '築地', '結婚', '練習', '胸', '芸人', '芸風', '若槻千夏',\n", | |
" '茶々', '藤井青銅', '裏', '話す', '象', '足', '面白い', '靴下', '飼う'], dtype=object)" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Vocabulary extracted by the fitted vectorizer.\n", | |
"feature_names = vectorizer.get_feature_names_out()\n", | |
"feature_names" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ff307e30", | |
"metadata": {}, | |
"source": [ | |
"#### ベクトルに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"id": "8141ff75", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<20x97 sparse matrix of type '<class 'numpy.int64'>'\n", | |
"\twith 123 stored elements in Compressed Sparse Row format>" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Convert each document's word string into a sparse term-count row.\n", | |
"train_bow = vectorizer.transform(train['word'])\n", | |
"train_bow" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "84c19fe4", | |
"metadata": {}, | |
"source": [ | |
"#### データの確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"id": "e21879cf", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mc</th>\n", | |
" <th>waka</th>\n", | |
" <th>お前</th>\n", | |
" <th>しゃべれる</th>\n", | |
" <th>じゃない方</th>\n", | |
" <th>すいません</th>\n", | |
" <th>たりないふたり</th>\n", | |
" <th>ない</th>\n", | |
" <th>ばれる</th>\n", | |
" <th>ぼる塾</th>\n", | |
" <th>...</th>\n", | |
" <th>若槻千夏</th>\n", | |
" <th>茶々</th>\n", | |
" <th>藤井青銅</th>\n", | |
" <th>裏</th>\n", | |
" <th>話す</th>\n", | |
" <th>象</th>\n", | |
" <th>足</th>\n", | |
" <th>面白い</th>\n", | |
" <th>靴下</th>\n", | |
" <th>飼う</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>20 rows × 97 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mc waka お前 しゃべれる じゃない方 すいません たりないふたり ない ばれる ぼる塾 ... 若槻千夏 茶々 \\\n", | |
"0 0 0 0 0 0 0 0 1 0 0 ... 1 0 \n", | |
"1 0 0 0 0 0 0 1 0 0 0 ... 0 0 \n", | |
"2 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"3 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"4 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"5 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"6 0 0 0 1 1 0 0 0 0 0 ... 0 0 \n", | |
"7 1 1 0 0 0 0 0 0 0 0 ... 0 1 \n", | |
"8 0 0 0 0 0 0 0 0 0 0 ... 0 1 \n", | |
"9 0 0 0 0 0 0 0 0 1 0 ... 0 0 \n", | |
"10 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"11 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"12 0 0 0 0 0 1 0 0 0 0 ... 0 0 \n", | |
"13 0 0 0 0 0 0 0 1 0 0 ... 0 0 \n", | |
"14 0 0 0 0 0 0 0 1 0 0 ... 0 0 \n", | |
"15 0 0 0 0 0 0 0 1 0 0 ... 0 0 \n", | |
"16 0 0 0 0 0 0 0 1 0 0 ... 0 0 \n", | |
"17 0 0 1 0 0 0 0 0 0 0 ... 0 0 \n", | |
"18 0 0 0 0 0 0 0 0 0 1 ... 0 0 \n", | |
"19 0 0 0 0 0 0 0 0 0 0 ... 0 0 \n", | |
"\n", | |
" 藤井青銅 裏 話す 象 足 面白い 靴下 飼う \n", | |
"0 0 0 1 0 0 0 0 0 \n", | |
"1 0 0 0 0 0 0 0 0 \n", | |
"2 0 0 0 0 0 0 0 0 \n", | |
"3 0 0 0 0 0 0 0 0 \n", | |
"4 0 0 0 0 1 0 0 0 \n", | |
"5 0 0 0 0 0 0 0 0 \n", | |
"6 1 0 0 0 0 0 0 0 \n", | |
"7 0 0 0 0 0 0 0 0 \n", | |
"8 0 0 0 0 0 0 0 1 \n", | |
"9 0 0 0 0 0 0 0 0 \n", | |
"10 0 0 0 0 0 0 0 0 \n", | |
"11 0 0 0 0 0 0 0 0 \n", | |
"12 0 0 0 0 0 0 0 0 \n", | |
"13 0 0 0 0 0 0 0 0 \n", | |
"14 0 1 0 1 1 0 1 0 \n", | |
"15 0 0 0 0 0 1 0 0 \n", | |
"16 0 0 0 0 0 0 0 0 \n", | |
"17 0 0 0 0 0 0 0 0 \n", | |
"18 0 0 0 0 0 1 0 0 \n", | |
"19 0 0 0 0 0 0 0 0 \n", | |
"\n", | |
"[20 rows x 97 columns]" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Dense view of the term-count matrix, with the vocabulary as column labels.\n", | |
"train_bow_df = pd.DataFrame(\n", | |
"    train_bow.toarray(),\n", | |
"    columns=vectorizer.get_feature_names_out(),\n", | |
")\n", | |
"train_bow_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "faadd518", | |
"metadata": {}, | |
"source": [ | |
"#### トピック数の決定(文書が少なく内容もばらけているため、Perplexity ではうまく決まらない)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"id": "eb729f6b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"トピック数: 1, Perplexity: 125.86108173674388\n", | |
"トピック数: 2, Perplexity: 154.80910669865207\n", | |
"トピック数: 3, Perplexity: 179.35150604650232\n", | |
"トピック数: 4, Perplexity: 195.08356699295754\n", | |
"トピック数: 5, Perplexity: 201.6412436347303\n", | |
"トピック数: 6, Perplexity: 226.35318321696758\n", | |
"トピック数: 7, Perplexity: 241.42579659469857\n", | |
"トピック数: 8, Perplexity: 222.50809327186462\n", | |
"トピック数: 9, Perplexity: 232.7381597276757\n" | |
] | |
} | |
], | |
"source": [ | |
"# Fit one LDA model per candidate topic count (1-9) and print its perplexity\n", | |
"# on the training matrix. random_state is fixed, matching the final model, so\n", | |
"# that reruns print the same numbers; without it every run gives a different\n", | |
"# comparison.\n", | |
"for c_num in range(1, 10):\n", | |
"    lda = LatentDirichletAllocation(\n", | |
"        n_components=c_num,\n", | |
"        random_state=0,\n", | |
"    )\n", | |
"    lda.fit(train_bow)\n", | |
"    print(f\"トピック数: {c_num}, Perplexity: {lda.perplexity(train_bow)}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "924b3d97", | |
"metadata": {}, | |
"source": [ | |
"#### モデル作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"id": "e749fe3a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"LatentDirichletAllocation(max_iter=50, n_components=3, n_jobs=-1,\n", | |
" random_state=0)" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Final model: fixed topic count and seed, batch learning, up to 50 iterations.\n", | |
"topic_num = 3\n", | |
"lda = LatentDirichletAllocation(\n", | |
"    n_components=topic_num,\n", | |
"    max_iter=50,\n", | |
"    learning_method='batch',\n", | |
"    random_state=0,\n", | |
"    n_jobs=-1,\n", | |
")\n", | |
"lda.fit(train_bow)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6cd2a416", | |
"metadata": {}, | |
"source": [ | |
"#### モデルの確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"id": "784f9504", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0.33364249, 0.33364249, 1.33279045, 1.33228048, 1.33228048,\n", | |
" 1.33279613, 0.33375586, 0.33392037, 1.33199421, 1.33216413,\n", | |
" 1.33383922, 0.3336389 , 0.33363041, 0.33411547, 0.33382961,\n", | |
" 1.33279613, 0.33369624, 0.33367195, 1.33220832, 1.32254863,\n", | |
" 2.33247564, 0.33383099, 0.33363041, 0.33386203, 4.33270116,\n", | |
" 0.33395686, 2.3325304 , 0.33369317, 0.33369624, 0.33370699,\n", | |
" 0.33364249, 0.33382961, 2.33957957, 0.33348845, 0.33383099,\n", | |
" 0.33348845, 0.33364249, 2.33282055, 0.33369624, 0.33383099,\n", | |
" 2.33282055, 0.33348845, 1.33268539, 0.33348845, 0.33367195,\n", | |
" 1.33279045, 1.33279045, 0.33383099, 1.33268539, 0.33363041,\n", | |
" 0.3336389 , 0.33387846, 1.33227561, 0.33348845, 1.33268539,\n", | |
" 1.33279613, 0.33385206, 1.32286346, 0.33395686, 0.3337082 ,\n", | |
" 1.33279613, 1.33216413, 0.33387846, 1.33227561, 0.33364249,\n", | |
" 1.33227561, 0.3336389 , 0.33383099, 0.33348845, 0.33364249,\n", | |
" 0.33364249, 1.33199421, 0.33382961, 0.33370931, 0.33367195,\n", | |
" 1.33227561, 0.33348845, 1.33199421, 1.33279613, 2.33277364,\n", | |
" 0.33369624, 1.33279045, 1.33199421, 0.33382961, 1.33268539,\n", | |
" 1.33279613, 0.33385206, 0.33348845, 0.3339784 , 1.33228048,\n", | |
" 0.3336389 , 0.33348845, 0.3336389 , 0.33377179, 1.32420336,\n", | |
" 0.3336389 , 0.33367195],\n", | |
" [1.33262161, 1.33262161, 0.33356871, 0.33375948, 0.33375948,\n", | |
" 0.33354284, 1.33235653, 3.32460552, 0.33387422, 0.333779 ,\n", | |
" 0.33385512, 1.33258556, 0.33365902, 2.33191294, 1.33215778,\n", | |
" 0.33354284, 0.33364787, 0.33366845, 2.33354053, 1.32673701,\n", | |
" 0.33366764, 1.3321539 , 0.33365902, 2.3320815 , 0.33358567,\n", | |
" 1.33185727, 0.33365845, 3.332454 , 0.33364787, 1.33239228,\n", | |
" 1.33262161, 1.33215778, 1.32619999, 0.33348409, 1.3321539 ,\n", | |
" 0.33348409, 1.33262161, 0.33355566, 0.33364787, 1.3321539 ,\n", | |
" 0.33355566, 0.33348409, 0.33359578, 0.33348409, 0.33366845,\n", | |
" 0.33356871, 0.33356871, 1.3321539 , 0.33359578, 0.33365902,\n", | |
" 1.33258556, 1.33204519, 0.33380087, 0.33348409, 0.33359578,\n", | |
" 0.33354284, 1.33203843, 2.34249998, 1.33185727, 3.33244301,\n", | |
" 0.33354284, 0.333779 , 1.33204519, 0.33380087, 1.33262161,\n", | |
" 0.33380087, 1.33258556, 1.3321539 , 0.33348409, 1.33262161,\n", | |
" 1.33262161, 0.33387422, 1.33215778, 2.33242771, 0.33366845,\n", | |
" 0.33380087, 0.33348409, 0.33387422, 0.33354284, 0.33355622,\n", | |
" 0.33364787, 0.33356871, 0.33387422, 1.33215778, 0.33359578,\n", | |
" 0.33354284, 1.33203843, 0.33348409, 1.33165734, 0.33375948,\n", | |
" 1.33258556, 0.33348409, 1.33258556, 2.33228399, 0.33410033,\n", | |
" 1.33258556, 0.33366845],\n", | |
" [0.33373589, 0.33373589, 0.33364084, 0.33396004, 0.33396004,\n", | |
" 0.33366103, 0.33388761, 2.34147411, 0.33413157, 0.33405687,\n", | |
" 1.33230567, 0.33377553, 1.33271057, 1.33397159, 0.33401261,\n", | |
" 0.33366103, 1.33265588, 1.3326596 , 0.33425115, 1.35071436,\n", | |
" 0.33385672, 0.3340151 , 1.33271057, 0.33405646, 0.33371317,\n", | |
" 0.33418588, 0.33381115, 0.33385283, 1.33265588, 0.33390074,\n", | |
" 0.33373589, 0.33401261, 0.33422043, 1.33302746, 0.3340151 ,\n", | |
" 1.33302746, 0.33373589, 0.33362379, 1.33265588, 0.3340151 ,\n", | |
" 0.33362379, 1.33302746, 0.33371883, 1.33302746, 1.3326596 ,\n", | |
" 0.33364084, 0.33364084, 0.3340151 , 0.33371883, 1.33271057,\n", | |
" 0.33377553, 0.33407635, 0.33392352, 1.33302746, 0.33371883,\n", | |
" 0.33366103, 0.33410951, 0.33463656, 0.33418588, 0.33384879,\n", | |
" 0.33366103, 0.33405687, 0.33407635, 0.33392352, 0.33373589,\n", | |
" 0.33392352, 0.33377553, 0.3340151 , 1.33302746, 0.33373589,\n", | |
" 0.33373589, 0.33413157, 0.33401261, 0.33386298, 1.3326596 ,\n", | |
" 0.33392352, 1.33302746, 0.33413157, 0.33366103, 0.33367013,\n", | |
" 1.33265588, 0.33364084, 0.33413157, 0.33401261, 0.33371883,\n", | |
" 0.33366103, 0.33410951, 1.33302746, 1.33436426, 0.33396004,\n", | |
" 0.33377553, 1.33302746, 0.33377553, 0.33394422, 1.34169632,\n", | |
" 0.33377553, 1.3326596 ]])" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Topic-word weights of the fitted model: one row per topic, one column per vocabulary term.\n", | |
"lda.components_" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2ab9cd37", | |
"metadata": {}, | |
"source": [ | |
"#### 正規化もできる(各トピックの重みの合計が 1 になるように割る)。今回は使用しなかった" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"id": "3fc1b2bc", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0.00415477, 0.00415477, 0.01659693, 0.01659058, 0.01659058,\n", | |
" 0.016597 , 0.00415618, 0.00415823, 0.01658701, 0.01658913,\n", | |
" 0.01660999, 0.00415473, 0.00415462, 0.00416066, 0.0041571 ,\n", | |
" 0.016597 , 0.00415544, 0.00415514, 0.01658968, 0.01646939,\n", | |
" 0.02904577, 0.00415712, 0.00415462, 0.00415751, 0.05395411,\n", | |
" 0.00415869, 0.02904645, 0.0041554 , 0.00415544, 0.00415557,\n", | |
" 0.00415477, 0.0041571 , 0.02913423, 0.00415285, 0.00415712,\n", | |
" 0.00415285, 0.00415477, 0.02905006, 0.00415544, 0.00415712,\n", | |
" 0.02905006, 0.00415285, 0.01659562, 0.00415285, 0.00415514,\n", | |
" 0.01659693, 0.01659693, 0.00415712, 0.01659562, 0.00415462,\n", | |
" 0.00415473, 0.00415771, 0.01659051, 0.00415285, 0.01659562,\n", | |
" 0.016597 , 0.00415738, 0.01647331, 0.00415869, 0.00415559,\n", | |
" 0.016597 , 0.01658913, 0.00415771, 0.01659051, 0.00415477,\n", | |
" 0.01659051, 0.00415473, 0.00415712, 0.00415285, 0.00415477,\n", | |
" 0.00415477, 0.01658701, 0.0041571 , 0.0041556 , 0.00415514,\n", | |
" 0.01659051, 0.00415285, 0.01658701, 0.016597 , 0.02904948,\n", | |
" 0.00415544, 0.01659693, 0.01658701, 0.0041571 , 0.01659562,\n", | |
" 0.016597 , 0.00415738, 0.00415285, 0.00415895, 0.01659058,\n", | |
" 0.00415473, 0.00415285, 0.00415473, 0.00415638, 0.01648999,\n", | |
" 0.00415473, 0.00415514],\n", | |
" [0.01544185, 0.01544185, 0.00386525, 0.00386746, 0.00386746,\n", | |
" 0.00386495, 0.01543878, 0.03852412, 0.00386879, 0.00386769,\n", | |
" 0.00386857, 0.01544144, 0.0038663 , 0.02702122, 0.01543648,\n", | |
" 0.00386495, 0.00386617, 0.00386641, 0.02704008, 0.01537367,\n", | |
" 0.0038664 , 0.01543643, 0.0038663 , 0.02702317, 0.00386545,\n", | |
" 0.015433 , 0.00386629, 0.03861506, 0.00386617, 0.0154392 ,\n", | |
" 0.01544185, 0.01543648, 0.01536744, 0.00386427, 0.01543643,\n", | |
" 0.00386427, 0.01544185, 0.0038651 , 0.00386617, 0.01543643,\n", | |
" 0.0038651 , 0.00386427, 0.00386557, 0.00386427, 0.00386641,\n", | |
" 0.00386525, 0.00386525, 0.01543643, 0.00386557, 0.0038663 ,\n", | |
" 0.01544144, 0.01543518, 0.00386794, 0.00386427, 0.00386557,\n", | |
" 0.00386495, 0.0154351 , 0.0271439 , 0.015433 , 0.03861494,\n", | |
" 0.00386495, 0.00386769, 0.01543518, 0.00386794, 0.01544185,\n", | |
" 0.00386794, 0.01544144, 0.01543643, 0.00386427, 0.01544185,\n", | |
" 0.01544185, 0.00386879, 0.01543648, 0.02702718, 0.00386641,\n", | |
" 0.00386794, 0.00386427, 0.00386879, 0.00386495, 0.00386511,\n", | |
" 0.00386617, 0.00386525, 0.00386879, 0.01543648, 0.00386557,\n", | |
" 0.00386495, 0.0154351 , 0.00386427, 0.01543068, 0.00386746,\n", | |
" 0.01544144, 0.00386427, 0.01544144, 0.02702552, 0.00387141,\n", | |
" 0.01544144, 0.00386641],\n", | |
" [0.00561871, 0.00561871, 0.00561711, 0.00562249, 0.00562249,\n", | |
" 0.00561745, 0.00562127, 0.0394206 , 0.00562537, 0.00562412,\n", | |
" 0.02243044, 0.00561938, 0.02243725, 0.02245849, 0.00562337,\n", | |
" 0.00561745, 0.02243633, 0.0224364 , 0.00562739, 0.02274036,\n", | |
" 0.00562075, 0.00562341, 0.02243725, 0.00562411, 0.00561833,\n", | |
" 0.00562629, 0.00561998, 0.00562068, 0.02243633, 0.00562149,\n", | |
" 0.00561871, 0.00562337, 0.00562687, 0.02244259, 0.00562341,\n", | |
" 0.02244259, 0.00561871, 0.00561682, 0.02243633, 0.00562341,\n", | |
" 0.00561682, 0.02244259, 0.00561842, 0.02244259, 0.0224364 ,\n", | |
" 0.00561711, 0.00561711, 0.00562341, 0.00561842, 0.02243725,\n", | |
" 0.00561938, 0.00562444, 0.00562187, 0.02244259, 0.00561842,\n", | |
" 0.00561745, 0.005625 , 0.00563388, 0.00562629, 0.00562061,\n", | |
" 0.00561745, 0.00562412, 0.00562444, 0.00562187, 0.00561871,\n", | |
" 0.00562187, 0.00561938, 0.00562341, 0.02244259, 0.00561871,\n", | |
" 0.00561871, 0.00562537, 0.00562337, 0.00562085, 0.0224364 ,\n", | |
" 0.00562187, 0.02244259, 0.00562537, 0.00561745, 0.00561761,\n", | |
" 0.02243633, 0.00561711, 0.00562537, 0.00562337, 0.00561842,\n", | |
" 0.00561745, 0.005625 , 0.02244259, 0.0224651 , 0.00562249,\n", | |
" 0.00561938, 0.02244259, 0.00561938, 0.00562222, 0.02258854,\n", | |
" 0.00561938, 0.0224364 ]])" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"noma = lda.components_ / lda.components_.sum(axis=1)[:, np.newaxis]\n", | |
"noma" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d12ad615", | |
"metadata": {}, | |
"source": [ | |
"#### データフレームに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"id": "194119e2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mc</th>\n", | |
" <th>waka</th>\n", | |
" <th>お前</th>\n", | |
" <th>しゃべれる</th>\n", | |
" <th>じゃない方</th>\n", | |
" <th>すいません</th>\n", | |
" <th>たりないふたり</th>\n", | |
" <th>ない</th>\n", | |
" <th>ばれる</th>\n", | |
" <th>ぼる塾</th>\n", | |
" <th>...</th>\n", | |
" <th>若槻千夏</th>\n", | |
" <th>茶々</th>\n", | |
" <th>藤井青銅</th>\n", | |
" <th>裏</th>\n", | |
" <th>話す</th>\n", | |
" <th>象</th>\n", | |
" <th>足</th>\n", | |
" <th>面白い</th>\n", | |
" <th>靴下</th>\n", | |
" <th>飼う</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.333642</td>\n", | |
" <td>0.333642</td>\n", | |
" <td>1.332790</td>\n", | |
" <td>1.332280</td>\n", | |
" <td>1.332280</td>\n", | |
" <td>1.332796</td>\n", | |
" <td>0.333756</td>\n", | |
" <td>0.333920</td>\n", | |
" <td>1.331994</td>\n", | |
" <td>1.332164</td>\n", | |
" <td>...</td>\n", | |
" <td>0.333488</td>\n", | |
" <td>0.333978</td>\n", | |
" <td>1.332280</td>\n", | |
" <td>0.333639</td>\n", | |
" <td>0.333488</td>\n", | |
" <td>0.333639</td>\n", | |
" <td>0.333772</td>\n", | |
" <td>1.324203</td>\n", | |
" <td>0.333639</td>\n", | |
" <td>0.333672</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.332622</td>\n", | |
" <td>1.332622</td>\n", | |
" <td>0.333569</td>\n", | |
" <td>0.333759</td>\n", | |
" <td>0.333759</td>\n", | |
" <td>0.333543</td>\n", | |
" <td>1.332357</td>\n", | |
" <td>3.324606</td>\n", | |
" <td>0.333874</td>\n", | |
" <td>0.333779</td>\n", | |
" <td>...</td>\n", | |
" <td>0.333484</td>\n", | |
" <td>1.331657</td>\n", | |
" <td>0.333759</td>\n", | |
" <td>1.332586</td>\n", | |
" <td>0.333484</td>\n", | |
" <td>1.332586</td>\n", | |
" <td>2.332284</td>\n", | |
" <td>0.334100</td>\n", | |
" <td>1.332586</td>\n", | |
" <td>0.333668</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.333736</td>\n", | |
" <td>0.333736</td>\n", | |
" <td>0.333641</td>\n", | |
" <td>0.333960</td>\n", | |
" <td>0.333960</td>\n", | |
" <td>0.333661</td>\n", | |
" <td>0.333888</td>\n", | |
" <td>2.341474</td>\n", | |
" <td>0.334132</td>\n", | |
" <td>0.334057</td>\n", | |
" <td>...</td>\n", | |
" <td>1.333027</td>\n", | |
" <td>1.334364</td>\n", | |
" <td>0.333960</td>\n", | |
" <td>0.333776</td>\n", | |
" <td>1.333027</td>\n", | |
" <td>0.333776</td>\n", | |
" <td>0.333944</td>\n", | |
" <td>1.341696</td>\n", | |
" <td>0.333776</td>\n", | |
" <td>1.332660</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>3 rows × 97 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mc waka お前 しゃべれる じゃない方 すいません たりないふたり \\\n", | |
"0 0.333642 0.333642 1.332790 1.332280 1.332280 1.332796 0.333756 \n", | |
"1 1.332622 1.332622 0.333569 0.333759 0.333759 0.333543 1.332357 \n", | |
"2 0.333736 0.333736 0.333641 0.333960 0.333960 0.333661 0.333888 \n", | |
"\n", | |
" ない ばれる ぼる塾 ... 若槻千夏 茶々 藤井青銅 裏 \\\n", | |
"0 0.333920 1.331994 1.332164 ... 0.333488 0.333978 1.332280 0.333639 \n", | |
"1 3.324606 0.333874 0.333779 ... 0.333484 1.331657 0.333759 1.332586 \n", | |
"2 2.341474 0.334132 0.334057 ... 1.333027 1.334364 0.333960 0.333776 \n", | |
"\n", | |
" 話す 象 足 面白い 靴下 飼う \n", | |
"0 0.333488 0.333639 0.333772 1.324203 0.333639 0.333672 \n", | |
"1 0.333484 1.332586 2.332284 0.334100 1.332586 0.333668 \n", | |
"2 1.333027 0.333776 0.333944 1.341696 0.333776 1.332660 \n", | |
"\n", | |
"[3 rows x 97 columns]" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ldacomponents = pd.DataFrame(lda.components_,columns=feature_names)\n", | |
"ldacomponents" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "88ff06de", | |
"metadata": {}, | |
"source": [ | |
"#### トピックIDの作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"id": "a781e404", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0, 1, 2]" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tpcid = list(range(topic_num))\n", | |
"tpcid" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d958a6b6", | |
"metadata": {}, | |
"source": [ | |
"#### トピックIDをデータフレームに" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"id": "f1665516", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>topicid</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" topicid\n", | |
"0 0\n", | |
"1 1\n", | |
"2 2" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tpcidf = pd.DataFrame(tpcid,columns=['topicid'])\n", | |
"tpcidf" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "87245b49", | |
"metadata": {}, | |
"source": [ | |
"#### 結合" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"id": "ac927946", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>topicid</th>\n", | |
" <th>mc</th>\n", | |
" <th>waka</th>\n", | |
" <th>お前</th>\n", | |
" <th>しゃべれる</th>\n", | |
" <th>じゃない方</th>\n", | |
" <th>すいません</th>\n", | |
" <th>たりないふたり</th>\n", | |
" <th>ない</th>\n", | |
" <th>ばれる</th>\n", | |
" <th>...</th>\n", | |
" <th>若槻千夏</th>\n", | |
" <th>茶々</th>\n", | |
" <th>藤井青銅</th>\n", | |
" <th>裏</th>\n", | |
" <th>話す</th>\n", | |
" <th>象</th>\n", | |
" <th>足</th>\n", | |
" <th>面白い</th>\n", | |
" <th>靴下</th>\n", | |
" <th>飼う</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0.333642</td>\n", | |
" <td>0.333642</td>\n", | |
" <td>1.332790</td>\n", | |
" <td>1.332280</td>\n", | |
" <td>1.332280</td>\n", | |
" <td>1.332796</td>\n", | |
" <td>0.333756</td>\n", | |
" <td>0.333920</td>\n", | |
" <td>1.331994</td>\n", | |
" <td>...</td>\n", | |
" <td>0.333488</td>\n", | |
" <td>0.333978</td>\n", | |
" <td>1.332280</td>\n", | |
" <td>0.333639</td>\n", | |
" <td>0.333488</td>\n", | |
" <td>0.333639</td>\n", | |
" <td>0.333772</td>\n", | |
" <td>1.324203</td>\n", | |
" <td>0.333639</td>\n", | |
" <td>0.333672</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>1.332622</td>\n", | |
" <td>1.332622</td>\n", | |
" <td>0.333569</td>\n", | |
" <td>0.333759</td>\n", | |
" <td>0.333759</td>\n", | |
" <td>0.333543</td>\n", | |
" <td>1.332357</td>\n", | |
" <td>3.324606</td>\n", | |
" <td>0.333874</td>\n", | |
" <td>...</td>\n", | |
" <td>0.333484</td>\n", | |
" <td>1.331657</td>\n", | |
" <td>0.333759</td>\n", | |
" <td>1.332586</td>\n", | |
" <td>0.333484</td>\n", | |
" <td>1.332586</td>\n", | |
" <td>2.332284</td>\n", | |
" <td>0.334100</td>\n", | |
" <td>1.332586</td>\n", | |
" <td>0.333668</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>0.333736</td>\n", | |
" <td>0.333736</td>\n", | |
" <td>0.333641</td>\n", | |
" <td>0.333960</td>\n", | |
" <td>0.333960</td>\n", | |
" <td>0.333661</td>\n", | |
" <td>0.333888</td>\n", | |
" <td>2.341474</td>\n", | |
" <td>0.334132</td>\n", | |
" <td>...</td>\n", | |
" <td>1.333027</td>\n", | |
" <td>1.334364</td>\n", | |
" <td>0.333960</td>\n", | |
" <td>0.333776</td>\n", | |
" <td>1.333027</td>\n", | |
" <td>0.333776</td>\n", | |
" <td>0.333944</td>\n", | |
" <td>1.341696</td>\n", | |
" <td>0.333776</td>\n", | |
" <td>1.332660</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>3 rows × 98 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" topicid mc waka お前 しゃべれる じゃない方 すいません \\\n", | |
"0 0 0.333642 0.333642 1.332790 1.332280 1.332280 1.332796 \n", | |
"1 1 1.332622 1.332622 0.333569 0.333759 0.333759 0.333543 \n", | |
"2 2 0.333736 0.333736 0.333641 0.333960 0.333960 0.333661 \n", | |
"\n", | |
" たりないふたり ない ばれる ... 若槻千夏 茶々 藤井青銅 裏 \\\n", | |
"0 0.333756 0.333920 1.331994 ... 0.333488 0.333978 1.332280 0.333639 \n", | |
"1 1.332357 3.324606 0.333874 ... 0.333484 1.331657 0.333759 1.332586 \n", | |
"2 0.333888 2.341474 0.334132 ... 1.333027 1.334364 0.333960 0.333776 \n", | |
"\n", | |
" 話す 象 足 面白い 靴下 飼う \n", | |
"0 0.333488 0.333639 0.333772 1.324203 0.333639 0.333672 \n", | |
"1 0.333484 1.332586 2.332284 0.334100 1.332586 0.333668 \n", | |
"2 1.333027 0.333776 0.333944 1.341696 0.333776 1.332660 \n", | |
"\n", | |
"[3 rows x 98 columns]" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"topicmodel = tpcidf.join(ldacomponents, how='inner') \n", | |
"topicmodel" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "fbcd1e11", | |
"metadata": {}, | |
"source": [ | |
"#### 縦持ちパラノイア" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"id": "773899b1", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>topicid</th>\n", | |
" <th>word</th>\n", | |
" <th>prob</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>mc</td>\n", | |
" <td>0.333642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>mc</td>\n", | |
" <td>1.332622</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>mc</td>\n", | |
" <td>0.333736</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>waka</td>\n", | |
" <td>0.333642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>waka</td>\n", | |
" <td>1.332622</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>286</th>\n", | |
" <td>1</td>\n", | |
" <td>靴下</td>\n", | |
" <td>1.332586</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>287</th>\n", | |
" <td>2</td>\n", | |
" <td>靴下</td>\n", | |
" <td>0.333776</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>288</th>\n", | |
" <td>0</td>\n", | |
" <td>飼う</td>\n", | |
" <td>0.333672</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>289</th>\n", | |
" <td>1</td>\n", | |
" <td>飼う</td>\n", | |
" <td>0.333668</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>290</th>\n", | |
" <td>2</td>\n", | |
" <td>飼う</td>\n", | |
" <td>1.332660</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>291 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" topicid word prob\n", | |
"0 0 mc 0.333642\n", | |
"1 1 mc 1.332622\n", | |
"2 2 mc 0.333736\n", | |
"3 0 waka 0.333642\n", | |
"4 1 waka 1.332622\n", | |
".. ... ... ...\n", | |
"286 1 靴下 1.332586\n", | |
"287 2 靴下 0.333776\n", | |
"288 0 飼う 0.333672\n", | |
"289 1 飼う 0.333668\n", | |
"290 2 飼う 1.332660\n", | |
"\n", | |
"[291 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"topicmodel_vertico = pd.melt(topicmodel, id_vars='topicid')\n", | |
"topicmodel_vertico.columns =['topicid','word','prob']\n", | |
"topicmodel_vertico" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "402441be", | |
"metadata": {}, | |
"source": [ | |
"#### 空テーブル作成、LDAモデル用" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "08ba81f2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" x1 = pd.read_sql(\"\"\"\n", | |
" create multiset table jumbo.aud12_ldamodel (\n", | |
" topicid integer, \n", | |
" word varchar(50) character set unicode, \n", | |
" prob float \n", | |
" ) primary index (topicid, word) \n", | |
" \"\"\", conn)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3ab9e952", | |
"metadata": {}, | |
"source": [ | |
"#### LDAモデルの格納" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"id": "adb16439", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"topicmodel_vertico.to_sql('aud12_ldamodel',engine,if_exists='append',index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e1684a5a", | |
"metadata": {}, | |
"source": [ | |
"#### 格納を確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"id": "b92ddc30", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>topicid</th>\n", | |
" <th>word</th>\n", | |
" <th>prob</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>芸風</td>\n", | |
" <td>0.333852</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>夕暮れ</td>\n", | |
" <td>0.333831</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>存在</td>\n", | |
" <td>0.333630</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>普段</td>\n", | |
" <td>0.333639</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>星野源</td>\n", | |
" <td>1.332276</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>286</th>\n", | |
" <td>2</td>\n", | |
" <td>歌</td>\n", | |
" <td>0.333736</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>287</th>\n", | |
" <td>2</td>\n", | |
" <td>バスケットボール</td>\n", | |
" <td>0.334056</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>288</th>\n", | |
" <td>2</td>\n", | |
" <td>プライベート</td>\n", | |
" <td>0.334186</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>289</th>\n", | |
" <td>2</td>\n", | |
" <td>名前</td>\n", | |
" <td>1.332660</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>290</th>\n", | |
" <td>2</td>\n", | |
" <td>若槻千夏</td>\n", | |
" <td>1.333027</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>291 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" topicid word prob\n", | |
"0 0 芸風 0.333852\n", | |
"1 0 夕暮れ 0.333831\n", | |
"2 0 存在 0.333630\n", | |
"3 0 普段 0.333639\n", | |
"4 0 星野源 1.332276\n", | |
".. ... ... ...\n", | |
"286 2 歌 0.333736\n", | |
"287 2 バスケットボール 0.334056\n", | |
"288 2 プライベート 0.334186\n", | |
"289 2 名前 1.332660\n", | |
"290 2 若槻千夏 1.333027\n", | |
"\n", | |
"[291 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" x2 = pd.read_sql(\"\"\"\n", | |
" select * from aud12_ldamodel order by 1 \n", | |
" \"\"\", conn)\n", | |
"x2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e2c8f6cd", | |
"metadata": {}, | |
"source": [ | |
"#### スコアリング" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"id": "c1718f57", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0.02815007, 0.02851718, 0.94333275],\n", | |
" [0.04833132, 0.90382262, 0.04784607],\n", | |
" [0.05597141, 0.88790096, 0.05612763],\n", | |
" [0.05592618, 0.88800959, 0.05606423],\n", | |
" [0.06703399, 0.86579661, 0.0671694 ],\n", | |
" [0.89820077, 0.05205886, 0.04974037],\n", | |
" [0.88823887, 0.05581847, 0.05594266],\n", | |
" [0.03489033, 0.93065291, 0.03445676],\n", | |
" [0.05583749, 0.05689241, 0.8872701 ],\n", | |
" [0.86559214, 0.06709998, 0.06730788],\n", | |
" [0.05701029, 0.05581749, 0.88717222],\n", | |
" [0.92551864, 0.03720181, 0.03727955],\n", | |
" [0.9384334 , 0.03045398, 0.03111261],\n", | |
" [0.04270575, 0.91309559, 0.04419867],\n", | |
" [0.03726732, 0.92472372, 0.03800896],\n", | |
" [0.0486369 , 0.05116647, 0.90019663],\n", | |
" [0.05746142, 0.88500902, 0.05752956],\n", | |
" [0.93795394, 0.0315373 , 0.03050876],\n", | |
" [0.88514716, 0.05650422, 0.05834862],\n", | |
" [0.05660835, 0.8850894 , 0.05830225]])" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ldasco = lda.transform(train_bow)\n", | |
"ldasco" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ecfc60b3", | |
"metadata": {}, | |
"source": [ | |
"#### データフレームに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"id": "e4a44fe9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.028150</td>\n", | |
" <td>0.028517</td>\n", | |
" <td>0.943333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.048331</td>\n", | |
" <td>0.903823</td>\n", | |
" <td>0.047846</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.055971</td>\n", | |
" <td>0.887901</td>\n", | |
" <td>0.056128</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.055926</td>\n", | |
" <td>0.888010</td>\n", | |
" <td>0.056064</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.067034</td>\n", | |
" <td>0.865797</td>\n", | |
" <td>0.067169</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.898201</td>\n", | |
" <td>0.052059</td>\n", | |
" <td>0.049740</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.888239</td>\n", | |
" <td>0.055818</td>\n", | |
" <td>0.055943</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.034890</td>\n", | |
" <td>0.930653</td>\n", | |
" <td>0.034457</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.055837</td>\n", | |
" <td>0.056892</td>\n", | |
" <td>0.887270</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.865592</td>\n", | |
" <td>0.067100</td>\n", | |
" <td>0.067308</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0.057010</td>\n", | |
" <td>0.055817</td>\n", | |
" <td>0.887172</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0.925519</td>\n", | |
" <td>0.037202</td>\n", | |
" <td>0.037280</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0.938433</td>\n", | |
" <td>0.030454</td>\n", | |
" <td>0.031113</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0.042706</td>\n", | |
" <td>0.913096</td>\n", | |
" <td>0.044199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0.037267</td>\n", | |
" <td>0.924724</td>\n", | |
" <td>0.038009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0.048637</td>\n", | |
" <td>0.051166</td>\n", | |
" <td>0.900197</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0.057461</td>\n", | |
" <td>0.885009</td>\n", | |
" <td>0.057530</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>0.937954</td>\n", | |
" <td>0.031537</td>\n", | |
" <td>0.030509</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0.885147</td>\n", | |
" <td>0.056504</td>\n", | |
" <td>0.058349</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>0.056608</td>\n", | |
" <td>0.885089</td>\n", | |
" <td>0.058302</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2\n", | |
"0 0.028150 0.028517 0.943333\n", | |
"1 0.048331 0.903823 0.047846\n", | |
"2 0.055971 0.887901 0.056128\n", | |
"3 0.055926 0.888010 0.056064\n", | |
"4 0.067034 0.865797 0.067169\n", | |
"5 0.898201 0.052059 0.049740\n", | |
"6 0.888239 0.055818 0.055943\n", | |
"7 0.034890 0.930653 0.034457\n", | |
"8 0.055837 0.056892 0.887270\n", | |
"9 0.865592 0.067100 0.067308\n", | |
"10 0.057010 0.055817 0.887172\n", | |
"11 0.925519 0.037202 0.037280\n", | |
"12 0.938433 0.030454 0.031113\n", | |
"13 0.042706 0.913096 0.044199\n", | |
"14 0.037267 0.924724 0.038009\n", | |
"15 0.048637 0.051166 0.900197\n", | |
"16 0.057461 0.885009 0.057530\n", | |
"17 0.937954 0.031537 0.030509\n", | |
"18 0.885147 0.056504 0.058349\n", | |
"19 0.056608 0.885089 0.058302" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ldascore = pd.DataFrame(ldasco,columns=['0','1','2'])\n", | |
"ldascore" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3c515b7b", | |
"metadata": {}, | |
"source": [ | |
"#### 元のデータフレームに結合" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"id": "675622d3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>word</th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない</td>\n", | |
" <td>0.028150</td>\n", | |
" <td>0.028517</td>\n", | |
" <td>0.943333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>漫才 ツッコミ 担当 たりないふたり ボケ 担当</td>\n", | |
" <td>0.048331</td>\n", | |
" <td>0.903823</td>\n", | |
" <td>0.047846</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>ナナメ 夕暮れ 他 本 出す</td>\n", | |
" <td>0.055971</td>\n", | |
" <td>0.887901</td>\n", | |
" <td>0.056128</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>深夜 一人 バスケットボール スリーポイント 練習</td>\n", | |
" <td>0.055926</td>\n", | |
" <td>0.888010</td>\n", | |
" <td>0.056064</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>プライベート バスケットボール 足 怪我</td>\n", | |
" <td>0.067034</td>\n", | |
" <td>0.865797</td>\n", | |
" <td>0.067169</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>星野源 日本 テレビ 界 希望 思う</td>\n", | |
" <td>0.898201</td>\n", | |
" <td>0.052059</td>\n", | |
" <td>0.049740</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" <td>藤井青銅 ピンク ベスト じゃない方 しゃべれる</td>\n", | |
" <td>0.888239</td>\n", | |
" <td>0.055818</td>\n", | |
" <td>0.055943</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる</td>\n", | |
" <td>0.034890</td>\n", | |
" <td>0.930653</td>\n", | |
" <td>0.034457</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>9</td>\n", | |
" <td>茶々 名前 チワワ 犬 飼う</td>\n", | |
" <td>0.055837</td>\n", | |
" <td>0.056892</td>\n", | |
" <td>0.887270</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>10</td>\n", | |
" <td>結婚 直前 浮気 ばれる</td>\n", | |
" <td>0.865592</td>\n", | |
" <td>0.067100</td>\n", | |
" <td>0.067308</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>11</td>\n", | |
" <td>六本木 社長 モンクレール ダウン もらう</td>\n", | |
" <td>0.057010</td>\n", | |
" <td>0.055817</td>\n", | |
" <td>0.887172</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>12</td>\n", | |
" <td>ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ</td>\n", | |
" <td>0.925519</td>\n", | |
" <td>0.037202</td>\n", | |
" <td>0.037280</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>13</td>\n", | |
" <td>ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶</td>\n", | |
" <td>0.938433</td>\n", | |
" <td>0.030454</td>\n", | |
" <td>0.031113</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>14</td>\n", | |
" <td>漫才 ボケ 担当 ラジオ テレビ ボケ ない</td>\n", | |
" <td>0.042706</td>\n", | |
" <td>0.913096</td>\n", | |
" <td>0.044199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>15</td>\n", | |
" <td>普段 靴下 履く ない 足 裏 象 よう</td>\n", | |
" <td>0.037267</td>\n", | |
" <td>0.924724</td>\n", | |
" <td>0.038009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>16</td>\n", | |
" <td>バカリズム 存在 面白い ウケる スベる ない</td>\n", | |
" <td>0.048637</td>\n", | |
" <td>0.051166</td>\n", | |
" <td>0.900197</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>17</td>\n", | |
" <td>山里亮太 ツッコミ 敵わ ない 思う</td>\n", | |
" <td>0.057461</td>\n", | |
" <td>0.885009</td>\n", | |
" <td>0.057530</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>18</td>\n", | |
" <td>入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ</td>\n", | |
" <td>0.937954</td>\n", | |
" <td>0.031537</td>\n", | |
" <td>0.030509</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>19</td>\n", | |
" <td>ぼる塾 人 トゥース 掛け合い 面白い</td>\n", | |
" <td>0.885147</td>\n", | |
" <td>0.056504</td>\n", | |
" <td>0.058349</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>20</td>\n", | |
" <td>スベる 芸風 スベる 怖い 思う</td>\n", | |
" <td>0.056608</td>\n", | |
" <td>0.885089</td>\n", | |
" <td>0.058302</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid word 0 1 \\\n", | |
"0 1 若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない 0.028150 0.028517 \n", | |
"1 2 漫才 ツッコミ 担当 たりないふたり ボケ 担当 0.048331 0.903823 \n", | |
"2 3 ナナメ 夕暮れ 他 本 出す 0.055971 0.887901 \n", | |
"3 4 深夜 一人 バスケットボール スリーポイント 練習 0.055926 0.888010 \n", | |
"4 5 プライベート バスケットボール 足 怪我 0.067034 0.865797 \n", | |
"5 6 星野源 日本 テレビ 界 希望 思う 0.898201 0.052059 \n", | |
"6 7 藤井青銅 ピンク ベスト じゃない方 しゃべれる 0.888239 0.055818 \n", | |
"7 8 mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる 0.034890 0.930653 \n", | |
"8 9 茶々 名前 チワワ 犬 飼う 0.055837 0.056892 \n", | |
"9 10 結婚 直前 浮気 ばれる 0.865592 0.067100 \n", | |
"10 11 六本木 社長 モンクレール ダウン もらう 0.057010 0.055817 \n", | |
"11 12 ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ 0.925519 0.037202 \n", | |
"12 13 ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶 0.938433 0.030454 \n", | |
"13 14 漫才 ボケ 担当 ラジオ テレビ ボケ ない 0.042706 0.913096 \n", | |
"14 15 普段 靴下 履く ない 足 裏 象 よう 0.037267 0.924724 \n", | |
"15 16 バカリズム 存在 面白い ウケる スベる ない 0.048637 0.051166 \n", | |
"16 17 山里亮太 ツッコミ 敵わ ない 思う 0.057461 0.885009 \n", | |
"17 18 入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ 0.937954 0.031537 \n", | |
"18 19 ぼる塾 人 トゥース 掛け合い 面白い 0.885147 0.056504 \n", | |
"19 20 スベる 芸風 スベる 怖い 思う 0.056608 0.885089 \n", | |
"\n", | |
" 2 \n", | |
"0 0.943333 \n", | |
"1 0.047846 \n", | |
"2 0.056128 \n", | |
"3 0.056064 \n", | |
"4 0.067169 \n", | |
"5 0.049740 \n", | |
"6 0.055943 \n", | |
"7 0.034457 \n", | |
"8 0.887270 \n", | |
"9 0.067308 \n", | |
"10 0.887172 \n", | |
"11 0.037280 \n", | |
"12 0.031113 \n", | |
"13 0.044199 \n", | |
"14 0.038009 \n", | |
"15 0.900197 \n", | |
"16 0.057530 \n", | |
"17 0.030509 \n", | |
"18 0.058349 \n", | |
"19 0.058302 " | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ldascore1 = train.join(ldascore, how='inner') \n", | |
"ldascore1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "daac7706", | |
"metadata": {}, | |
"source": [ | |
"#### 縦持ちパラノイア" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"id": "92fd22eb", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>topicid</th>\n", | |
" <th>score</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0.028150</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0.048331</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0.055971</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0.055926</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>0.067034</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>0</td>\n", | |
" <td>0.898201</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" <td>0</td>\n", | |
" <td>0.888239</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>0</td>\n", | |
" <td>0.034890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>9</td>\n", | |
" <td>0</td>\n", | |
" <td>0.055837</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>10</td>\n", | |
" <td>0</td>\n", | |
" <td>0.865592</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>11</td>\n", | |
" <td>0</td>\n", | |
" <td>0.057010</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>12</td>\n", | |
" <td>0</td>\n", | |
" <td>0.925519</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>13</td>\n", | |
" <td>0</td>\n", | |
" <td>0.938433</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>14</td>\n", | |
" <td>0</td>\n", | |
" <td>0.042706</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>15</td>\n", | |
" <td>0</td>\n", | |
" <td>0.037267</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>16</td>\n", | |
" <td>0</td>\n", | |
" <td>0.048637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>17</td>\n", | |
" <td>0</td>\n", | |
" <td>0.057461</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>18</td>\n", | |
" <td>0</td>\n", | |
" <td>0.937954</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>19</td>\n", | |
" <td>0</td>\n", | |
" <td>0.885147</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>20</td>\n", | |
" <td>0</td>\n", | |
" <td>0.056608</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0.028517</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>0.903823</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>0.887901</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0.888010</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>0.865797</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0.052059</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>7</td>\n", | |
" <td>1</td>\n", | |
" <td>0.055818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>8</td>\n", | |
" <td>1</td>\n", | |
" <td>0.930653</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>9</td>\n", | |
" <td>1</td>\n", | |
" <td>0.056892</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>10</td>\n", | |
" <td>1</td>\n", | |
" <td>0.067100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>30</th>\n", | |
" <td>11</td>\n", | |
" <td>1</td>\n", | |
" <td>0.055817</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31</th>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" <td>0.037202</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>32</th>\n", | |
" <td>13</td>\n", | |
" <td>1</td>\n", | |
" <td>0.030454</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33</th>\n", | |
" <td>14</td>\n", | |
" <td>1</td>\n", | |
" <td>0.913096</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>34</th>\n", | |
" <td>15</td>\n", | |
" <td>1</td>\n", | |
" <td>0.924724</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>35</th>\n", | |
" <td>16</td>\n", | |
" <td>1</td>\n", | |
" <td>0.051166</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>36</th>\n", | |
" <td>17</td>\n", | |
" <td>1</td>\n", | |
" <td>0.885009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37</th>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" <td>0.031537</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>38</th>\n", | |
" <td>19</td>\n", | |
" <td>1</td>\n", | |
" <td>0.056504</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39</th>\n", | |
" <td>20</td>\n", | |
" <td>1</td>\n", | |
" <td>0.885089</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>0.943333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>41</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0.047846</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>42</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>0.056128</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>43</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>0.056064</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>44</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>0.067169</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>45</th>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>0.049740</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>46</th>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>0.055943</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>47</th>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>0.034457</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>48</th>\n", | |
" <td>9</td>\n", | |
" <td>2</td>\n", | |
" <td>0.887270</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49</th>\n", | |
" <td>10</td>\n", | |
" <td>2</td>\n", | |
" <td>0.067308</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50</th>\n", | |
" <td>11</td>\n", | |
" <td>2</td>\n", | |
" <td>0.887172</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51</th>\n", | |
" <td>12</td>\n", | |
" <td>2</td>\n", | |
" <td>0.037280</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>52</th>\n", | |
" <td>13</td>\n", | |
" <td>2</td>\n", | |
" <td>0.031113</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53</th>\n", | |
" <td>14</td>\n", | |
" <td>2</td>\n", | |
" <td>0.044199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>54</th>\n", | |
" <td>15</td>\n", | |
" <td>2</td>\n", | |
" <td>0.038009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>55</th>\n", | |
" <td>16</td>\n", | |
" <td>2</td>\n", | |
" <td>0.900197</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>56</th>\n", | |
" <td>17</td>\n", | |
" <td>2</td>\n", | |
" <td>0.057530</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>57</th>\n", | |
" <td>18</td>\n", | |
" <td>2</td>\n", | |
" <td>0.030509</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>58</th>\n", | |
" <td>19</td>\n", | |
" <td>2</td>\n", | |
" <td>0.058349</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>59</th>\n", | |
" <td>20</td>\n", | |
" <td>2</td>\n", | |
" <td>0.058302</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid topicid score\n", | |
"0 1 0 0.028150\n", | |
"1 2 0 0.048331\n", | |
"2 3 0 0.055971\n", | |
"3 4 0 0.055926\n", | |
"4 5 0 0.067034\n", | |
"5 6 0 0.898201\n", | |
"6 7 0 0.888239\n", | |
"7 8 0 0.034890\n", | |
"8 9 0 0.055837\n", | |
"9 10 0 0.865592\n", | |
"10 11 0 0.057010\n", | |
"11 12 0 0.925519\n", | |
"12 13 0 0.938433\n", | |
"13 14 0 0.042706\n", | |
"14 15 0 0.037267\n", | |
"15 16 0 0.048637\n", | |
"16 17 0 0.057461\n", | |
"17 18 0 0.937954\n", | |
"18 19 0 0.885147\n", | |
"19 20 0 0.056608\n", | |
"20 1 1 0.028517\n", | |
"21 2 1 0.903823\n", | |
"22 3 1 0.887901\n", | |
"23 4 1 0.888010\n", | |
"24 5 1 0.865797\n", | |
"25 6 1 0.052059\n", | |
"26 7 1 0.055818\n", | |
"27 8 1 0.930653\n", | |
"28 9 1 0.056892\n", | |
"29 10 1 0.067100\n", | |
"30 11 1 0.055817\n", | |
"31 12 1 0.037202\n", | |
"32 13 1 0.030454\n", | |
"33 14 1 0.913096\n", | |
"34 15 1 0.924724\n", | |
"35 16 1 0.051166\n", | |
"36 17 1 0.885009\n", | |
"37 18 1 0.031537\n", | |
"38 19 1 0.056504\n", | |
"39 20 1 0.885089\n", | |
"40 1 2 0.943333\n", | |
"41 2 2 0.047846\n", | |
"42 3 2 0.056128\n", | |
"43 4 2 0.056064\n", | |
"44 5 2 0.067169\n", | |
"45 6 2 0.049740\n", | |
"46 7 2 0.055943\n", | |
"47 8 2 0.034457\n", | |
"48 9 2 0.887270\n", | |
"49 10 2 0.067308\n", | |
"50 11 2 0.887172\n", | |
"51 12 2 0.037280\n", | |
"52 13 2 0.031113\n", | |
"53 14 2 0.044199\n", | |
"54 15 2 0.038009\n", | |
"55 16 2 0.900197\n", | |
"56 17 2 0.057530\n", | |
"57 18 2 0.030509\n", | |
"58 19 2 0.058349\n", | |
"59 20 2 0.058302" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ldascore_vertico = ldascore1[['docid','0','1','2']]\n", | |
"ldascore_vertico = pd.melt(ldascore_vertico, id_vars='docid')\n", | |
"ldascore_vertico.columns =['docid','topicid','score']\n", | |
"ldascore_vertico" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "5b59a5d2", | |
"metadata": {}, | |
"source": [ | |
"####空テーブル作成、元データ用 " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"id": "6211ca34", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" x3 = pd.read_sql(\"\"\"\n", | |
" create multiset table jumbo.aud13_ldascore (\n", | |
" docid integer, \n", | |
" topicid integer, \n", | |
" score float \n", | |
" ) primary index (docid) \n", | |
" \"\"\", conn)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "124734c9", | |
"metadata": {}, | |
"source": [ | |
"####元データの格納" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"id": "3ded80b1", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ldascore_vertico.to_sql('aud13_ldascore',engine,if_exists='append',index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ffd3650b", | |
"metadata": {}, | |
"source": [ | |
"####格納を確認 " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"id": "ab152516", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>topicid</th>\n", | |
" <th>score</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0.028517</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0.028150</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>0.943333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>0.903823</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0.047846</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0.048331</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>0.887901</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0.055971</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>0.056128</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0.055926</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" <td>0.056064</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0.888010</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>0.067034</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>0.067169</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>0.865797</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0.052059</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>6</td>\n", | |
" <td>0</td>\n", | |
" <td>0.898201</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" <td>0.049740</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>0.055943</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>7</td>\n", | |
" <td>1</td>\n", | |
" <td>0.055818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>7</td>\n", | |
" <td>0</td>\n", | |
" <td>0.888239</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>8</td>\n", | |
" <td>2</td>\n", | |
" <td>0.034457</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>8</td>\n", | |
" <td>1</td>\n", | |
" <td>0.930653</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>8</td>\n", | |
" <td>0</td>\n", | |
" <td>0.034890</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>9</td>\n", | |
" <td>2</td>\n", | |
" <td>0.887270</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>9</td>\n", | |
" <td>0</td>\n", | |
" <td>0.055837</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>9</td>\n", | |
" <td>1</td>\n", | |
" <td>0.056892</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>10</td>\n", | |
" <td>0</td>\n", | |
" <td>0.865592</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>10</td>\n", | |
" <td>1</td>\n", | |
" <td>0.067100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>10</td>\n", | |
" <td>2</td>\n", | |
" <td>0.067308</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>30</th>\n", | |
" <td>11</td>\n", | |
" <td>0</td>\n", | |
" <td>0.057010</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31</th>\n", | |
" <td>11</td>\n", | |
" <td>2</td>\n", | |
" <td>0.887172</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>32</th>\n", | |
" <td>11</td>\n", | |
" <td>1</td>\n", | |
" <td>0.055817</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33</th>\n", | |
" <td>12</td>\n", | |
" <td>2</td>\n", | |
" <td>0.037280</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>34</th>\n", | |
" <td>12</td>\n", | |
" <td>1</td>\n", | |
" <td>0.037202</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>35</th>\n", | |
" <td>12</td>\n", | |
" <td>0</td>\n", | |
" <td>0.925519</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>36</th>\n", | |
" <td>13</td>\n", | |
" <td>0</td>\n", | |
" <td>0.938433</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37</th>\n", | |
" <td>13</td>\n", | |
" <td>2</td>\n", | |
" <td>0.031113</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>38</th>\n", | |
" <td>13</td>\n", | |
" <td>1</td>\n", | |
" <td>0.030454</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39</th>\n", | |
" <td>14</td>\n", | |
" <td>2</td>\n", | |
" <td>0.044199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40</th>\n", | |
" <td>14</td>\n", | |
" <td>1</td>\n", | |
" <td>0.913096</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>41</th>\n", | |
" <td>14</td>\n", | |
" <td>0</td>\n", | |
" <td>0.042706</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>42</th>\n", | |
" <td>15</td>\n", | |
" <td>2</td>\n", | |
" <td>0.038009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>43</th>\n", | |
" <td>15</td>\n", | |
" <td>1</td>\n", | |
" <td>0.924724</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>44</th>\n", | |
" <td>15</td>\n", | |
" <td>0</td>\n", | |
" <td>0.037267</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>45</th>\n", | |
" <td>16</td>\n", | |
" <td>2</td>\n", | |
" <td>0.900197</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>46</th>\n", | |
" <td>16</td>\n", | |
" <td>1</td>\n", | |
" <td>0.051166</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>47</th>\n", | |
" <td>16</td>\n", | |
" <td>0</td>\n", | |
" <td>0.048637</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>48</th>\n", | |
" <td>17</td>\n", | |
" <td>2</td>\n", | |
" <td>0.057530</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49</th>\n", | |
" <td>17</td>\n", | |
" <td>1</td>\n", | |
" <td>0.885009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50</th>\n", | |
" <td>17</td>\n", | |
" <td>0</td>\n", | |
" <td>0.057461</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51</th>\n", | |
" <td>18</td>\n", | |
" <td>2</td>\n", | |
" <td>0.030509</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>52</th>\n", | |
" <td>18</td>\n", | |
" <td>1</td>\n", | |
" <td>0.031537</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53</th>\n", | |
" <td>18</td>\n", | |
" <td>0</td>\n", | |
" <td>0.937954</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>54</th>\n", | |
" <td>19</td>\n", | |
" <td>2</td>\n", | |
" <td>0.058349</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>55</th>\n", | |
" <td>19</td>\n", | |
" <td>0</td>\n", | |
" <td>0.885147</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>56</th>\n", | |
" <td>19</td>\n", | |
" <td>1</td>\n", | |
" <td>0.056504</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>57</th>\n", | |
" <td>20</td>\n", | |
" <td>2</td>\n", | |
" <td>0.058302</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>58</th>\n", | |
" <td>20</td>\n", | |
" <td>1</td>\n", | |
" <td>0.885089</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>59</th>\n", | |
" <td>20</td>\n", | |
" <td>0</td>\n", | |
" <td>0.056608</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid topicid score\n", | |
"0 1 1 0.028517\n", | |
"1 1 0 0.028150\n", | |
"2 1 2 0.943333\n", | |
"3 2 1 0.903823\n", | |
"4 2 2 0.047846\n", | |
"5 2 0 0.048331\n", | |
"6 3 1 0.887901\n", | |
"7 3 0 0.055971\n", | |
"8 3 2 0.056128\n", | |
"9 4 0 0.055926\n", | |
"10 4 2 0.056064\n", | |
"11 4 1 0.888010\n", | |
"12 5 0 0.067034\n", | |
"13 5 2 0.067169\n", | |
"14 5 1 0.865797\n", | |
"15 6 1 0.052059\n", | |
"16 6 0 0.898201\n", | |
"17 6 2 0.049740\n", | |
"18 7 2 0.055943\n", | |
"19 7 1 0.055818\n", | |
"20 7 0 0.888239\n", | |
"21 8 2 0.034457\n", | |
"22 8 1 0.930653\n", | |
"23 8 0 0.034890\n", | |
"24 9 2 0.887270\n", | |
"25 9 0 0.055837\n", | |
"26 9 1 0.056892\n", | |
"27 10 0 0.865592\n", | |
"28 10 1 0.067100\n", | |
"29 10 2 0.067308\n", | |
"30 11 0 0.057010\n", | |
"31 11 2 0.887172\n", | |
"32 11 1 0.055817\n", | |
"33 12 2 0.037280\n", | |
"34 12 1 0.037202\n", | |
"35 12 0 0.925519\n", | |
"36 13 0 0.938433\n", | |
"37 13 2 0.031113\n", | |
"38 13 1 0.030454\n", | |
"39 14 2 0.044199\n", | |
"40 14 1 0.913096\n", | |
"41 14 0 0.042706\n", | |
"42 15 2 0.038009\n", | |
"43 15 1 0.924724\n", | |
"44 15 0 0.037267\n", | |
"45 16 2 0.900197\n", | |
"46 16 1 0.051166\n", | |
"47 16 0 0.048637\n", | |
"48 17 2 0.057530\n", | |
"49 17 1 0.885009\n", | |
"50 17 0 0.057461\n", | |
"51 18 2 0.030509\n", | |
"52 18 1 0.031537\n", | |
"53 18 0 0.937954\n", | |
"54 19 2 0.058349\n", | |
"55 19 0 0.885147\n", | |
"56 19 1 0.056504\n", | |
"57 20 2 0.058302\n", | |
"58 20 1 0.885089\n", | |
"59 20 0 0.056608" | |
] | |
}, | |
"execution_count": 47, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" x4 = pd.read_sql(\"\"\"\n", | |
" select * from aud13_ldascore order by 1 \n", | |
" \"\"\", conn)\n", | |
"x4" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment