Created
September 23, 2022 18:47
-
-
Save yssymmt/f01a0a8e9d37e133a1a04b47cc13dc62 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "69b3e813", | |
"metadata": {}, | |
"source": [ | |
"#10_cosine_similarity" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d28d7b7b", | |
"metadata": {}, | |
"source": [ | |
"####パッケージの読み込み" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "26b3970a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from sqlalchemy import create_engine\n", | |
"import teradatasql\n", | |
"import teradatasqlalchemy\n", | |
"from sklearn.feature_extraction.text import TfidfVectorizer\n", | |
"from sklearn.metrics.pairwise import cosine_similarity,cosine_distances" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "775f0be5", | |
"metadata": {}, | |
"source": [ | |
"####Teradataへの接続、sqlalchemy エンジンを作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "35d3eb81", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"host = \"192.168.999.999\"\n", | |
"user = \"jumbo\"\n", | |
"password = \"mambo\"\n", | |
"connstr = \"teradatasql://{user}:{password}@{host}\".format(host=host, user=user, password=password)\n", | |
"engine = create_engine(connstr)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f97f8c9e", | |
"metadata": {}, | |
"source": [ | |
"####データの取得" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "c4b1f2a4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" <th>word</th>\n", | |
" <th>cat</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>漫才 ツッコミ 担当 たりないふたり ボケ 担当</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>ナナメ 夕暮れ 他 本 出す</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>深夜 一人 バスケットボール スリーポイント 練習</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>プライベート バスケットボール 足 怪我</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>星野源 日本 テレビ 界 希望 思う</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" <td>藤井青銅 ピンク ベスト じゃない方 しゃべれる</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>9</td>\n", | |
" <td>茶々 名前 チワワ 犬 飼う</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>10</td>\n", | |
" <td>結婚 直前 浮気 ばれる</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>11</td>\n", | |
" <td>六本木 社長 モンクレール ダウン もらう</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>12</td>\n", | |
" <td>ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>13</td>\n", | |
" <td>ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>14</td>\n", | |
" <td>漫才 ボケ 担当 ラジオ テレビ ボケ ない</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>15</td>\n", | |
" <td>普段 靴下 履く ない 足 裏 象 よう</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>16</td>\n", | |
" <td>バカリズム 存在 面白い ウケる スベる ない</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>17</td>\n", | |
" <td>山里亮太 ツッコミ 敵わ ない 思う</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>18</td>\n", | |
" <td>入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ</td>\n", | |
" <td>若林</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>19</td>\n", | |
" <td>ぼる塾 人 トゥース 掛け合い 面白い</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>20</td>\n", | |
" <td>スベる 芸風 スベる 怖い 思う</td>\n", | |
" <td>春日</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid word cat\n", | |
"0 1 若槻千夏 幾つ テレビ 番組 司会 務める 本番 以外 人見知り 話す ない 若林\n", | |
"1 2 漫才 ツッコミ 担当 たりないふたり ボケ 担当 若林\n", | |
"2 3 ナナメ 夕暮れ 他 本 出す 若林\n", | |
"3 4 深夜 一人 バスケットボール スリーポイント 練習 若林\n", | |
"4 5 プライベート バスケットボール 足 怪我 若林\n", | |
"5 6 星野源 日本 テレビ 界 希望 思う 若林\n", | |
"6 7 藤井青銅 ピンク ベスト じゃない方 しゃべれる 若林\n", | |
"7 8 mc waka 日本武道館 横浜アリーナ 人 歌 ラップ 茶々 入れる 若林\n", | |
"8 9 茶々 名前 チワワ 犬 飼う 春日\n", | |
"9 10 結婚 直前 浮気 ばれる 春日\n", | |
"10 11 六本木 社長 モンクレール ダウン もらう 春日\n", | |
"11 12 ピンク ベスト 着る 胸 張る トゥース 大声 叫ぶ 春日\n", | |
"12 13 ピンク セーター 着る 後輩 芸人 すいません ピンク 着 もらう 挨拶 春日\n", | |
"13 14 漫才 ボケ 担当 ラジオ テレビ ボケ ない 春日\n", | |
"14 15 普段 靴下 履く ない 足 裏 象 よう 春日\n", | |
"15 16 バカリズム 存在 面白い ウケる スベる ない 春日\n", | |
"16 17 山里亮太 ツッコミ 敵わ ない 思う 若林\n", | |
"17 18 入船 出身 築地 出身 嘘 地元 人 お前 入船 ツッコミ 若林\n", | |
"18 19 ぼる塾 人 トゥース 掛け合い 面白い 春日\n", | |
"19 20 スベる 芸風 スベる 怖い 思う 春日" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" train = pd.read_sql(\"\"\"\n", | |
" select * from jumbo.aud11_denorm order by 1 \n", | |
" \"\"\", conn)\n", | |
"train" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "12e8b15e", | |
"metadata": {}, | |
"source": [ | |
"####TfidfVectorizer(最小文字1件、一文字も対象に含める)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "7c701546", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"TfidfVectorizer(norm=None, smooth_idf=False, token_pattern='(?u)\\\\b\\\\w+\\\\b')" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"vectorizer = TfidfVectorizer(min_df=1, token_pattern='(?u)\\\\b\\\\w+\\\\b', norm=None, smooth_idf=False)\n", | |
"vectorizer.fit(train['word'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "5fa7e13a", | |
"metadata": {}, | |
"source": [ | |
"####抽出単語の確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "a843eb0a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['mc', 'waka', 'お前', 'しゃべれる', 'じゃない方', 'すいません', 'たりないふたり', 'ない',\n", | |
" 'ばれる', 'ぼる塾', 'もらう', 'よう', 'ウケる', 'スベる', 'スリーポイント', 'セーター', 'ダウン',\n", | |
" 'チワワ', 'ツッコミ', 'テレビ', 'トゥース', 'ナナメ', 'バカリズム', 'バスケットボール', 'ピンク',\n", | |
" 'プライベート', 'ベスト', 'ボケ', 'モンクレール', 'ラジオ', 'ラップ', '一人', '人', '人見知り',\n", | |
" '他', '以外', '入れる', '入船', '六本木', '出す', '出身', '務める', '叫ぶ', '司会', '名前',\n", | |
" '嘘', '地元', '夕暮れ', '大声', '存在', '履く', '山里亮太', '希望', '幾つ', '張る', '後輩',\n", | |
" '怖い', '思う', '怪我', '担当', '挨拶', '掛け合い', '敵わ', '日本', '日本武道館', '星野源',\n", | |
" '普段', '本', '本番', '横浜アリーナ', '歌', '浮気', '深夜', '漫才', '犬', '界', '番組',\n", | |
" '直前', '着', '着る', '社長', '築地', '結婚', '練習', '胸', '芸人', '芸風', '若槻千夏',\n", | |
" '茶々', '藤井青銅', '裏', '話す', '象', '足', '面白い', '靴下', '飼う'], dtype=object)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"vectorizer.get_feature_names_out()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "261b8fb6", | |
"metadata": {}, | |
"source": [ | |
"####ベクトルに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "c156ae3a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<20x97 sparse matrix of type '<class 'numpy.float64'>'\n", | |
"\twith 123 stored elements in Compressed Sparse Row format>" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_tfidf = vectorizer.transform(train['word'])\n", | |
"train_tfidf" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9f9d7f10", | |
"metadata": {}, | |
"source": [ | |
"####データの確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "4ef18bf5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mc</th>\n", | |
" <th>waka</th>\n", | |
" <th>お前</th>\n", | |
" <th>しゃべれる</th>\n", | |
" <th>じゃない方</th>\n", | |
" <th>すいません</th>\n", | |
" <th>たりないふたり</th>\n", | |
" <th>ない</th>\n", | |
" <th>ばれる</th>\n", | |
" <th>ぼる塾</th>\n", | |
" <th>...</th>\n", | |
" <th>若槻千夏</th>\n", | |
" <th>茶々</th>\n", | |
" <th>藤井青銅</th>\n", | |
" <th>裏</th>\n", | |
" <th>話す</th>\n", | |
" <th>象</th>\n", | |
" <th>足</th>\n", | |
" <th>面白い</th>\n", | |
" <th>靴下</th>\n", | |
" <th>飼う</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>2.386294</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.302585</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>3.995732</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.302585</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.302585</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>2.386294</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>2.386294</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>3.302585</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>2.386294</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.302585</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>2.386294</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.995732</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.302585</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>20 rows × 97 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mc waka お前 しゃべれる じゃない方 すいません たりないふたり \\\n", | |
"0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.995732 \n", | |
"2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"6 0.000000 0.000000 0.000000 3.995732 3.995732 0.000000 0.000000 \n", | |
"7 3.995732 3.995732 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"8 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"9 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"12 0.000000 0.000000 0.000000 0.000000 0.000000 3.995732 0.000000 \n", | |
"13 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"14 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"15 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"16 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"17 0.000000 0.000000 3.995732 0.000000 0.000000 0.000000 0.000000 \n", | |
"18 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"19 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"\n", | |
" ない ばれる ぼる塾 ... 若槻千夏 茶々 藤井青銅 裏 \\\n", | |
"0 2.386294 0.000000 0.000000 ... 3.995732 0.000000 0.000000 0.000000 \n", | |
"1 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"6 0.000000 0.000000 0.000000 ... 0.000000 0.000000 3.995732 0.000000 \n", | |
"7 0.000000 0.000000 0.000000 ... 0.000000 3.302585 0.000000 0.000000 \n", | |
"8 0.000000 0.000000 0.000000 ... 0.000000 3.302585 0.000000 0.000000 \n", | |
"9 0.000000 3.995732 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"12 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"13 2.386294 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"14 2.386294 0.000000 0.000000 ... 0.000000 0.000000 0.000000 3.995732 \n", | |
"15 2.386294 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"16 2.386294 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"17 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"18 0.000000 0.000000 3.995732 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"19 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"\n", | |
" 話す 象 足 面白い 靴下 飼う \n", | |
"0 3.995732 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 3.302585 0.000000 0.000000 0.000000 \n", | |
"5 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"6 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"7 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"8 0.000000 0.000000 0.000000 0.000000 0.000000 3.995732 \n", | |
"9 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"12 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"13 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"14 0.000000 3.995732 3.302585 0.000000 3.995732 0.000000 \n", | |
"15 0.000000 0.000000 0.000000 3.302585 0.000000 0.000000 \n", | |
"16 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"17 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"18 0.000000 0.000000 0.000000 3.302585 0.000000 0.000000 \n", | |
"19 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"\n", | |
"[20 rows x 97 columns]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_tfidf_df = pd.DataFrame(train_tfidf.toarray(),columns=vectorizer.get_feature_names_out())\n", | |
"train_tfidf_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "236fb3fb", | |
"metadata": {}, | |
"source": [ | |
"####コサイン類似度の計算" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "69c74a85", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[1. , 0. , 0. , 0. , 0. ,\n", | |
" 0.07440527, 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.11476802, 0.04276066,\n", | |
" 0.05220569, 0.06145693, 0. , 0. , 0. ],\n", | |
" [0. , 1. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.58890257, 0. ,\n", | |
" 0. , 0.12007167, 0.06135726, 0. , 0. ],\n", | |
" [0. , 0. , 1. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0. , 0. , 0. , 1. , 0.17205609,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0. , 0. , 0. , 0.17205609, 1. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0.1403326 ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0.07440527, 0. , 0. , 0. , 0. ,\n", | |
" 1. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.09563963, 0. ,\n", | |
" 0. , 0.12670101, 0. , 0. , 0.10200266],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 1. , 0. , 0. , 0. ,\n", | |
" 0. , 0.22979381, 0.16654587, 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 1. , 0.11018059, 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0.05078908, 0.09300635, 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0.11018059, 1. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 1. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 1. , 0. , 0.10258686, 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0.22979381, 0. , 0. , 0. ,\n", | |
" 0. , 1. , 0.21982674, 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.13504423, 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0.16654587, 0. , 0. , 0. ,\n", | |
" 0.10258686, 0.21982674, 1. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ],\n", | |
" [0.11476802, 0.58890257, 0. , 0. , 0. ,\n", | |
" 0.09563963, 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 1. , 0.05496404,\n", | |
" 0.06710456, 0.078996 , 0. , 0. , 0. ],\n", | |
" [0.04276066, 0. , 0. , 0. , 0.1403326 ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.05496404, 1. ,\n", | |
" 0.061854 , 0.07281499, 0. , 0. , 0. ],\n", | |
" [0.05220569, 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.06710456, 0.061854 ,\n", | |
" 1. , 0.08889845, 0. , 0.15933889, 0.27416729],\n", | |
" [0.06145693, 0.12007167, 0. , 0. , 0. ,\n", | |
" 0.12670101, 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0.078996 , 0.07281499,\n", | |
" 0.08889845, 1. , 0.07882383, 0. , 0.12418342],\n", | |
" [0. , 0.06135726, 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0.05078908, 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0.07882383, 1. , 0.07376072, 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0.09300635, 0. , 0. ,\n", | |
" 0. , 0.13504423, 0. , 0. , 0. ,\n", | |
" 0.15933889, 0. , 0.07376072, 1. , 0. ],\n", | |
" [0. , 0. , 0. , 0. , 0. ,\n", | |
" 0.10200266, 0. , 0. , 0. , 0. ,\n", | |
" 0. , 0. , 0. , 0. , 0. ,\n", | |
" 0.27416729, 0.12418342, 0. , 0. , 1. ]])" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result=cosine_similarity(train_tfidf)\n", | |
"result" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2fe86dc1", | |
"metadata": {}, | |
"source": [ | |
"####docid列を抽出" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "c7c0a288", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid\n", | |
"0 1\n", | |
"1 2\n", | |
"2 3\n", | |
"3 4\n", | |
"4 5\n", | |
"5 6\n", | |
"6 7\n", | |
"7 8\n", | |
"8 9\n", | |
"9 10\n", | |
"10 11\n", | |
"11 12\n", | |
"12 13\n", | |
"13 14\n", | |
"14 15\n", | |
"15 16\n", | |
"16 17\n", | |
"17 18\n", | |
"18 19\n", | |
"19 20" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfcosdocid = pd.DataFrame(train,columns=['docid'])\n", | |
"dfcosdocid" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0ee0b7c1", | |
"metadata": {}, | |
"source": [ | |
"####docid列を数値型から文字型に変換し、横に展開し、リストに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "30671b6d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['1',\n", | |
" '2',\n", | |
" '3',\n", | |
" '4',\n", | |
" '5',\n", | |
" '6',\n", | |
" '7',\n", | |
" '8',\n", | |
" '9',\n", | |
" '10',\n", | |
" '11',\n", | |
" '12',\n", | |
" '13',\n", | |
" '14',\n", | |
" '15',\n", | |
" '16',\n", | |
" '17',\n", | |
" '18',\n", | |
" '19',\n", | |
" '20']" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfcosdocid_str = dfcosdocid['docid'].astype(str)\n", | |
"a_dfcosdocid=dfcosdocid_str.values.T.tolist()\n", | |
"a_dfcosdocid" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9a43884c", | |
"metadata": {}, | |
"source": [ | |
"####データフレームに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "0ab781ba", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>10</th>\n", | |
" <th>11</th>\n", | |
" <th>12</th>\n", | |
" <th>13</th>\n", | |
" <th>14</th>\n", | |
" <th>15</th>\n", | |
" <th>16</th>\n", | |
" <th>17</th>\n", | |
" <th>18</th>\n", | |
" <th>19</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.074405</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.114768</td>\n", | |
" <td>0.042761</td>\n", | |
" <td>0.052206</td>\n", | |
" <td>0.061457</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.588903</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.120072</td>\n", | |
" <td>0.061357</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.172056</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.172056</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.140333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.074405</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.095640</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.126701</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102003</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.229794</td>\n", | |
" <td>0.166546</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.110181</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.050789</td>\n", | |
" <td>0.093006</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.110181</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102587</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.229794</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.219827</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.135044</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.166546</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.102587</td>\n", | |
" <td>0.219827</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0.114768</td>\n", | |
" <td>0.588903</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.095640</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.054964</td>\n", | |
" <td>0.067105</td>\n", | |
" <td>0.078996</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0.042761</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.140333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.054964</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.061854</td>\n", | |
" <td>0.072815</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0.052206</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.067105</td>\n", | |
" <td>0.061854</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.088898</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.159339</td>\n", | |
" <td>0.274167</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0.061457</td>\n", | |
" <td>0.120072</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.126701</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.078996</td>\n", | |
" <td>0.072815</td>\n", | |
" <td>0.088898</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.078824</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.124183</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.061357</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.050789</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.078824</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.073761</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.093006</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.135044</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.159339</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.073761</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102003</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.274167</td>\n", | |
" <td>0.124183</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3 4 5 6 7 \\\n", | |
"0 1.000000 0.000000 0.0 0.000000 0.000000 0.074405 0.000000 0.000000 \n", | |
"1 0.000000 1.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 1.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.0 1.000000 0.172056 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.0 0.172056 1.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.074405 0.000000 0.0 0.000000 0.000000 1.000000 0.000000 0.000000 \n", | |
"6 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 1.000000 0.000000 \n", | |
"7 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 1.000000 \n", | |
"8 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.110181 \n", | |
"9 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.229794 0.000000 \n", | |
"12 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.166546 0.000000 \n", | |
"13 0.114768 0.588903 0.0 0.000000 0.000000 0.095640 0.000000 0.000000 \n", | |
"14 0.042761 0.000000 0.0 0.000000 0.140333 0.000000 0.000000 0.000000 \n", | |
"15 0.052206 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"16 0.061457 0.120072 0.0 0.000000 0.000000 0.126701 0.000000 0.000000 \n", | |
"17 0.000000 0.061357 0.0 0.000000 0.000000 0.000000 0.000000 0.050789 \n", | |
"18 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.093006 \n", | |
"19 0.000000 0.000000 0.0 0.000000 0.000000 0.102003 0.000000 0.000000 \n", | |
"\n", | |
" 8 9 10 11 12 13 14 15 \\\n", | |
"0 0.000000 0.0 0.000000 0.000000 0.000000 0.114768 0.042761 0.052206 \n", | |
"1 0.000000 0.0 0.000000 0.000000 0.000000 0.588903 0.000000 0.000000 \n", | |
"2 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.140333 0.000000 \n", | |
"5 0.000000 0.0 0.000000 0.000000 0.000000 0.095640 0.000000 0.000000 \n", | |
"6 0.000000 0.0 0.000000 0.229794 0.166546 0.000000 0.000000 0.000000 \n", | |
"7 0.110181 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"8 1.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"9 0.000000 1.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.0 1.000000 0.000000 0.102587 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.0 0.000000 1.000000 0.219827 0.000000 0.000000 0.000000 \n", | |
"12 0.000000 0.0 0.102587 0.219827 1.000000 0.000000 0.000000 0.000000 \n", | |
"13 0.000000 0.0 0.000000 0.000000 0.000000 1.000000 0.054964 0.067105 \n", | |
"14 0.000000 0.0 0.000000 0.000000 0.000000 0.054964 1.000000 0.061854 \n", | |
"15 0.000000 0.0 0.000000 0.000000 0.000000 0.067105 0.061854 1.000000 \n", | |
"16 0.000000 0.0 0.000000 0.000000 0.000000 0.078996 0.072815 0.088898 \n", | |
"17 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"18 0.000000 0.0 0.000000 0.135044 0.000000 0.000000 0.000000 0.159339 \n", | |
"19 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.274167 \n", | |
"\n", | |
" 16 17 18 19 \n", | |
"0 0.061457 0.000000 0.000000 0.000000 \n", | |
"1 0.120072 0.061357 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.126701 0.000000 0.000000 0.102003 \n", | |
"6 0.000000 0.000000 0.000000 0.000000 \n", | |
"7 0.000000 0.050789 0.093006 0.000000 \n", | |
"8 0.000000 0.000000 0.000000 0.000000 \n", | |
"9 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.135044 0.000000 \n", | |
"12 0.000000 0.000000 0.000000 0.000000 \n", | |
"13 0.078996 0.000000 0.000000 0.000000 \n", | |
"14 0.072815 0.000000 0.000000 0.000000 \n", | |
"15 0.088898 0.000000 0.159339 0.274167 \n", | |
"16 1.000000 0.078824 0.000000 0.124183 \n", | |
"17 0.078824 1.000000 0.073761 0.000000 \n", | |
"18 0.000000 0.073761 1.000000 0.000000 \n", | |
"19 0.124183 0.000000 0.000000 1.000000 " | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfcos = pd.DataFrame(result)\n", | |
"dfcos" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "c8ac63bc", | |
"metadata": {}, | |
"source": [ | |
"####列名を追加" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "835f4877", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>10</th>\n", | |
" <th>11</th>\n", | |
" <th>12</th>\n", | |
" <th>13</th>\n", | |
" <th>14</th>\n", | |
" <th>15</th>\n", | |
" <th>16</th>\n", | |
" <th>17</th>\n", | |
" <th>18</th>\n", | |
" <th>19</th>\n", | |
" <th>20</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.074405</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.114768</td>\n", | |
" <td>0.042761</td>\n", | |
" <td>0.052206</td>\n", | |
" <td>0.061457</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.588903</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.120072</td>\n", | |
" <td>0.061357</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.172056</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.172056</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.140333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.074405</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.095640</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.126701</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102003</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.229794</td>\n", | |
" <td>0.166546</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.110181</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.050789</td>\n", | |
" <td>0.093006</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.110181</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102587</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.229794</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.219827</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.135044</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.166546</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.102587</td>\n", | |
" <td>0.219827</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0.114768</td>\n", | |
" <td>0.588903</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.095640</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.054964</td>\n", | |
" <td>0.067105</td>\n", | |
" <td>0.078996</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0.042761</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.140333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.054964</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.061854</td>\n", | |
" <td>0.072815</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0.052206</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.067105</td>\n", | |
" <td>0.061854</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.088898</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.159339</td>\n", | |
" <td>0.274167</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0.061457</td>\n", | |
" <td>0.120072</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.126701</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.078996</td>\n", | |
" <td>0.072815</td>\n", | |
" <td>0.088898</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.078824</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.124183</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.061357</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.050789</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.078824</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.073761</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.093006</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.135044</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.159339</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.073761</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102003</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.274167</td>\n", | |
" <td>0.124183</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 1 2 3 4 5 6 7 8 \\\n", | |
"0 1.000000 0.000000 0.0 0.000000 0.000000 0.074405 0.000000 0.000000 \n", | |
"1 0.000000 1.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 1.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.0 1.000000 0.172056 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.0 0.172056 1.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.074405 0.000000 0.0 0.000000 0.000000 1.000000 0.000000 0.000000 \n", | |
"6 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 1.000000 0.000000 \n", | |
"7 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 1.000000 \n", | |
"8 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.110181 \n", | |
"9 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.229794 0.000000 \n", | |
"12 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.166546 0.000000 \n", | |
"13 0.114768 0.588903 0.0 0.000000 0.000000 0.095640 0.000000 0.000000 \n", | |
"14 0.042761 0.000000 0.0 0.000000 0.140333 0.000000 0.000000 0.000000 \n", | |
"15 0.052206 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"16 0.061457 0.120072 0.0 0.000000 0.000000 0.126701 0.000000 0.000000 \n", | |
"17 0.000000 0.061357 0.0 0.000000 0.000000 0.000000 0.000000 0.050789 \n", | |
"18 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.093006 \n", | |
"19 0.000000 0.000000 0.0 0.000000 0.000000 0.102003 0.000000 0.000000 \n", | |
"\n", | |
" 9 10 11 12 13 14 15 16 \\\n", | |
"0 0.000000 0.0 0.000000 0.000000 0.000000 0.114768 0.042761 0.052206 \n", | |
"1 0.000000 0.0 0.000000 0.000000 0.000000 0.588903 0.000000 0.000000 \n", | |
"2 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.140333 0.000000 \n", | |
"5 0.000000 0.0 0.000000 0.000000 0.000000 0.095640 0.000000 0.000000 \n", | |
"6 0.000000 0.0 0.000000 0.229794 0.166546 0.000000 0.000000 0.000000 \n", | |
"7 0.110181 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"8 1.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"9 0.000000 1.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.0 1.000000 0.000000 0.102587 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.0 0.000000 1.000000 0.219827 0.000000 0.000000 0.000000 \n", | |
"12 0.000000 0.0 0.102587 0.219827 1.000000 0.000000 0.000000 0.000000 \n", | |
"13 0.000000 0.0 0.000000 0.000000 0.000000 1.000000 0.054964 0.067105 \n", | |
"14 0.000000 0.0 0.000000 0.000000 0.000000 0.054964 1.000000 0.061854 \n", | |
"15 0.000000 0.0 0.000000 0.000000 0.000000 0.067105 0.061854 1.000000 \n", | |
"16 0.000000 0.0 0.000000 0.000000 0.000000 0.078996 0.072815 0.088898 \n", | |
"17 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"18 0.000000 0.0 0.000000 0.135044 0.000000 0.000000 0.000000 0.159339 \n", | |
"19 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.274167 \n", | |
"\n", | |
" 17 18 19 20 \n", | |
"0 0.061457 0.000000 0.000000 0.000000 \n", | |
"1 0.120072 0.061357 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.126701 0.000000 0.000000 0.102003 \n", | |
"6 0.000000 0.000000 0.000000 0.000000 \n", | |
"7 0.000000 0.050789 0.093006 0.000000 \n", | |
"8 0.000000 0.000000 0.000000 0.000000 \n", | |
"9 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.135044 0.000000 \n", | |
"12 0.000000 0.000000 0.000000 0.000000 \n", | |
"13 0.078996 0.000000 0.000000 0.000000 \n", | |
"14 0.072815 0.000000 0.000000 0.000000 \n", | |
"15 0.088898 0.000000 0.159339 0.274167 \n", | |
"16 1.000000 0.078824 0.000000 0.124183 \n", | |
"17 0.078824 1.000000 0.073761 0.000000 \n", | |
"18 0.000000 0.073761 1.000000 0.000000 \n", | |
"19 0.124183 0.000000 0.000000 1.000000 " | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfcos.columns = a_dfcosdocid\n", | |
"dfcos" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "46241681", | |
"metadata": {}, | |
"source": [ | |
"#### docidを結合" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "f5d00260", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>10</th>\n", | |
" <th>...</th>\n", | |
" <th>12</th>\n", | |
" <th>13</th>\n", | |
" <th>14</th>\n", | |
" <th>15</th>\n", | |
" <th>16</th>\n", | |
" <th>17</th>\n", | |
" <th>18</th>\n", | |
" <th>19</th>\n", | |
" <th>20</th>\n", | |
" <th>docid</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.074405</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.114768</td>\n", | |
" <td>0.042761</td>\n", | |
" <td>0.052206</td>\n", | |
" <td>0.061457</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.588903</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.120072</td>\n", | |
" <td>0.061357</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.172056</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.172056</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.140333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.074405</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.095640</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.126701</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102003</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.229794</td>\n", | |
" <td>0.166546</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.110181</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.050789</td>\n", | |
" <td>0.093006</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.110181</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102587</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.229794</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.219827</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.135044</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.166546</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.219827</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0.114768</td>\n", | |
" <td>0.588903</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.095640</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.054964</td>\n", | |
" <td>0.067105</td>\n", | |
" <td>0.078996</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0.042761</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.140333</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.054964</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.061854</td>\n", | |
" <td>0.072815</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>0.052206</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.067105</td>\n", | |
" <td>0.061854</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.088898</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.159339</td>\n", | |
" <td>0.274167</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0.061457</td>\n", | |
" <td>0.120072</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.126701</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.078996</td>\n", | |
" <td>0.072815</td>\n", | |
" <td>0.088898</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.078824</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.124183</td>\n", | |
" <td>17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.061357</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.050789</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.078824</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.073761</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.093006</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.135044</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.159339</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.073761</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.102003</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.274167</td>\n", | |
" <td>0.124183</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>20 rows × 21 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 1 2 3 4 5 6 7 8 \\\n", | |
"0 1.000000 0.000000 0.0 0.000000 0.000000 0.074405 0.000000 0.000000 \n", | |
"1 0.000000 1.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"2 0.000000 0.000000 1.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.000000 0.0 1.000000 0.172056 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.000000 0.0 0.172056 1.000000 0.000000 0.000000 0.000000 \n", | |
"5 0.074405 0.000000 0.0 0.000000 0.000000 1.000000 0.000000 0.000000 \n", | |
"6 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 1.000000 0.000000 \n", | |
"7 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 1.000000 \n", | |
"8 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.110181 \n", | |
"9 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.229794 0.000000 \n", | |
"12 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.166546 0.000000 \n", | |
"13 0.114768 0.588903 0.0 0.000000 0.000000 0.095640 0.000000 0.000000 \n", | |
"14 0.042761 0.000000 0.0 0.000000 0.140333 0.000000 0.000000 0.000000 \n", | |
"15 0.052206 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"16 0.061457 0.120072 0.0 0.000000 0.000000 0.126701 0.000000 0.000000 \n", | |
"17 0.000000 0.061357 0.0 0.000000 0.000000 0.000000 0.000000 0.050789 \n", | |
"18 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.093006 \n", | |
"19 0.000000 0.000000 0.0 0.000000 0.000000 0.102003 0.000000 0.000000 \n", | |
"\n", | |
" 9 10 ... 12 13 14 15 16 \\\n", | |
"0 0.000000 0.0 ... 0.000000 0.000000 0.114768 0.042761 0.052206 \n", | |
"1 0.000000 0.0 ... 0.000000 0.000000 0.588903 0.000000 0.000000 \n", | |
"2 0.000000 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"3 0.000000 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"4 0.000000 0.0 ... 0.000000 0.000000 0.000000 0.140333 0.000000 \n", | |
"5 0.000000 0.0 ... 0.000000 0.000000 0.095640 0.000000 0.000000 \n", | |
"6 0.000000 0.0 ... 0.229794 0.166546 0.000000 0.000000 0.000000 \n", | |
"7 0.110181 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"8 1.000000 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"9 0.000000 1.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"10 0.000000 0.0 ... 0.000000 0.102587 0.000000 0.000000 0.000000 \n", | |
"11 0.000000 0.0 ... 1.000000 0.219827 0.000000 0.000000 0.000000 \n", | |
"12 0.000000 0.0 ... 0.219827 1.000000 0.000000 0.000000 0.000000 \n", | |
"13 0.000000 0.0 ... 0.000000 0.000000 1.000000 0.054964 0.067105 \n", | |
"14 0.000000 0.0 ... 0.000000 0.000000 0.054964 1.000000 0.061854 \n", | |
"15 0.000000 0.0 ... 0.000000 0.000000 0.067105 0.061854 1.000000 \n", | |
"16 0.000000 0.0 ... 0.000000 0.000000 0.078996 0.072815 0.088898 \n", | |
"17 0.000000 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"18 0.000000 0.0 ... 0.135044 0.000000 0.000000 0.000000 0.159339 \n", | |
"19 0.000000 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.274167 \n", | |
"\n", | |
" 17 18 19 20 docid \n", | |
"0 0.061457 0.000000 0.000000 0.000000 1 \n", | |
"1 0.120072 0.061357 0.000000 0.000000 2 \n", | |
"2 0.000000 0.000000 0.000000 0.000000 3 \n", | |
"3 0.000000 0.000000 0.000000 0.000000 4 \n", | |
"4 0.000000 0.000000 0.000000 0.000000 5 \n", | |
"5 0.126701 0.000000 0.000000 0.102003 6 \n", | |
"6 0.000000 0.000000 0.000000 0.000000 7 \n", | |
"7 0.000000 0.050789 0.093006 0.000000 8 \n", | |
"8 0.000000 0.000000 0.000000 0.000000 9 \n", | |
"9 0.000000 0.000000 0.000000 0.000000 10 \n", | |
"10 0.000000 0.000000 0.000000 0.000000 11 \n", | |
"11 0.000000 0.000000 0.135044 0.000000 12 \n", | |
"12 0.000000 0.000000 0.000000 0.000000 13 \n", | |
"13 0.078996 0.000000 0.000000 0.000000 14 \n", | |
"14 0.072815 0.000000 0.000000 0.000000 15 \n", | |
"15 0.088898 0.000000 0.159339 0.274167 16 \n", | |
"16 1.000000 0.078824 0.000000 0.124183 17 \n", | |
"17 0.078824 1.000000 0.073761 0.000000 18 \n", | |
"18 0.000000 0.073761 1.000000 0.000000 19 \n", | |
"19 0.124183 0.000000 0.000000 1.000000 20 \n", | |
"\n", | |
"[20 rows x 21 columns]" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfcos_a = pd.merge(dfcos, dfcosdocid, how='inner', left_index=True, right_index=True)\n", | |
"dfcos_a " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "da2993ba", | |
"metadata": {}, | |
"source": [ | |
"#### 縦持ちパラノイア" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "ded2ab75", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>docid1</th>\n", | |
" <th>docid2</th>\n", | |
" <th>score</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>395</th>\n", | |
" <td>16</td>\n", | |
" <td>20</td>\n", | |
" <td>0.274167</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>396</th>\n", | |
" <td>17</td>\n", | |
" <td>20</td>\n", | |
" <td>0.124183</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>397</th>\n", | |
" <td>18</td>\n", | |
" <td>20</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>398</th>\n", | |
" <td>19</td>\n", | |
" <td>20</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>399</th>\n", | |
" <td>20</td>\n", | |
" <td>20</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>400 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" docid1 docid2 score\n", | |
"0 1 1 1.000000\n", | |
"1 2 1 0.000000\n", | |
"2 3 1 0.000000\n", | |
"3 4 1 0.000000\n", | |
"4 5 1 0.000000\n", | |
".. ... ... ...\n", | |
"395 16 20 0.274167\n", | |
"396 17 20 0.124183\n", | |
"397 18 20 0.000000\n", | |
"398 19 20 0.000000\n", | |
"399 20 20 1.000000\n", | |
"\n", | |
"[400 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dfcos_vertico = pd.melt(dfcos_a, id_vars='docid')\n", | |
"dfcos_vertico.columns =['docid1','docid2','score']\n", | |
"dfcos_vertico" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment