Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jshirius/495fd41acbb91291d75a9f414e63c4e4 to your computer and use it in GitHub Desktop.
Save jshirius/495fd41acbb91291d75a9f414e63c4e4 to your computer and use it in GitHub Desktop.
テックキャンプの関連キーワードで共起語ネットワークを作ってみるとどうなる?
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## キーワードの共起語ネットワークを作る\n",
"- 関連キーワードツールで作成した関連キーワードのCSVを読み込む\n",
"- 関連キーワードを元にpyvisを使って共起語ネットワークを作ってみる\n",
"- その結果、ネットワークから重要と思われるキーワード「評判」「料金」「転職」が認知できたといえる"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import pprint\n",
"import datetime\n",
"import pandas as pd\n",
"import os \n",
"import gspread\n",
"import math\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>No</th>\n",
" <th>階層</th>\n",
" <th>単語数</th>\n",
" <th>キーワード</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>101</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>102</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 料金</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>103</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>104</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ まこなり</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>105</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 炎上</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 転職</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>107</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ ひどい</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>108</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>テックキャンプとは</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>109</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 就職先</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 転職できない</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>200</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 料金</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>201</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 料金改定</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>202</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 料金 分割</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>203</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 料金 オンライン</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>300</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>301</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 評判 悪い</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>302</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 評判 転職</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>303</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ イナズマ 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>304</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ デザイナー 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>305</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 名古屋 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>306</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ プログラミング教養 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>307</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ オンライン 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>308</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 梅田 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>309</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 卒業生 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>400</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ まこなり</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>401</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ まこなり 炎上</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>402</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ まこなり社長</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>403</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>マコなり社長 テックキャンプ 評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>404</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>マコなり社長 テックキャンプ 値段</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>748</th>\n",
" <td>71200</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 2週間前</td>\n",
" </tr>\n",
" <tr>\n",
" <th>749</th>\n",
" <td>71300</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 2ヶ月</td>\n",
" </tr>\n",
" <tr>\n",
" <th>750</th>\n",
" <td>71400</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 30代</td>\n",
" </tr>\n",
" <tr>\n",
" <th>751</th>\n",
" <td>71500</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 35歳</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752</th>\n",
" <td>71600</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 30代後半</td>\n",
" </tr>\n",
" <tr>\n",
" <th>753</th>\n",
" <td>71700</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 3ヶ月</td>\n",
" </tr>\n",
" <tr>\n",
" <th>754</th>\n",
" <td>71800</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 36歳</td>\n",
" </tr>\n",
" <tr>\n",
" <th>755</th>\n",
" <td>71900</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 30歳</td>\n",
" </tr>\n",
" <tr>\n",
" <th>756</th>\n",
" <td>72000</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ 転職 30代</td>\n",
" </tr>\n",
" <tr>\n",
" <th>757</th>\n",
" <td>72100</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 40代</td>\n",
" </tr>\n",
" <tr>\n",
" <th>758</th>\n",
" <td>72200</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 45歳</td>\n",
" </tr>\n",
" <tr>\n",
" <th>759</th>\n",
" <td>72300</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>760</th>\n",
" <td>72400</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 50代</td>\n",
" </tr>\n",
" <tr>\n",
" <th>761</th>\n",
" <td>72500</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 5ch</td>\n",
" </tr>\n",
" <tr>\n",
" <th>762</th>\n",
" <td>72600</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 5ちゃんねる</td>\n",
" </tr>\n",
" <tr>\n",
" <th>763</th>\n",
" <td>72700</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 5万円</td>\n",
" </tr>\n",
" <tr>\n",
" <th>764</th>\n",
" <td>72800</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 600時間</td>\n",
" </tr>\n",
" <tr>\n",
" <th>765</th>\n",
" <td>72900</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 7つの闇</td>\n",
" </tr>\n",
" <tr>\n",
" <th>766</th>\n",
" <td>73000</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 7日間</td>\n",
" </tr>\n",
" <tr>\n",
" <th>767</th>\n",
" <td>73100</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 70万</td>\n",
" </tr>\n",
" <tr>\n",
" <th>768</th>\n",
" <td>73200</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 83期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>769</th>\n",
" <td>73300</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 80期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>770</th>\n",
" <td>73400</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 81期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>771</th>\n",
" <td>73500</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>772</th>\n",
" <td>73600</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>テックキャンプ チーム開発 80期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>773</th>\n",
" <td>73700</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>774</th>\n",
" <td>73800</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 99期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>775</th>\n",
" <td>73900</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 95期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>776</th>\n",
" <td>74000</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 98期</td>\n",
" </tr>\n",
" <tr>\n",
" <th>777</th>\n",
" <td>74100</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>テックキャンプ 96期</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>778 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" No 階層 単語数 キーワード\n",
"0 100 1 1 テックキャンプ\n",
"1 101 2 1 テックキャンプ\n",
"2 102 2 2 テックキャンプ 料金\n",
"3 103 2 2 テックキャンプ 評判\n",
"4 104 2 2 テックキャンプ まこなり\n",
"5 105 2 2 テックキャンプ 炎上\n",
"6 106 2 2 テックキャンプ 転職\n",
"7 107 2 2 テックキャンプ ひどい\n",
"8 108 2 1 テックキャンプとは\n",
"9 109 2 2 テックキャンプ 就職先\n",
"10 110 2 2 テックキャンプ 転職できない\n",
"11 200 1 2 テックキャンプ 料金\n",
"12 201 2 2 テックキャンプ 料金改定\n",
"13 202 2 3 テックキャンプ 料金 分割\n",
"14 203 2 3 テックキャンプ 料金 オンライン\n",
"15 300 1 2 テックキャンプ 評判\n",
"16 301 2 3 テックキャンプ 評判 悪い\n",
"17 302 2 3 テックキャンプ 評判 転職\n",
"18 303 2 3 テックキャンプ イナズマ 評判\n",
"19 304 2 3 テックキャンプ デザイナー 評判\n",
"20 305 2 3 テックキャンプ 名古屋 評判\n",
"21 306 2 3 テックキャンプ プログラミング教養 評判\n",
"22 307 2 3 テックキャンプ オンライン 評判\n",
"23 308 2 3 テックキャンプ 梅田 評判\n",
"24 309 2 3 テックキャンプ 卒業生 評判\n",
"25 400 1 2 テックキャンプ まこなり\n",
"26 401 2 3 テックキャンプ まこなり 炎上\n",
"27 402 2 2 テックキャンプ まこなり社長\n",
"28 403 2 3 マコなり社長 テックキャンプ 評判\n",
"29 404 2 3 マコなり社長 テックキャンプ 値段\n",
".. ... .. ... ...\n",
"748 71200 1 2 テックキャンプ 2週間前\n",
"749 71300 1 2 テックキャンプ 2ヶ月\n",
"750 71400 1 2 テックキャンプ 30代\n",
"751 71500 1 2 テックキャンプ 35歳\n",
"752 71600 1 2 テックキャンプ 30代後半\n",
"753 71700 1 2 テックキャンプ 3ヶ月\n",
"754 71800 1 2 テックキャンプ 36歳\n",
"755 71900 1 2 テックキャンプ 30歳\n",
"756 72000 1 3 テックキャンプ 転職 30代\n",
"757 72100 1 2 テックキャンプ 40代\n",
"758 72200 1 2 テックキャンプ 45歳\n",
"759 72300 1 2 テックキャンプ 4\n",
"760 72400 1 2 テックキャンプ 50代\n",
"761 72500 1 2 テックキャンプ 5ch\n",
"762 72600 1 2 テックキャンプ 5ちゃんねる\n",
"763 72700 1 2 テックキャンプ 5万円\n",
"764 72800 1 2 テックキャンプ 600時間\n",
"765 72900 1 2 テックキャンプ 7つの闇\n",
"766 73000 1 2 テックキャンプ 7日間\n",
"767 73100 1 2 テックキャンプ 70万\n",
"768 73200 1 2 テックキャンプ 83期\n",
"769 73300 1 2 テックキャンプ 80期\n",
"770 73400 1 2 テックキャンプ 81期\n",
"771 73500 1 2 テックキャンプ 8\n",
"772 73600 1 3 テックキャンプ チーム開発 80期\n",
"773 73700 1 2 テックキャンプ 99\n",
"774 73800 1 2 テックキャンプ 99期\n",
"775 73900 1 2 テックキャンプ 95期\n",
"776 74000 1 2 テックキャンプ 98期\n",
"777 74100 1 2 テックキャンプ 96期\n",
"\n",
"[778 rows x 4 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the CSV exported by the related-keywords tool\n",
"# https://related-keywords.com/\n",
"# The export is tab-separated even though the file extension is .csv\n",
"keyword_csv_path = \"rakkokeyword_202142475859.csv\"\n",
"df = pd.read_csv(keyword_csv_path, sep='\\t')\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ネットワークを作成する"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Get the keyword phrases, skipping rows whose keyword is missing\n",
"keywords = df[\"キーワード\"].dropna().tolist()\n",
"\n",
"# Tokenize each phrase into words. str.split() with no argument splits on any run\n",
"# of whitespace (including full-width spaces) and never yields empty strings,\n",
"# so doubled or stray spaces cannot produce an empty-string word.\n",
"keyword_list = [phrase.split() for phrase in keywords]\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pyvis\n",
"# Helper that draws the network (used for visualization)\n",
"# Written following this reference article:\n",
"# [自然言語処理/NLP] pyvisライブラリを使って共起ネットワークを簡単に描画してみる(SageMaker使用)\n",
"# https://dev.classmethod.jp/articles/mrmo-20190930/\n",
"\n",
"def kyoki_word_network(limit=2000, csv_path=\"kyoki.csv\"):\n",
"    \"\"\"Build a pyvis co-occurrence network from a CSV of word pairs.\n",
"\n",
"    Args:\n",
"        limit: Maximum number of rows (word pairs) taken from the top of the CSV.\n",
"        csv_path: CSV file providing the 'first', 'second' and 'count' columns.\n",
"            Defaults to the previously hard-coded \"kyoki.csv\".\n",
"\n",
"    Returns:\n",
"        The pyvis Network. Every row adds an edge between its two words whose\n",
"        value is the row's count; each node's value is its number of neighbors.\n",
"    \"\"\"\n",
"    from pyvis.network import Network\n",
"    import pandas as pd\n",
"\n",
"    got_net = Network(height=\"1000px\", width=\"95%\", bgcolor=\"#FFFFFF\", font_color=\"black\", notebook=True)\n",
"\n",
"    # set the physics layout of the network\n",
"    got_net.force_atlas_2based()\n",
"\n",
"    # Read the word columns as plain strings. By default pandas converts tokens\n",
"    # such as NA or null (and empty fields) to float NaN, which would break the\n",
"    # string join on the node titles below.\n",
"    got_data = pd.read_csv(\n",
"        csv_path,\n",
"        dtype={\"first\": str, \"second\": str},\n",
"        keep_default_na=False,\n",
"    )[:limit]\n",
"\n",
"    for src, dst, w in zip(got_data[\"first\"], got_data[\"second\"], got_data[\"count\"]):\n",
"        got_net.add_node(src, src, title=src)\n",
"        got_net.add_node(dst, dst, title=dst)\n",
"        got_net.add_edge(src, dst, value=w)\n",
"\n",
"    neighbor_map = got_net.get_adj_list()\n",
"\n",
"    # add neighbor data to node hover data\n",
"    for node in got_net.nodes:\n",
"        # Sorted so the hover text does not depend on set iteration order\n",
"        neighbors = sorted(neighbor_map[node[\"id\"]])\n",
"        node[\"title\"] += \" Neighbors:<br>\" + \"<br>\".join(neighbors)\n",
"        node[\"value\"] = len(neighbors)\n",
"\n",
"    got_net.show_buttons(filter_=['physics'])\n",
"    return got_net\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>first</th>\n",
" <th>second</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>テックキャンプ</td>\n",
" <td>評判</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>26</td>\n",
" <td>テックキャンプ</td>\n",
" <td>料金</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13</td>\n",
" <td>テックキャンプ</td>\n",
" <td>転職</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12</td>\n",
" <td>エンジニア転職</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10</td>\n",
" <td>イナズマ</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>8</td>\n",
" <td>まこなり</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8</td>\n",
" <td>テックキャンプ</td>\n",
" <td>炎上</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>テックキャンプ</td>\n",
" <td>デザイナー</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>7</td>\n",
" <td>テックキャンプ</td>\n",
" <td>就職先</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>7</td>\n",
" <td>テックキャンプ</td>\n",
" <td>プログラミング教養</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>6</td>\n",
" <td>テックキャンプ</td>\n",
" <td>転職できない</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>6</td>\n",
" <td>オンライン</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>6</td>\n",
" <td>テックキャンプ</td>\n",
" <td>卒業生</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>6</td>\n",
" <td>cm</td>\n",
" <td>テックキャンプ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>6</td>\n",
" <td>テックキャンプ</td>\n",
" <td>言語</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count first second\n",
"0 30 テックキャンプ 評判\n",
"1 26 テックキャンプ 料金\n",
"2 13 テックキャンプ 転職\n",
"3 12 エンジニア転職 テックキャンプ\n",
"4 10 イナズマ テックキャンプ\n",
"5 8 まこなり テックキャンプ\n",
"6 8 テックキャンプ 炎上\n",
"7 8 テックキャンプ デザイナー\n",
"8 7 テックキャンプ 就職先\n",
"9 7 テックキャンプ プログラミング教養\n",
"10 6 テックキャンプ 転職できない\n",
"11 6 オンライン テックキャンプ\n",
"12 6 テックキャンプ 卒業生\n",
"13 6 cm テックキャンプ\n",
"14 6 テックキャンプ 言語"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import itertools\n",
"import collections\n",
"\n",
"\n",
"# Build every unordered pair of words that appear in the same keyword phrase.\n",
"# Each pair is sorted so that (a, b) and (b, a) are counted as the same pair.\n",
"sentences = keyword_list\n",
"sentence_combinations = [\n",
"    [tuple(sorted(pair)) for pair in itertools.combinations(sentence, 2)]\n",
"    for sentence in sentences\n",
"]\n",
"target_combinations = list(itertools.chain.from_iterable(sentence_combinations))\n",
"\n",
"# Count the pairs, most frequent first\n",
"ct = collections.Counter(target_combinations)\n",
"common_list = ct.most_common()\n",
"\n",
"# NOTE: this rebinds `df`, which until here held the keyword table read from the CSV\n",
"df = pd.DataFrame(\n",
"    [{'first': pair[0], 'second': pair[1], 'count': freq} for pair, freq in common_list]\n",
")\n",
"df.to_csv('kyoki.csv', index=False)\n",
"df.head(15)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('テックキャンプ', 470),\n",
" ('評判', 13),\n",
" ('料金', 13),\n",
" ('エンジニア転職', 6),\n",
" ('イナズマ', 6),\n",
" ('オンライン', 4),\n",
" ('cm', 4),\n",
" ('デザイナー', 4),\n",
" ('転職', 4),\n",
" ('うざい', 3)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import collections \n",
"# Count per word: the number of distinct co-occurrence pairs each word belongs to\n",
"pair_members = df[\"first\"].values.tolist() + df[\"second\"].values.tolist()\n",
"word_counts = collections.Counter(pair_members)\n",
"text_list = word_counts.most_common()\n",
"text_list[0:10]"
]
},
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <iframe\n",
" width=\"95%\"\n",
" height=\"1000px\"\n",
" src=\"kyoki.html\"\n",
" frameborder=\"0\"\n",
" allowfullscreen\n",
" ></iframe>\n",
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x10a307748>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#pyvisを使って共起語のネットワークを作成する\n",
"got_net = kyoki_word_network()\n",
"got_net.show(\"kyoki.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment