-
-
Save seibe2/74e47ede37e312f9ee200213118820e9 to your computer and use it in GitHub Desktop.
トゥート分析2021年
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 549, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# import\n", | |
"\n", | |
"import pandas as pd\n", | |
"import re\n", | |
"import matplotlib\n", | |
"import matplotlib.pyplot as plt\n", | |
"from IPython.display import display\n", | |
"from IPython.core.interactiveshell import InteractiveShell\n", | |
"InteractiveShell.ast_node_interactivity = \"all\"\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 550, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<Figure size 640x480 with 0 Axes>" | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# 図の表示設定\n", | |
"plt.style.use('default')\n", | |
"fig = plt.figure()\n", | |
"fig.patch.set_alpha(0)" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 551, | |
"outputs": [], | |
"source": [ | |
"def conditional_freq_series(data_to_be_analyzed, filter_of_data, element_regexp):\n", | |
" \"\"\"\n", | |
" ある条件下でのある正規表現の度数を出す\n", | |
" :param data_to_be_analyzed:\n", | |
" :param filter_of_data:\n", | |
" :param element_regexp:\n", | |
" :return:\n", | |
" \"\"\"\n", | |
" filtered = data_to_be_analyzed[filter_of_data]\n", | |
" column_name = \"counts\"\n", | |
" regexp_with_column_name = f\"(?P<{column_name}>{element_regexp})\"\n", | |
" extracted = filtered.str.extractall(regexp_with_column_name)\n", | |
" return extracted[column_name].value_counts()\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 552, | |
"outputs": [], | |
"source": [ | |
"def sanitize(toots, my_name):\n", | |
" \"\"\"\n", | |
" sanitize toots dataframe for data analysis\n", | |
" :param my_name:\n", | |
" :param toots:\n", | |
" :return:\n", | |
" \"\"\"\n", | |
" # null取り\n", | |
" toots = toots.fillna(\"\")\n", | |
"\n", | |
" # HTMLタグ外し\n", | |
" toots['content'] = toots['content'].str.replace(r\"<[^>]*?>\", \"\", regex=True)\n", | |
"\n", | |
" # 自身のトゥートだけ\n", | |
" toots = toots[toots[\"name\"] == my_name]\n", | |
"\n", | |
" return toots\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 553, | |
"outputs": [], | |
"source": [ | |
"def filter_by_year(toots, year: str):\n", | |
" \"\"\"\n", | |
" 年ごとでフィルター\n", | |
" :param toots:\n", | |
" :param year:\n", | |
" :return:\n", | |
" \"\"\"\n", | |
" start_year = f\"{year}-01-01\"\n", | |
" end_year = f\"{year}-12-31\"\n", | |
" toots_datetime = toots[\"datetime\"]\n", | |
" toots_year_filter = (start_year <= toots_datetime) & (toots_datetime <= end_year)\n", | |
" return toots[toots_year_filter]\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 554, | |
"outputs": [], | |
"source": [ | |
"id_regex = r\"@[A-Za-z0-9._@]+\"" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 555, | |
"outputs": [], | |
"source": [ | |
"def make_empty_reply_ranking(toots):\n", | |
" \"\"\"\n", | |
" 空リプライランキング\n", | |
" :param toots:\n", | |
" :return:\n", | |
" \"\"\"\n", | |
" id_regex = r\"@[A-Za-z0-9._@]+\"\n", | |
" toots_id_deleted = toots[\"content\"].str.replace(id_regex, \"\", regex=True)\n", | |
" toots_empty_reply_filter = toots_id_deleted.str.contains(\"[^ ]\") == False\n", | |
" toots_empty_reply_ranking = conditional_freq_series(toots['content'], toots_empty_reply_filter,\n", | |
" id_regex)\n", | |
" return toots_empty_reply_ranking\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 556, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": " id \\\n0 https://handon.club/users/seibe/statuses/512 \n1 https://handon.club/users/seibe/statuses/514 \n2 https://handon.club/users/seibe/statuses/515 \n3 https://handon.club/users/seibe/statuses/518 \n4 https://handon.club/users/seibe/statuses/519 \n... ... \n141121 https://handon.club/users/seibe/statuses/10741... \n141122 https://handon.club/users/seibe/statuses/10741... \n141123 https://handon.club/users/seibe/statuses/10741... \n141124 https://handon.club/users/seibe/statuses/10741... \n141125 https://handon.club/users/seibe/statuses/10741... \n\n datetime name \\\n0 2017-04-16 12:25:41+09 せいべ \n1 2017-04-16 12:27:06+09 せいべ \n2 2017-04-16 12:27:35+09 せいべ \n3 2017-04-16 12:33:01+09 せいべ \n4 2017-04-16 12:33:16+09 せいべ \n... ... ... \n141121 2021-12-08 19:00:55+09 せいべ \n141122 2021-12-08 20:05:33+09 せいべ \n141123 2021-12-08 21:00:19+09 せいべ \n141124 2021-12-08 21:09:21+09 せいべ \n141125 2021-12-08 21:43:51+09 せいべ \n\n content \n0 <p>haaaaaaaaaan</p> \n1 <p>未収載</p> \n2 <p>未収載はローカルタイムラインにも乗らないのか</p> \n3 <p><span class=\"h-card\"><a href=\"https://hando... \n4 <p>空リプでけへんの</p> \n... ... \n141121 <p><span class=\"h-card\"><a href=\"https://hando... \n141122 <p>たはお4944</p> \n141123 <p>あのひとエオルゼアも破壊してる…</p> \n141124 <p>なんとかかんとかグラコロ!</p> \n141125 <p>箱根</p> \n\n[141126 rows x 4 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>https://handon.club/users/seibe/statuses/512</td>\n <td>2017-04-16 12:25:41+09</td>\n <td>せいべ</td>\n <td><p>haaaaaaaaaan</p></td>\n </tr>\n <tr>\n <th>1</th>\n <td>https://handon.club/users/seibe/statuses/514</td>\n <td>2017-04-16 12:27:06+09</td>\n <td>せいべ</td>\n <td><p>未収載</p></td>\n </tr>\n <tr>\n <th>2</th>\n <td>https://handon.club/users/seibe/statuses/515</td>\n <td>2017-04-16 12:27:35+09</td>\n <td>せいべ</td>\n <td><p>未収載はローカルタイムラインにも乗らないのか</p></td>\n </tr>\n <tr>\n <th>3</th>\n <td>https://handon.club/users/seibe/statuses/518</td>\n <td>2017-04-16 12:33:01+09</td>\n <td>せいべ</td>\n <td><p><span class=\"h-card\"><a href=\"https://hando...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>https://handon.club/users/seibe/statuses/519</td>\n <td>2017-04-16 12:33:16+09</td>\n <td>せいべ</td>\n <td><p>空リプでけへんの</p></td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>141121</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 19:00:55+09</td>\n <td>せいべ</td>\n <td><p><span class=\"h-card\"><a href=\"https://hando...</td>\n </tr>\n <tr>\n <th>141122</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 20:05:33+09</td>\n <td>せいべ</td>\n <td><p>たはお4944</p></td>\n </tr>\n <tr>\n <th>141123</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:00:19+09</td>\n <td>せいべ</td>\n <td><p>あのひとエオルゼアも破壊してる…</p></td>\n </tr>\n <tr>\n <th>141124</th>\n 
<td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:09:21+09</td>\n <td>せいべ</td>\n <td><p>なんとかかんとかグラコロ!</p></td>\n </tr>\n <tr>\n <th>141125</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:43:51+09</td>\n <td>せいべ</td>\n <td><p>箱根</p></td>\n </tr>\n </tbody>\n</table>\n<p>141126 rows × 4 columns</p>\n</div>" | |
}, | |
"execution_count": 556, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"toots = pd.read_csv('./data/user_9615_note.csv')\n", | |
"toots" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 557, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": " id \\\n0 https://handon.club/users/seibe/statuses/512 \n1 https://handon.club/users/seibe/statuses/514 \n2 https://handon.club/users/seibe/statuses/515 \n3 https://handon.club/users/seibe/statuses/518 \n4 https://handon.club/users/seibe/statuses/519 \n... ... \n141121 https://handon.club/users/seibe/statuses/10741... \n141122 https://handon.club/users/seibe/statuses/10741... \n141123 https://handon.club/users/seibe/statuses/10741... \n141124 https://handon.club/users/seibe/statuses/10741... \n141125 https://handon.club/users/seibe/statuses/10741... \n\n datetime name content \n0 2017-04-16 12:25:41+09 せいべ haaaaaaaaaan \n1 2017-04-16 12:27:06+09 せいべ 未収載 \n2 2017-04-16 12:27:35+09 せいべ 未収載はローカルタイムラインにも乗らないのか \n3 2017-04-16 12:33:01+09 せいべ @komog \n4 2017-04-16 12:33:16+09 せいべ 空リプでけへんの \n... ... ... ... \n141121 2021-12-08 19:00:55+09 せいべ @S_iRe_N \n141122 2021-12-08 20:05:33+09 せいべ たはお4944 \n141123 2021-12-08 21:00:19+09 せいべ あのひとエオルゼアも破壊してる… \n141124 2021-12-08 21:09:21+09 せいべ なんとかかんとかグラコロ! \n141125 2021-12-08 21:43:51+09 せいべ 箱根 \n\n[141126 rows x 4 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>https://handon.club/users/seibe/statuses/512</td>\n <td>2017-04-16 12:25:41+09</td>\n <td>せいべ</td>\n <td>haaaaaaaaaan</td>\n </tr>\n <tr>\n <th>1</th>\n <td>https://handon.club/users/seibe/statuses/514</td>\n <td>2017-04-16 12:27:06+09</td>\n <td>せいべ</td>\n <td>未収載</td>\n </tr>\n <tr>\n <th>2</th>\n <td>https://handon.club/users/seibe/statuses/515</td>\n <td>2017-04-16 12:27:35+09</td>\n <td>せいべ</td>\n <td>未収載はローカルタイムラインにも乗らないのか</td>\n </tr>\n <tr>\n <th>3</th>\n <td>https://handon.club/users/seibe/statuses/518</td>\n <td>2017-04-16 12:33:01+09</td>\n <td>せいべ</td>\n <td>@komog</td>\n </tr>\n <tr>\n <th>4</th>\n <td>https://handon.club/users/seibe/statuses/519</td>\n <td>2017-04-16 12:33:16+09</td>\n <td>せいべ</td>\n <td>空リプでけへんの</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>141121</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 19:00:55+09</td>\n <td>せいべ</td>\n <td>@S_iRe_N</td>\n </tr>\n <tr>\n <th>141122</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 20:05:33+09</td>\n <td>せいべ</td>\n <td>たはお4944</td>\n </tr>\n <tr>\n <th>141123</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:00:19+09</td>\n <td>せいべ</td>\n <td>あのひとエオルゼアも破壊してる…</td>\n </tr>\n <tr>\n <th>141124</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:09:21+09</td>\n <td>せいべ</td>\n <td>なんとかかんとかグラコロ!</td>\n </tr>\n <tr>\n <th>141125</th>\n 
<td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:43:51+09</td>\n <td>せいべ</td>\n <td>箱根</td>\n </tr>\n </tbody>\n</table>\n<p>141126 rows × 4 columns</p>\n</div>" | |
}, | |
"execution_count": 557, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#sanitize\n", | |
"my_name = \"せいべ\"\n", | |
"toots = sanitize(toots, my_name)\n", | |
"toots" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 558, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": " id \\\n59308 https://handon.club/users/seibe/statuses/10340... \n59309 https://handon.club/users/seibe/statuses/10340... \n59310 https://handon.club/users/seibe/statuses/10340... \n59311 https://handon.club/users/seibe/statuses/10340... \n59312 https://handon.club/users/seibe/statuses/10340... \n... ... \n107576 https://handon.club/users/seibe/statuses/10546... \n107577 https://handon.club/users/seibe/statuses/10546... \n107578 https://handon.club/users/seibe/statuses/10546... \n107579 https://handon.club/users/seibe/statuses/10546... \n107580 https://handon.club/users/seibe/statuses/10546... \n\n datetime name \\\n59308 2020-01-01 08:22:10+09 せいべ \n59309 2020-01-01 08:23:00+09 せいべ \n59310 2020-01-01 08:23:25+09 せいべ \n59311 2020-01-01 08:56:08+09 せいべ \n59312 2020-01-01 08:56:24+09 せいべ \n... ... ... \n107576 2020-12-30 23:34:09+09 せいべ \n107577 2020-12-30 23:35:00+09 せいべ \n107578 2020-12-30 23:42:34+09 せいべ \n107579 2020-12-30 23:48:05+09 せいべ \n107580 2020-12-30 23:49:05+09 せいべ \n\n content \n59308 うあああああけおめ!! \n59309 2020年のあなたの運勢は大吉です!今年は楽しいはんどんライフが送れるでしょう!#どんみくじ... \n59310 ナイスあけおめおハンバーグ! \n59311 うおおおお \n59312 unix! \n... ... \n107576 これホラーゲームなのか・・ \n107577 見えてるw \n107578 クソって言わないって言った直後にクソっていうの草 \n107579 500円なら寝る前に一言書き置きしたいときとかダイイングメッセージ残しておきたい時用にいたる... \n107580 これのことです \n\n[48273 rows x 4 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>59308</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:22:10+09</td>\n <td>せいべ</td>\n <td>うあああああけおめ!!</td>\n </tr>\n <tr>\n <th>59309</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:23:00+09</td>\n <td>せいべ</td>\n <td>2020年のあなたの運勢は大吉です!今年は楽しいはんどんライフが送れるでしょう!#どんみくじ...</td>\n </tr>\n <tr>\n <th>59310</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:23:25+09</td>\n <td>せいべ</td>\n <td>ナイスあけおめおハンバーグ!</td>\n </tr>\n <tr>\n <th>59311</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:56:08+09</td>\n <td>せいべ</td>\n <td>うおおおお</td>\n </tr>\n <tr>\n <th>59312</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:56:24+09</td>\n <td>せいべ</td>\n <td>unix!</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>107576</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:34:09+09</td>\n <td>せいべ</td>\n <td>これホラーゲームなのか・・</td>\n </tr>\n <tr>\n <th>107577</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:35:00+09</td>\n <td>せいべ</td>\n <td>見えてるw</td>\n </tr>\n <tr>\n <th>107578</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:42:34+09</td>\n <td>せいべ</td>\n <td>クソって言わないって言った直後にクソっていうの草</td>\n </tr>\n <tr>\n <th>107579</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:48:05+09</td>\n 
<td>せいべ</td>\n <td>500円なら寝る前に一言書き置きしたいときとかダイイングメッセージ残しておきたい時用にいたる...</td>\n </tr>\n <tr>\n <th>107580</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:49:05+09</td>\n <td>せいべ</td>\n <td>これのことです</td>\n </tr>\n </tbody>\n</table>\n<p>48273 rows × 4 columns</p>\n</div>" | |
}, | |
"execution_count": 558, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": " id \\\n107825 https://handon.club/users/seibe/statuses/10547... \n107826 https://handon.club/users/seibe/statuses/10547... \n107827 https://handon.club/users/seibe/statuses/10547... \n107828 https://handon.club/users/seibe/statuses/10547... \n107829 https://handon.club/users/seibe/statuses/10547... \n... ... \n141121 https://handon.club/users/seibe/statuses/10741... \n141122 https://handon.club/users/seibe/statuses/10741... \n141123 https://handon.club/users/seibe/statuses/10741... \n141124 https://handon.club/users/seibe/statuses/10741... \n141125 https://handon.club/users/seibe/statuses/10741... \n\n datetime name content \n107825 2021-01-01 03:55:16+09 せいべ 朝 \n107826 2021-01-01 04:22:46+09 せいべ せんべいではないです \n107827 2021-01-01 04:27:43+09 せいべ お年玉どこ \n107828 2021-01-01 04:27:53+09 せいべ あ、せんべいではないです \n107829 2021-01-01 04:36:03+09 せいべ @uzuky \n... ... ... ... \n141121 2021-12-08 19:00:55+09 せいべ @S_iRe_N \n141122 2021-12-08 20:05:33+09 せいべ たはお4944 \n141123 2021-12-08 21:00:19+09 せいべ あのひとエオルゼアも破壊してる… \n141124 2021-12-08 21:09:21+09 せいべ なんとかかんとかグラコロ! \n141125 2021-12-08 21:43:51+09 せいべ 箱根 \n\n[33301 rows x 4 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>107825</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 03:55:16+09</td>\n <td>せいべ</td>\n <td>朝</td>\n </tr>\n <tr>\n <th>107826</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:22:46+09</td>\n <td>せいべ</td>\n <td>せんべいではないです</td>\n </tr>\n <tr>\n <th>107827</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:27:43+09</td>\n <td>せいべ</td>\n <td>お年玉どこ</td>\n </tr>\n <tr>\n <th>107828</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:27:53+09</td>\n <td>せいべ</td>\n <td>あ、せんべいではないです</td>\n </tr>\n <tr>\n <th>107829</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:36:03+09</td>\n <td>せいべ</td>\n <td>@uzuky</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>141121</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 19:00:55+09</td>\n <td>せいべ</td>\n <td>@S_iRe_N</td>\n </tr>\n <tr>\n <th>141122</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 20:05:33+09</td>\n <td>せいべ</td>\n <td>たはお4944</td>\n </tr>\n <tr>\n <th>141123</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:00:19+09</td>\n <td>せいべ</td>\n <td>あのひとエオルゼアも破壊してる…</td>\n </tr>\n <tr>\n <th>141124</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:09:21+09</td>\n <td>せいべ</td>\n <td>なんとかかんとかグラコロ!</td>\n </tr>\n <tr>\n 
<th>141125</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:43:51+09</td>\n <td>せいべ</td>\n <td>箱根</td>\n </tr>\n </tbody>\n</table>\n<p>33301 rows × 4 columns</p>\n</div>" | |
}, | |
"execution_count": 558, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2020年と2021年の比較\n", | |
"toots_2020 = filter_by_year(toots, \"2020\")\n", | |
"toots_2021 = filter_by_year(toots, \"2021\")\n", | |
"toots_2020\n", | |
"toots_2021" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 559, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "48273" | |
}, | |
"execution_count": 559, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": "33301" | |
}, | |
"execution_count": 559, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# toot数\n", | |
"len(toots_2020)\n", | |
"len(toots_2021)" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 560, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<AxesSubplot:>" | |
}, | |
"execution_count": 560, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": "<AxesSubplot:>" | |
}, | |
"execution_count": 560, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# トゥートの長さで箱ひげ図\n", | |
"# 2020\n", | |
"ax = fig.add_subplot(1, 2, 1)\n", | |
"toots_2020['content'].str.len().plot.box(ax=ax)\n", | |
"ax = fig.add_subplot(1, 2, 2)\n", | |
"toots_2021['content'].str.len().plot.box(ax=ax)\n", | |
"plt.show()\n", | |
"plt.close('all')" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 561, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<AxesSubplot:>" | |
}, | |
"execution_count": 561, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# まとめて(うまくいかなさそう)\n", | |
"fig = plt.figure()\n", | |
"plt_box = pd.DataFrame({\n", | |
" '2020': toots_2020['content'].str.len(),\n", | |
" '2021': toots_2021['content'].str.len()\n", | |
"})\n", | |
"plt_box.plot.box(ylim=(0, 50))\n", | |
"plt.savefig('out/content_len_box', dpi=400)\n", | |
"plt.close('all')" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 562, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "count 48273.000000\nmean 17.757587\nstd 18.446089\nmin 0.000000\n25% 7.000000\n50% 13.000000\n75% 22.000000\nmax 479.000000\nName: content, dtype: float64" | |
}, | |
"execution_count": 562, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 数値情報で\n", | |
"# 2020\n", | |
"toots_2020['content'].str.len().describe()\n", | |
"\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 563, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "count 33301.000000\nmean 17.724753\nstd 19.064513\nmin 0.000000\n25% 7.000000\n50% 13.000000\n75% 22.000000\nmax 478.000000\nName: content, dtype: float64" | |
}, | |
"execution_count": 563, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toots_2021['content'].str.len().describe()" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 564, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "7 2555\n6 2343\n10 2329\n9 2183\n5 2148\n ... \n199 1\n135 1\n148 1\n163 1\n152 1\nName: content, Length: 213, dtype: int64" | |
}, | |
"execution_count": 564, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 長さ頻度分析\n", | |
"# 2020\n", | |
"toots_2020['content'].str.len().value_counts()\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 565, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "7 1803\n10 1545\n9 1486\n6 1470\n8 1401\n ... \n265 1\n157 1\n220 1\n140 1\n176 1\nName: content, Length: 204, dtype: int64" | |
}, | |
"execution_count": 565, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toots_2021['content'].str.len().value_counts()" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 566, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<AxesSubplot:>" | |
}, | |
"execution_count": 566, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# areaで\n", | |
"plt_area = pd.DataFrame({\n", | |
" '2020_len': toots_2020['content'].str.len().value_counts(),\n", | |
" '2021_len': toots_2021['content'].str.len().value_counts()\n", | |
"})\n", | |
"plt_area.plot(linewidth=1)\n", | |
"plt.savefig('out/content_len_line', dpi=400)\n", | |
"plt.close('all')" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 567, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "<AxesSubplot:>" | |
}, | |
"execution_count": 567, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 50までを拡大\n", | |
"plt_area = pd.DataFrame({\n", | |
" '2020_len': toots_2020['content'].str.len().value_counts(),\n", | |
" '2021_len': toots_2021['content'].str.len().value_counts()\n", | |
"})\n", | |
"plt_area.plot(xlim=(0,50), linewidth=1)\n", | |
"plt.savefig('out/content_len_line_50', dpi=400)\n", | |
"plt.close('all')" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 568, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "@uzuky 274\n@Eucritical 221\n@rio_tc 199\n@S_iRe_N 160\n@highemerly 145\n ... \n@desk_crusher 1\n@moonpaste 1\n@pgo 1\n@maemae 1\n@monyoNERVA 1\nName: counts, Length: 91, dtype: int64" | |
}, | |
"execution_count": 568, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 空リプライライキング\n", | |
"# 2020\n", | |
"toots_2020_empty_reply = make_empty_reply_ranking(toots_2020)\n", | |
"toots_2020_empty_reply\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 569, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "@rio_tc 177\n@S_iRe_N 141\n@uzuky 121\n@y_f_ 104\n@yunoka 100\n ... \n@blindwalk 1\n@X 1\n@xenop 1\n@sysecond 1\n@kd 1\nName: counts, Length: 80, dtype: int64" | |
}, | |
"execution_count": 569, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toots_2021_empty_reply = make_empty_reply_ranking(toots_2021)\n", | |
"toots_2021_empty_reply" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 570, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "@rio_tc 1822\n@highemerly 743\n@yuhina 638\n@Eucritical 563\n@mysterytrick 471\n ... \n@unitendon 1\n@hinoyu 1\n@nyoro 1\n@kxn4t 1\n@monyoNERVA 1\nName: counts, Length: 143, dtype: int64" | |
}, | |
"execution_count": 570, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 普通のリプライランキング\n", | |
"# 2020\n", | |
"toot_2020_all = toots_2020['content'].str.contains('')\n", | |
"toots_2020_reply = conditional_freq_series(toots_2020['content'], toot_2020_all, id_regex)\n", | |
"toots_2020_reply\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 571, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "@rio_tc 1640\n@S_iRe_N 327\n@yuhina 291\n@y_f_ 229\n@Eucritical 216\n ... \n@kuizy_net 1\n@Wakupedia 1\n@higure 1\n@ck 1\n@@@@@@@@@@@@@@@@@@@@@@@@@ 1\nName: counts, Length: 123, dtype: int64" | |
}, | |
"execution_count": 571, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toot_2021_all = toots_2021['content'].str.contains('')\n", | |
"toots_2021_reply = conditional_freq_series(toots_2021['content'], toot_2021_all, id_regex)\n", | |
"toots_2021_reply" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 572, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": " empty normal prop\n@uzuky 274.0 383 0.715405\n@okunom 102.0 167 0.610778\n@yunoka 69.0 142 0.485915\n@y_f_ 142.0 306 0.464052\n@S_iRe_N 160.0 375 0.426667\n@Eucritical 221.0 563 0.392540\n@femm 75.0 196 0.382653\n@mysterytrick 136.0 471 0.288747\n@pom_matsu 48.0 174 0.275862\n@maemaestra 50.0 214 0.233645\n@ac_key 43.0 186 0.231183\n@highemerly 145.0 743 0.195155\n@heimusu 14.0 73 0.191781\n@kd 12.0 70 0.171429\n@zero_zaki_ghost 41.0 249 0.164659\n@u2mk 23.0 146 0.157534\n@misogi 26.0 166 0.156627\n@toshi_a 15.0 106 0.141509\n@rio_tc 199.0 1822 0.109221\n@4pk 8.0 99 0.080808\n@meliza 9.0 121 0.074380\n@hijouguchi 6.0 123 0.048780\n@LeLievre 6.0 125 0.048000\n@seibe 1.0 70 0.014286\n@desk_crusher 1.0 79 0.012658\n@suma 1.0 142 0.007042\n@yuhina 3.0 638 0.004702\n@toku2 NaN 198 NaN\n@yamatema NaN 75 NaN", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>empty</th>\n <th>normal</th>\n <th>prop</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>@uzuky</th>\n <td>274.0</td>\n <td>383</td>\n <td>0.715405</td>\n </tr>\n <tr>\n <th>@okunom</th>\n <td>102.0</td>\n <td>167</td>\n <td>0.610778</td>\n </tr>\n <tr>\n <th>@yunoka</th>\n <td>69.0</td>\n <td>142</td>\n <td>0.485915</td>\n </tr>\n <tr>\n <th>@y_f_</th>\n <td>142.0</td>\n <td>306</td>\n <td>0.464052</td>\n </tr>\n <tr>\n <th>@S_iRe_N</th>\n <td>160.0</td>\n <td>375</td>\n <td>0.426667</td>\n </tr>\n <tr>\n <th>@Eucritical</th>\n <td>221.0</td>\n <td>563</td>\n <td>0.392540</td>\n </tr>\n <tr>\n <th>@femm</th>\n <td>75.0</td>\n <td>196</td>\n <td>0.382653</td>\n </tr>\n <tr>\n <th>@mysterytrick</th>\n <td>136.0</td>\n <td>471</td>\n <td>0.288747</td>\n </tr>\n <tr>\n <th>@pom_matsu</th>\n <td>48.0</td>\n <td>174</td>\n <td>0.275862</td>\n </tr>\n <tr>\n <th>@maemaestra</th>\n <td>50.0</td>\n <td>214</td>\n <td>0.233645</td>\n </tr>\n <tr>\n <th>@ac_key</th>\n <td>43.0</td>\n <td>186</td>\n <td>0.231183</td>\n </tr>\n <tr>\n <th>@highemerly</th>\n <td>145.0</td>\n <td>743</td>\n <td>0.195155</td>\n </tr>\n <tr>\n <th>@heimusu</th>\n <td>14.0</td>\n <td>73</td>\n <td>0.191781</td>\n </tr>\n <tr>\n <th>@kd</th>\n <td>12.0</td>\n <td>70</td>\n <td>0.171429</td>\n </tr>\n <tr>\n <th>@zero_zaki_ghost</th>\n <td>41.0</td>\n <td>249</td>\n <td>0.164659</td>\n </tr>\n <tr>\n <th>@u2mk</th>\n <td>23.0</td>\n <td>146</td>\n <td>0.157534</td>\n </tr>\n <tr>\n <th>@misogi</th>\n <td>26.0</td>\n <td>166</td>\n <td>0.156627</td>\n </tr>\n <tr>\n <th>@toshi_a</th>\n <td>15.0</td>\n <td>106</td>\n <td>0.141509</td>\n </tr>\n <tr>\n <th>@rio_tc</th>\n 
<td>199.0</td>\n <td>1822</td>\n <td>0.109221</td>\n </tr>\n <tr>\n <th>@4pk</th>\n <td>8.0</td>\n <td>99</td>\n <td>0.080808</td>\n </tr>\n <tr>\n <th>@meliza</th>\n <td>9.0</td>\n <td>121</td>\n <td>0.074380</td>\n </tr>\n <tr>\n <th>@hijouguchi</th>\n <td>6.0</td>\n <td>123</td>\n <td>0.048780</td>\n </tr>\n <tr>\n <th>@LeLievre</th>\n <td>6.0</td>\n <td>125</td>\n <td>0.048000</td>\n </tr>\n <tr>\n <th>@seibe</th>\n <td>1.0</td>\n <td>70</td>\n <td>0.014286</td>\n </tr>\n <tr>\n <th>@desk_crusher</th>\n <td>1.0</td>\n <td>79</td>\n <td>0.012658</td>\n </tr>\n <tr>\n <th>@suma</th>\n <td>1.0</td>\n <td>142</td>\n <td>0.007042</td>\n </tr>\n <tr>\n <th>@yuhina</th>\n <td>3.0</td>\n <td>638</td>\n <td>0.004702</td>\n </tr>\n <tr>\n <th>@toku2</th>\n <td>NaN</td>\n <td>198</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>@yamatema</th>\n <td>NaN</td>\n <td>75</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"execution_count": 572, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 空リプ割合(あとで。できたら)\n", | |
"# 2020\n", | |
"toots_2020_forprop = pd.DataFrame({\n", | |
" 'empty': toots_2020_empty_reply,\n", | |
" 'normal': toots_2020_reply\n", | |
"})\n", | |
"toots_2020_forprop['prop'] = toots_2020_forprop['empty'] / toots_2020_forprop['normal']\n", | |
"toots_2020_forprop[toots_2020_forprop['normal'] > toots_2020_forprop['normal'].mean()].sort_values('prop',\n", | |
" ascending=False)" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 573, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": " empty normal prop\n@uzuky 121.0 138 0.876812\n@okunom 97.0 132 0.734848\n@inabap 46.0 74 0.621622\n@D_HELL 82.0 142 0.577465\n@femm 31.0 58 0.534483\n@yunoka 100.0 213 0.469484\n@y_f_ 104.0 229 0.454148\n@S_iRe_N 141.0 327 0.431193\n@maemaestra 68.0 202 0.336634\n@ac_key 17.0 59 0.288136\n@mysterytrick 52.0 203 0.256158\n@osoba 20.0 80 0.250000\n@zero_zaki_ghost 29.0 201 0.144279\n@rio_tc 177.0 1640 0.107927\n@toshi_a 8.0 78 0.102564\n@Eucritical 21.0 216 0.097222\n@highemerly 13.0 160 0.081250\n@4pk 4.0 62 0.064516\n@shijin 2.0 66 0.030303\n@yuhina 5.0 291 0.017182\n@Citrine 1.0 156 0.006410\n@desk_crusher NaN 80 NaN", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>empty</th>\n <th>normal</th>\n <th>prop</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>@uzuky</th>\n <td>121.0</td>\n <td>138</td>\n <td>0.876812</td>\n </tr>\n <tr>\n <th>@okunom</th>\n <td>97.0</td>\n <td>132</td>\n <td>0.734848</td>\n </tr>\n <tr>\n <th>@inabap</th>\n <td>46.0</td>\n <td>74</td>\n <td>0.621622</td>\n </tr>\n <tr>\n <th>@D_HELL</th>\n <td>82.0</td>\n <td>142</td>\n <td>0.577465</td>\n </tr>\n <tr>\n <th>@femm</th>\n <td>31.0</td>\n <td>58</td>\n <td>0.534483</td>\n </tr>\n <tr>\n <th>@yunoka</th>\n <td>100.0</td>\n <td>213</td>\n <td>0.469484</td>\n </tr>\n <tr>\n <th>@y_f_</th>\n <td>104.0</td>\n <td>229</td>\n <td>0.454148</td>\n </tr>\n <tr>\n <th>@S_iRe_N</th>\n <td>141.0</td>\n <td>327</td>\n <td>0.431193</td>\n </tr>\n <tr>\n <th>@maemaestra</th>\n <td>68.0</td>\n <td>202</td>\n <td>0.336634</td>\n </tr>\n <tr>\n <th>@ac_key</th>\n <td>17.0</td>\n <td>59</td>\n <td>0.288136</td>\n </tr>\n <tr>\n <th>@mysterytrick</th>\n <td>52.0</td>\n <td>203</td>\n <td>0.256158</td>\n </tr>\n <tr>\n <th>@osoba</th>\n <td>20.0</td>\n <td>80</td>\n <td>0.250000</td>\n </tr>\n <tr>\n <th>@zero_zaki_ghost</th>\n <td>29.0</td>\n <td>201</td>\n <td>0.144279</td>\n </tr>\n <tr>\n <th>@rio_tc</th>\n <td>177.0</td>\n <td>1640</td>\n <td>0.107927</td>\n </tr>\n <tr>\n <th>@toshi_a</th>\n <td>8.0</td>\n <td>78</td>\n <td>0.102564</td>\n </tr>\n <tr>\n <th>@Eucritical</th>\n <td>21.0</td>\n <td>216</td>\n <td>0.097222</td>\n </tr>\n <tr>\n <th>@highemerly</th>\n <td>13.0</td>\n <td>160</td>\n <td>0.081250</td>\n </tr>\n <tr>\n <th>@4pk</th>\n <td>4.0</td>\n <td>62</td>\n <td>0.064516</td>\n </tr>\n <tr>\n <th>@shijin</th>\n <td>2.0</td>\n 
<td>66</td>\n <td>0.030303</td>\n </tr>\n <tr>\n <th>@yuhina</th>\n <td>5.0</td>\n <td>291</td>\n <td>0.017182</td>\n </tr>\n <tr>\n <th>@Citrine</th>\n <td>1.0</td>\n <td>156</td>\n <td>0.006410</td>\n </tr>\n <tr>\n <th>@desk_crusher</th>\n <td>NaN</td>\n <td>80</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"execution_count": 573, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toots_2021_forprop = pd.DataFrame({\n", | |
" 'empty': toots_2021_empty_reply,\n", | |
" 'normal': toots_2021_reply\n", | |
"})\n", | |
"toots_2021_forprop['prop'] = toots_2021_forprop['empty'] / toots_2021_forprop['normal']\n", | |
"toots_2021_forprop[toots_2021_forprop['normal'] > toots_2021_forprop['normal'].mean()].sort_values('prop',\n", | |
" ascending=False)" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 574, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "9772" | |
}, | |
"execution_count": 574, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": "2452" | |
}, | |
"execution_count": 574, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": "5956" | |
}, | |
"execution_count": 574, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": "1533" | |
}, | |
"execution_count": 574, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Total number of replies and total number of empty replies, per year.\n", | |
"# Every bare expression below is echoed as its own output because the import cell sets\n", | |
"# InteractiveShell.ast_node_interactivity = \"all\"; the outputs appear in statement order.\n", | |
"# 2020\n", | |
"toots_2020_reply.sum()\n", | |
"toots_2020_empty_reply.sum()\n", | |
"# 2021\n", | |
"toots_2021_reply.sum()\n", | |
"toots_2021_empty_reply.sum()" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 575, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": ":str_iiyo: 82\n:str_uoo: 38\n:str_popopopa: 30\n:str_erait: 28\n:str_guaa: 23\n ..\n:adobe_acrobat: 1\n:autocad: 1\n:ansible: 1\n:brobsword: 1\n:str_eee: 1\nName: counts, Length: 140, dtype: int64" | |
}, | |
"execution_count": 575, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# カスタム絵文字\n", | |
"# 2020\n", | |
"toot_2020_all_filter = toots_2020['content'].str.contains('')\n", | |
"conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\":[\\d\\w]+:\")\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 576, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": ":str_iiyo: 51\n:str_popopopa: 44\n:pui: 23\n:str_senbeidehanai: 17\n:stra_fold: 17\n ..\n:str_arigato: 1\n:str_yabai: 1\n:str_wayo: 1\n:str_logbo: 1\n:str_damedayo: 1\nName: counts, Length: 105, dtype: int64" | |
}, | |
"execution_count": 576, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toot_2021_all_filter = toots_2021['content'].str.contains('')\n", | |
"conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\":[\\d\\w]+:\")\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 577, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"593\n", | |
"359\n" | |
] | |
} | |
], | |
"source": [ | |
"# カスタム絵文字、総数\n", | |
"# 2020\n", | |
"toot_2020_all_filter = toots_2020['content'].str.contains('')\n", | |
"print(conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\":[\\d\\w]+:\").sum())\n", | |
"# 2021\n", | |
"toot_2021_all_filter = toots_2021['content'].str.contains('')\n", | |
"print(conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\":[\\d\\w]+:\").sum())\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 578, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "せんべいではないです 460\nName: counts, dtype: int64" | |
}, | |
"execution_count": 578, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# せんべいではないです\n", | |
"# 2020\n", | |
"toot_2020_all_filter = toots_2020['content'].str.contains('')\n", | |
"conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\"せんべいではないです\")\n", | |
"\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 579, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "せんべいではないです 383\nName: counts, dtype: int64" | |
}, | |
"execution_count": 579, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 2021\n", | |
"toot_2021_all_filter = toots_2021['content'].str.contains('')\n", | |
"conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\"せんべいではないです\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 580, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "あ 341\nえ 58\na 43\n夜 38\n、 38\n ... \n0 1\n肉 1\n∀ 1\n味 1\n外 1\nName: counts, Length: 241, dtype: int64" | |
}, | |
"execution_count": 580, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 1文字トゥート\n", | |
"# 2020\n", | |
"toot_2020_all_filter = toots_2020['content'].str.len() == 1\n", | |
"# toots_2020['content'][toot_2020_all_filter].value_counts()\n", | |
"conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\".+\")\n" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 581, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "あ 462\nえ 112\nう 51\n? 30\n昼 21\n ... \nカ 1\n水 1\nシ 1\n神 1\nコ 1\nName: counts, Length: 183, dtype: int64" | |
}, | |
"execution_count": 581, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#2021\n", | |
"toot_2021_all_filter = toots_2021['content'].str.len() == 1\n", | |
"conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\".+\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 581, | |
"outputs": [], | |
"source": [], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 581, | |
"outputs": [], | |
"source": [], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 581, | |
"outputs": [], | |
"source": [], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 581, | |
"outputs": [], | |
"source": [], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 581, | |
"outputs": [], | |
"source": [], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment