Skip to content

Instantly share code, notes, and snippets.

@seibe2
Created December 15, 2021 11:59
Show Gist options
  • Save seibe2/74e47ede37e312f9ee200213118820e9 to your computer and use it in GitHub Desktop.
Save seibe2/74e47ede37e312f9ee200213118820e9 to your computer and use it in GitHub Desktop.
トゥート分析2021年
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 549,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# import\n",
"\n",
"import pandas as pd\n",
"import re\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display\n",
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = \"all\"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 550,
"outputs": [
{
"data": {
"text/plain": "<Figure size 640x480 with 0 Axes>"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 図の表示設定\n",
"plt.style.use('default')\n",
"fig = plt.figure()\n",
"fig.patch.set_alpha(0)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 551,
"outputs": [],
"source": [
"def conditional_freq_series(data_to_be_analyzed, filter_of_data, element_regexp):\n",
" \"\"\"\n",
" フィルターで絞り込んだ文字列 Series から、正規表現にマッチした文字列ごとの出現回数(度数)を出す\n",
" :param data_to_be_analyzed: 分析対象の文字列 Series\n",
" :param filter_of_data: data_to_be_analyzed の絞り込みに使う真偽値マスク\n",
" :param element_regexp: 数えたい要素にマッチする正規表現\n",
" :return: マッチした文字列ごとの出現回数(value_counts の結果)\n",
" \"\"\"\n",
" filtered = data_to_be_analyzed[filter_of_data]\n",
" column_name = \"counts\"\n",
" regexp_with_column_name = f\"(?P<{column_name}>{element_regexp})\"\n",
" extracted = filtered.str.extractall(regexp_with_column_name)\n",
" return extracted[column_name].value_counts()\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
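{
"cell_type": "code",
"execution_count": 552,
"outputs": [],
"source": [
"def sanitize(toots, my_name):\n",
" \"\"\"\n",
" Sanitize the toots DataFrame for data analysis.\n",
" :param my_name: value of the 'name' column to keep; rows with any other name are dropped\n",
" :param toots: DataFrame of toots with at least 'content' and 'name' columns\n",
" :return: DataFrame with missing values replaced by empty strings, HTML tags removed from 'content', and only the rows whose 'name' equals my_name\n",
" \"\"\"\n",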
" # null取り\n",
" toots = toots.fillna(\"\")\n",
"\n",
" # HTMLタグ外し\n",
" toots['content'] = toots['content'].str.replace(r\"<[^>]*?>\", \"\", regex=True)\n",
"\n",
" # 自身のトゥートだけ\n",
" toots = toots[toots[\"name\"] == my_name]\n",
"\n",
" return toots\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
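{
"cell_type": "code",
"execution_count": 553,
"outputs": [],
"source": [
"def filter_by_year(toots, year: str):\n",
" \"\"\"\n",
" 指定した年のトゥートだけに絞り込む\n",
" 注意: 上限は「{year}-12-31」なので、datetime 列が文字列のまま(例: 2020-12-31 08:00:00+09)だと上限より大きい扱いになり、12月31日分が結果から漏れる\n",
" :param toots: datetime 列を持つトゥートの DataFrame\n",
" :param year: 対象の年(例: '2020')\n",
" :return: datetime が「{year}-01-01」以上「{year}-12-31」以下の行だけを残した DataFrame\n",
" \"\"\"\n",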
" start_year = f\"{year}-01-01\"\n",
" end_year = f\"{year}-12-31\"\n",
" toots_datetime = toots[\"datetime\"]\n",
" toots_year_filter = (start_year <= toots_datetime) & (toots_datetime <= end_year)\n",
" return toots[toots_year_filter]\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 554,
"outputs": [],
"source": [
"id_regex = r\"@[A-Za-z0-9._@]+\""
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 555,
"outputs": [],
"source": [
"def make_empty_reply_ranking(toots):\n",
" \"\"\"\n",
" 空リプライランキング(本文が @ID と半角スペースだけのトゥートを対象に、@ID ごとの回数を数える)\n",
" :param toots: content 列を持つトゥートの DataFrame\n",
" :return: @ID ごとの出現回数(多い順)\n",
" \"\"\"\n",
" id_regex = r\"@[A-Za-z0-9._@]+\"\n",
" toots_id_deleted = toots[\"content\"].str.replace(id_regex, \"\", regex=True)\n",
" toots_empty_reply_filter = toots_id_deleted.str.contains(\"[^ ]\") == False\n",
" toots_empty_reply_ranking = conditional_freq_series(toots['content'], toots_empty_reply_filter,\n",
" id_regex)\n",
" return toots_empty_reply_ranking\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 556,
"outputs": [
{
"data": {
"text/plain": " id \\\n0 https://handon.club/users/seibe/statuses/512 \n1 https://handon.club/users/seibe/statuses/514 \n2 https://handon.club/users/seibe/statuses/515 \n3 https://handon.club/users/seibe/statuses/518 \n4 https://handon.club/users/seibe/statuses/519 \n... ... \n141121 https://handon.club/users/seibe/statuses/10741... \n141122 https://handon.club/users/seibe/statuses/10741... \n141123 https://handon.club/users/seibe/statuses/10741... \n141124 https://handon.club/users/seibe/statuses/10741... \n141125 https://handon.club/users/seibe/statuses/10741... \n\n datetime name \\\n0 2017-04-16 12:25:41+09 せいべ \n1 2017-04-16 12:27:06+09 せいべ \n2 2017-04-16 12:27:35+09 せいべ \n3 2017-04-16 12:33:01+09 せいべ \n4 2017-04-16 12:33:16+09 せいべ \n... ... ... \n141121 2021-12-08 19:00:55+09 せいべ \n141122 2021-12-08 20:05:33+09 せいべ \n141123 2021-12-08 21:00:19+09 せいべ \n141124 2021-12-08 21:09:21+09 せいべ \n141125 2021-12-08 21:43:51+09 せいべ \n\n content \n0 <p>haaaaaaaaaan</p> \n1 <p>未収載</p> \n2 <p>未収載はローカルタイムラインにも乗らないのか</p> \n3 <p><span class=\"h-card\"><a href=\"https://hando... \n4 <p>空リプでけへんの</p> \n... ... \n141121 <p><span class=\"h-card\"><a href=\"https://hando... \n141122 <p>たはお4944</p> \n141123 <p>あのひとエオルゼアも破壊してる…</p> \n141124 <p>なんとかかんとかグラコロ!</p> \n141125 <p>箱根</p> \n\n[141126 rows x 4 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>https://handon.club/users/seibe/statuses/512</td>\n <td>2017-04-16 12:25:41+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;haaaaaaaaaan&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>1</th>\n <td>https://handon.club/users/seibe/statuses/514</td>\n <td>2017-04-16 12:27:06+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;未収載&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>2</th>\n <td>https://handon.club/users/seibe/statuses/515</td>\n <td>2017-04-16 12:27:35+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;未収載はローカルタイムラインにも乗らないのか&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>3</th>\n <td>https://handon.club/users/seibe/statuses/518</td>\n <td>2017-04-16 12:33:01+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;&lt;span class=\"h-card\"&gt;&lt;a href=\"https://hando...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>https://handon.club/users/seibe/statuses/519</td>\n <td>2017-04-16 12:33:16+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;空リプでけへんの&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>141121</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 19:00:55+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;&lt;span class=\"h-card\"&gt;&lt;a href=\"https://hando...</td>\n </tr>\n <tr>\n <th>141122</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 20:05:33+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;たはお4944&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>141123</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:00:19+09</td>\n <td>せいべ</td>\n 
<td>&lt;p&gt;あのひとエオルゼアも破壊してる…&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>141124</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:09:21+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;なんとかかんとかグラコロ!&lt;/p&gt;</td>\n </tr>\n <tr>\n <th>141125</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:43:51+09</td>\n <td>せいべ</td>\n <td>&lt;p&gt;箱根&lt;/p&gt;</td>\n </tr>\n </tbody>\n</table>\n<p>141126 rows × 4 columns</p>\n</div>"
},
"execution_count": 556,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toots = pd.read_csv('./data/user_9615_note.csv')\n",
"toots"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 557,
"outputs": [
{
"data": {
"text/plain": " id \\\n0 https://handon.club/users/seibe/statuses/512 \n1 https://handon.club/users/seibe/statuses/514 \n2 https://handon.club/users/seibe/statuses/515 \n3 https://handon.club/users/seibe/statuses/518 \n4 https://handon.club/users/seibe/statuses/519 \n... ... \n141121 https://handon.club/users/seibe/statuses/10741... \n141122 https://handon.club/users/seibe/statuses/10741... \n141123 https://handon.club/users/seibe/statuses/10741... \n141124 https://handon.club/users/seibe/statuses/10741... \n141125 https://handon.club/users/seibe/statuses/10741... \n\n datetime name content \n0 2017-04-16 12:25:41+09 せいべ haaaaaaaaaan \n1 2017-04-16 12:27:06+09 せいべ 未収載 \n2 2017-04-16 12:27:35+09 せいべ 未収載はローカルタイムラインにも乗らないのか \n3 2017-04-16 12:33:01+09 せいべ @komog \n4 2017-04-16 12:33:16+09 せいべ 空リプでけへんの \n... ... ... ... \n141121 2021-12-08 19:00:55+09 せいべ @S_iRe_N \n141122 2021-12-08 20:05:33+09 せいべ たはお4944 \n141123 2021-12-08 21:00:19+09 せいべ あのひとエオルゼアも破壊してる… \n141124 2021-12-08 21:09:21+09 せいべ なんとかかんとかグラコロ! \n141125 2021-12-08 21:43:51+09 せいべ 箱根 \n\n[141126 rows x 4 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>https://handon.club/users/seibe/statuses/512</td>\n <td>2017-04-16 12:25:41+09</td>\n <td>せいべ</td>\n <td>haaaaaaaaaan</td>\n </tr>\n <tr>\n <th>1</th>\n <td>https://handon.club/users/seibe/statuses/514</td>\n <td>2017-04-16 12:27:06+09</td>\n <td>せいべ</td>\n <td>未収載</td>\n </tr>\n <tr>\n <th>2</th>\n <td>https://handon.club/users/seibe/statuses/515</td>\n <td>2017-04-16 12:27:35+09</td>\n <td>せいべ</td>\n <td>未収載はローカルタイムラインにも乗らないのか</td>\n </tr>\n <tr>\n <th>3</th>\n <td>https://handon.club/users/seibe/statuses/518</td>\n <td>2017-04-16 12:33:01+09</td>\n <td>せいべ</td>\n <td>@komog</td>\n </tr>\n <tr>\n <th>4</th>\n <td>https://handon.club/users/seibe/statuses/519</td>\n <td>2017-04-16 12:33:16+09</td>\n <td>せいべ</td>\n <td>空リプでけへんの</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>141121</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 19:00:55+09</td>\n <td>せいべ</td>\n <td>@S_iRe_N</td>\n </tr>\n <tr>\n <th>141122</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 20:05:33+09</td>\n <td>せいべ</td>\n <td>たはお4944</td>\n </tr>\n <tr>\n <th>141123</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:00:19+09</td>\n <td>せいべ</td>\n <td>あのひとエオルゼアも破壊してる…</td>\n </tr>\n <tr>\n <th>141124</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:09:21+09</td>\n <td>せいべ</td>\n <td>なんとかかんとかグラコロ!</td>\n </tr>\n <tr>\n <th>141125</th>\n 
<td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:43:51+09</td>\n <td>せいべ</td>\n <td>箱根</td>\n </tr>\n </tbody>\n</table>\n<p>141126 rows × 4 columns</p>\n</div>"
},
"execution_count": 557,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#sanitize\n",
"my_name = \"せいべ\"\n",
"toots = sanitize(toots, my_name)\n",
"toots"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 558,
"outputs": [
{
"data": {
"text/plain": " id \\\n59308 https://handon.club/users/seibe/statuses/10340... \n59309 https://handon.club/users/seibe/statuses/10340... \n59310 https://handon.club/users/seibe/statuses/10340... \n59311 https://handon.club/users/seibe/statuses/10340... \n59312 https://handon.club/users/seibe/statuses/10340... \n... ... \n107576 https://handon.club/users/seibe/statuses/10546... \n107577 https://handon.club/users/seibe/statuses/10546... \n107578 https://handon.club/users/seibe/statuses/10546... \n107579 https://handon.club/users/seibe/statuses/10546... \n107580 https://handon.club/users/seibe/statuses/10546... \n\n datetime name \\\n59308 2020-01-01 08:22:10+09 せいべ \n59309 2020-01-01 08:23:00+09 せいべ \n59310 2020-01-01 08:23:25+09 せいべ \n59311 2020-01-01 08:56:08+09 せいべ \n59312 2020-01-01 08:56:24+09 せいべ \n... ... ... \n107576 2020-12-30 23:34:09+09 せいべ \n107577 2020-12-30 23:35:00+09 せいべ \n107578 2020-12-30 23:42:34+09 せいべ \n107579 2020-12-30 23:48:05+09 せいべ \n107580 2020-12-30 23:49:05+09 せいべ \n\n content \n59308 うあああああけおめ!! \n59309 2020年のあなたの運勢は大吉です!今年は楽しいはんどんライフが送れるでしょう!#どんみくじ... \n59310 ナイスあけおめおハンバーグ! \n59311 うおおおお \n59312 unix! \n... ... \n107576 これホラーゲームなのか・・ \n107577 見えてるw \n107578 クソって言わないって言った直後にクソっていうの草 \n107579 500円なら寝る前に一言書き置きしたいときとかダイイングメッセージ残しておきたい時用にいたる... \n107580 これのことです \n\n[48273 rows x 4 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>59308</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:22:10+09</td>\n <td>せいべ</td>\n <td>うあああああけおめ!!</td>\n </tr>\n <tr>\n <th>59309</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:23:00+09</td>\n <td>せいべ</td>\n <td>2020年のあなたの運勢は大吉です!今年は楽しいはんどんライフが送れるでしょう!#どんみくじ...</td>\n </tr>\n <tr>\n <th>59310</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:23:25+09</td>\n <td>せいべ</td>\n <td>ナイスあけおめおハンバーグ!</td>\n </tr>\n <tr>\n <th>59311</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:56:08+09</td>\n <td>せいべ</td>\n <td>うおおおお</td>\n </tr>\n <tr>\n <th>59312</th>\n <td>https://handon.club/users/seibe/statuses/10340...</td>\n <td>2020-01-01 08:56:24+09</td>\n <td>せいべ</td>\n <td>unix!</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>107576</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:34:09+09</td>\n <td>せいべ</td>\n <td>これホラーゲームなのか・・</td>\n </tr>\n <tr>\n <th>107577</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:35:00+09</td>\n <td>せいべ</td>\n <td>見えてるw</td>\n </tr>\n <tr>\n <th>107578</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:42:34+09</td>\n <td>せいべ</td>\n <td>クソって言わないって言った直後にクソっていうの草</td>\n </tr>\n <tr>\n <th>107579</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:48:05+09</td>\n 
<td>せいべ</td>\n <td>500円なら寝る前に一言書き置きしたいときとかダイイングメッセージ残しておきたい時用にいたる...</td>\n </tr>\n <tr>\n <th>107580</th>\n <td>https://handon.club/users/seibe/statuses/10546...</td>\n <td>2020-12-30 23:49:05+09</td>\n <td>せいべ</td>\n <td>これのことです</td>\n </tr>\n </tbody>\n</table>\n<p>48273 rows × 4 columns</p>\n</div>"
},
"execution_count": 558,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": " id \\\n107825 https://handon.club/users/seibe/statuses/10547... \n107826 https://handon.club/users/seibe/statuses/10547... \n107827 https://handon.club/users/seibe/statuses/10547... \n107828 https://handon.club/users/seibe/statuses/10547... \n107829 https://handon.club/users/seibe/statuses/10547... \n... ... \n141121 https://handon.club/users/seibe/statuses/10741... \n141122 https://handon.club/users/seibe/statuses/10741... \n141123 https://handon.club/users/seibe/statuses/10741... \n141124 https://handon.club/users/seibe/statuses/10741... \n141125 https://handon.club/users/seibe/statuses/10741... \n\n datetime name content \n107825 2021-01-01 03:55:16+09 せいべ 朝 \n107826 2021-01-01 04:22:46+09 せいべ せんべいではないです \n107827 2021-01-01 04:27:43+09 せいべ お年玉どこ \n107828 2021-01-01 04:27:53+09 せいべ あ、せんべいではないです \n107829 2021-01-01 04:36:03+09 せいべ @uzuky \n... ... ... ... \n141121 2021-12-08 19:00:55+09 せいべ @S_iRe_N \n141122 2021-12-08 20:05:33+09 せいべ たはお4944 \n141123 2021-12-08 21:00:19+09 せいべ あのひとエオルゼアも破壊してる… \n141124 2021-12-08 21:09:21+09 せいべ なんとかかんとかグラコロ! \n141125 2021-12-08 21:43:51+09 せいべ 箱根 \n\n[33301 rows x 4 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>datetime</th>\n <th>name</th>\n <th>content</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>107825</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 03:55:16+09</td>\n <td>せいべ</td>\n <td>朝</td>\n </tr>\n <tr>\n <th>107826</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:22:46+09</td>\n <td>せいべ</td>\n <td>せんべいではないです</td>\n </tr>\n <tr>\n <th>107827</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:27:43+09</td>\n <td>せいべ</td>\n <td>お年玉どこ</td>\n </tr>\n <tr>\n <th>107828</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:27:53+09</td>\n <td>せいべ</td>\n <td>あ、せんべいではないです</td>\n </tr>\n <tr>\n <th>107829</th>\n <td>https://handon.club/users/seibe/statuses/10547...</td>\n <td>2021-01-01 04:36:03+09</td>\n <td>せいべ</td>\n <td>@uzuky</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>141121</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 19:00:55+09</td>\n <td>せいべ</td>\n <td>@S_iRe_N</td>\n </tr>\n <tr>\n <th>141122</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 20:05:33+09</td>\n <td>せいべ</td>\n <td>たはお4944</td>\n </tr>\n <tr>\n <th>141123</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:00:19+09</td>\n <td>せいべ</td>\n <td>あのひとエオルゼアも破壊してる…</td>\n </tr>\n <tr>\n <th>141124</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:09:21+09</td>\n <td>せいべ</td>\n <td>なんとかかんとかグラコロ!</td>\n </tr>\n <tr>\n 
<th>141125</th>\n <td>https://handon.club/users/seibe/statuses/10741...</td>\n <td>2021-12-08 21:43:51+09</td>\n <td>せいべ</td>\n <td>箱根</td>\n </tr>\n </tbody>\n</table>\n<p>33301 rows × 4 columns</p>\n</div>"
},
"execution_count": 558,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2020年と2021年の比較\n",
"toots_2020 = filter_by_year(toots, \"2020\")\n",
"toots_2021 = filter_by_year(toots, \"2021\")\n",
"toots_2020\n",
"toots_2021"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 559,
"outputs": [
{
"data": {
"text/plain": "48273"
},
"execution_count": 559,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "33301"
},
"execution_count": 559,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# toot数\n",
"len(toots_2020)\n",
"len(toots_2021)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 560,
"outputs": [
{
"data": {
"text/plain": "<AxesSubplot:>"
},
"execution_count": 560,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "<AxesSubplot:>"
},
"execution_count": 560,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# トゥートの長さで箱ひげ図\n",
"# 2020\n",
"ax = fig.add_subplot(1, 2, 1)\n",
"toots_2020['content'].str.len().plot.box(ax=ax)\n",
"ax = fig.add_subplot(1, 2, 2)\n",
"toots_2021['content'].str.len().plot.box(ax=ax)\n",
"plt.show()\n",
"plt.close('all')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 561,
"outputs": [
{
"data": {
"text/plain": "<AxesSubplot:>"
},
"execution_count": 561,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# まとめて(うまくいかなさそう)\n",
"fig = plt.figure()\n",
"plt_box = pd.DataFrame({\n",
" '2020': toots_2020['content'].str.len(),\n",
" '2021': toots_2021['content'].str.len()\n",
"})\n",
"plt_box.plot.box(ylim=(0, 50))\n",
"plt.savefig('out/content_len_box', dpi=400)\n",
"plt.close('all')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 562,
"outputs": [
{
"data": {
"text/plain": "count 48273.000000\nmean 17.757587\nstd 18.446089\nmin 0.000000\n25% 7.000000\n50% 13.000000\n75% 22.000000\nmax 479.000000\nName: content, dtype: float64"
},
"execution_count": 562,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 数値情報で\n",
"# 2020\n",
"toots_2020['content'].str.len().describe()\n",
"\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 563,
"outputs": [
{
"data": {
"text/plain": "count 33301.000000\nmean 17.724753\nstd 19.064513\nmin 0.000000\n25% 7.000000\n50% 13.000000\n75% 22.000000\nmax 478.000000\nName: content, dtype: float64"
},
"execution_count": 563,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toots_2021['content'].str.len().describe()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 564,
"outputs": [
{
"data": {
"text/plain": "7 2555\n6 2343\n10 2329\n9 2183\n5 2148\n ... \n199 1\n135 1\n148 1\n163 1\n152 1\nName: content, Length: 213, dtype: int64"
},
"execution_count": 564,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 長さ頻度分析\n",
"# 2020\n",
"toots_2020['content'].str.len().value_counts()\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 565,
"outputs": [
{
"data": {
"text/plain": "7 1803\n10 1545\n9 1486\n6 1470\n8 1401\n ... \n265 1\n157 1\n220 1\n140 1\n176 1\nName: content, Length: 204, dtype: int64"
},
"execution_count": 565,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toots_2021['content'].str.len().value_counts()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 566,
"outputs": [
{
"data": {
"text/plain": "<AxesSubplot:>"
},
"execution_count": 566,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 折れ線グラフで(長さごとの度数)\n",
"plt_area = pd.DataFrame({\n",
" '2020_len': toots_2020['content'].str.len().value_counts(),\n",
" '2021_len': toots_2021['content'].str.len().value_counts()\n",
"})\n",
"plt_area.plot(linewidth=1)\n",
"plt.savefig('out/content_len_line', dpi=400)\n",
"plt.close('all')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 567,
"outputs": [
{
"data": {
"text/plain": "<AxesSubplot:>"
},
"execution_count": 567,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 50までを拡大\n",
"plt_area = pd.DataFrame({\n",
" '2020_len': toots_2020['content'].str.len().value_counts(),\n",
" '2021_len': toots_2021['content'].str.len().value_counts()\n",
"})\n",
"plt_area.plot(xlim=(0,50), linewidth=1)\n",
"plt.savefig('out/content_len_line_50', dpi=400)\n",
"plt.close('all')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 568,
"outputs": [
{
"data": {
"text/plain": "@uzuky 274\n@Eucritical 221\n@rio_tc 199\n@S_iRe_N 160\n@highemerly 145\n ... \n@desk_crusher 1\n@moonpaste 1\n@pgo 1\n@maemae 1\n@monyoNERVA 1\nName: counts, Length: 91, dtype: int64"
},
"execution_count": 568,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 空リプライランキング\n",
"# 2020\n",
"toots_2020_empty_reply = make_empty_reply_ranking(toots_2020)\n",
"toots_2020_empty_reply\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 569,
"outputs": [
{
"data": {
"text/plain": "@rio_tc 177\n@S_iRe_N 141\n@uzuky 121\n@y_f_ 104\n@yunoka 100\n ... \n@blindwalk 1\n@X 1\n@xenop 1\n@sysecond 1\n@kd 1\nName: counts, Length: 80, dtype: int64"
},
"execution_count": 569,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toots_2021_empty_reply = make_empty_reply_ranking(toots_2021)\n",
"toots_2021_empty_reply"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 570,
"outputs": [
{
"data": {
"text/plain": "@rio_tc 1822\n@highemerly 743\n@yuhina 638\n@Eucritical 563\n@mysterytrick 471\n ... \n@unitendon 1\n@hinoyu 1\n@nyoro 1\n@kxn4t 1\n@monyoNERVA 1\nName: counts, Length: 143, dtype: int64"
},
"execution_count": 570,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 普通のリプライランキング\n",
"# 2020\n",
"toot_2020_all = toots_2020['content'].str.contains('')\n",
"toots_2020_reply = conditional_freq_series(toots_2020['content'], toot_2020_all, id_regex)\n",
"toots_2020_reply\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 571,
"outputs": [
{
"data": {
"text/plain": "@rio_tc 1640\n@S_iRe_N 327\n@yuhina 291\n@y_f_ 229\n@Eucritical 216\n ... \n@kuizy_net 1\n@Wakupedia 1\n@higure 1\n@ck 1\n@@@@@@@@@@@@@@@@@@@@@@@@@ 1\nName: counts, Length: 123, dtype: int64"
},
"execution_count": 571,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toot_2021_all = toots_2021['content'].str.contains('')\n",
"toots_2021_reply = conditional_freq_series(toots_2021['content'], toot_2021_all, id_regex)\n",
"toots_2021_reply"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 572,
"outputs": [
{
"data": {
"text/plain": " empty normal prop\n@uzuky 274.0 383 0.715405\n@okunom 102.0 167 0.610778\n@yunoka 69.0 142 0.485915\n@y_f_ 142.0 306 0.464052\n@S_iRe_N 160.0 375 0.426667\n@Eucritical 221.0 563 0.392540\n@femm 75.0 196 0.382653\n@mysterytrick 136.0 471 0.288747\n@pom_matsu 48.0 174 0.275862\n@maemaestra 50.0 214 0.233645\n@ac_key 43.0 186 0.231183\n@highemerly 145.0 743 0.195155\n@heimusu 14.0 73 0.191781\n@kd 12.0 70 0.171429\n@zero_zaki_ghost 41.0 249 0.164659\n@u2mk 23.0 146 0.157534\n@misogi 26.0 166 0.156627\n@toshi_a 15.0 106 0.141509\n@rio_tc 199.0 1822 0.109221\n@4pk 8.0 99 0.080808\n@meliza 9.0 121 0.074380\n@hijouguchi 6.0 123 0.048780\n@LeLievre 6.0 125 0.048000\n@seibe 1.0 70 0.014286\n@desk_crusher 1.0 79 0.012658\n@suma 1.0 142 0.007042\n@yuhina 3.0 638 0.004702\n@toku2 NaN 198 NaN\n@yamatema NaN 75 NaN",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>empty</th>\n <th>normal</th>\n <th>prop</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>@uzuky</th>\n <td>274.0</td>\n <td>383</td>\n <td>0.715405</td>\n </tr>\n <tr>\n <th>@okunom</th>\n <td>102.0</td>\n <td>167</td>\n <td>0.610778</td>\n </tr>\n <tr>\n <th>@yunoka</th>\n <td>69.0</td>\n <td>142</td>\n <td>0.485915</td>\n </tr>\n <tr>\n <th>@y_f_</th>\n <td>142.0</td>\n <td>306</td>\n <td>0.464052</td>\n </tr>\n <tr>\n <th>@S_iRe_N</th>\n <td>160.0</td>\n <td>375</td>\n <td>0.426667</td>\n </tr>\n <tr>\n <th>@Eucritical</th>\n <td>221.0</td>\n <td>563</td>\n <td>0.392540</td>\n </tr>\n <tr>\n <th>@femm</th>\n <td>75.0</td>\n <td>196</td>\n <td>0.382653</td>\n </tr>\n <tr>\n <th>@mysterytrick</th>\n <td>136.0</td>\n <td>471</td>\n <td>0.288747</td>\n </tr>\n <tr>\n <th>@pom_matsu</th>\n <td>48.0</td>\n <td>174</td>\n <td>0.275862</td>\n </tr>\n <tr>\n <th>@maemaestra</th>\n <td>50.0</td>\n <td>214</td>\n <td>0.233645</td>\n </tr>\n <tr>\n <th>@ac_key</th>\n <td>43.0</td>\n <td>186</td>\n <td>0.231183</td>\n </tr>\n <tr>\n <th>@highemerly</th>\n <td>145.0</td>\n <td>743</td>\n <td>0.195155</td>\n </tr>\n <tr>\n <th>@heimusu</th>\n <td>14.0</td>\n <td>73</td>\n <td>0.191781</td>\n </tr>\n <tr>\n <th>@kd</th>\n <td>12.0</td>\n <td>70</td>\n <td>0.171429</td>\n </tr>\n <tr>\n <th>@zero_zaki_ghost</th>\n <td>41.0</td>\n <td>249</td>\n <td>0.164659</td>\n </tr>\n <tr>\n <th>@u2mk</th>\n <td>23.0</td>\n <td>146</td>\n <td>0.157534</td>\n </tr>\n <tr>\n <th>@misogi</th>\n <td>26.0</td>\n <td>166</td>\n <td>0.156627</td>\n </tr>\n <tr>\n <th>@toshi_a</th>\n <td>15.0</td>\n <td>106</td>\n <td>0.141509</td>\n </tr>\n <tr>\n <th>@rio_tc</th>\n 
<td>199.0</td>\n <td>1822</td>\n <td>0.109221</td>\n </tr>\n <tr>\n <th>@4pk</th>\n <td>8.0</td>\n <td>99</td>\n <td>0.080808</td>\n </tr>\n <tr>\n <th>@meliza</th>\n <td>9.0</td>\n <td>121</td>\n <td>0.074380</td>\n </tr>\n <tr>\n <th>@hijouguchi</th>\n <td>6.0</td>\n <td>123</td>\n <td>0.048780</td>\n </tr>\n <tr>\n <th>@LeLievre</th>\n <td>6.0</td>\n <td>125</td>\n <td>0.048000</td>\n </tr>\n <tr>\n <th>@seibe</th>\n <td>1.0</td>\n <td>70</td>\n <td>0.014286</td>\n </tr>\n <tr>\n <th>@desk_crusher</th>\n <td>1.0</td>\n <td>79</td>\n <td>0.012658</td>\n </tr>\n <tr>\n <th>@suma</th>\n <td>1.0</td>\n <td>142</td>\n <td>0.007042</td>\n </tr>\n <tr>\n <th>@yuhina</th>\n <td>3.0</td>\n <td>638</td>\n <td>0.004702</td>\n </tr>\n <tr>\n <th>@toku2</th>\n <td>NaN</td>\n <td>198</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>@yamatema</th>\n <td>NaN</td>\n <td>75</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 572,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 空リプ割合(空リプ数 ÷ リプライ数)\n",
"# 2020\n",
"toots_2020_forprop = pd.DataFrame({\n",
" 'empty': toots_2020_empty_reply,\n",
" 'normal': toots_2020_reply\n",
"})\n",
"toots_2020_forprop['prop'] = toots_2020_forprop['empty'] / toots_2020_forprop['normal']\n",
"toots_2020_forprop[toots_2020_forprop['normal'] > toots_2020_forprop['normal'].mean()].sort_values('prop',\n",
" ascending=False)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 573,
"outputs": [
{
"data": {
"text/plain": " empty normal prop\n@uzuky 121.0 138 0.876812\n@okunom 97.0 132 0.734848\n@inabap 46.0 74 0.621622\n@D_HELL 82.0 142 0.577465\n@femm 31.0 58 0.534483\n@yunoka 100.0 213 0.469484\n@y_f_ 104.0 229 0.454148\n@S_iRe_N 141.0 327 0.431193\n@maemaestra 68.0 202 0.336634\n@ac_key 17.0 59 0.288136\n@mysterytrick 52.0 203 0.256158\n@osoba 20.0 80 0.250000\n@zero_zaki_ghost 29.0 201 0.144279\n@rio_tc 177.0 1640 0.107927\n@toshi_a 8.0 78 0.102564\n@Eucritical 21.0 216 0.097222\n@highemerly 13.0 160 0.081250\n@4pk 4.0 62 0.064516\n@shijin 2.0 66 0.030303\n@yuhina 5.0 291 0.017182\n@Citrine 1.0 156 0.006410\n@desk_crusher NaN 80 NaN",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>empty</th>\n <th>normal</th>\n <th>prop</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>@uzuky</th>\n <td>121.0</td>\n <td>138</td>\n <td>0.876812</td>\n </tr>\n <tr>\n <th>@okunom</th>\n <td>97.0</td>\n <td>132</td>\n <td>0.734848</td>\n </tr>\n <tr>\n <th>@inabap</th>\n <td>46.0</td>\n <td>74</td>\n <td>0.621622</td>\n </tr>\n <tr>\n <th>@D_HELL</th>\n <td>82.0</td>\n <td>142</td>\n <td>0.577465</td>\n </tr>\n <tr>\n <th>@femm</th>\n <td>31.0</td>\n <td>58</td>\n <td>0.534483</td>\n </tr>\n <tr>\n <th>@yunoka</th>\n <td>100.0</td>\n <td>213</td>\n <td>0.469484</td>\n </tr>\n <tr>\n <th>@y_f_</th>\n <td>104.0</td>\n <td>229</td>\n <td>0.454148</td>\n </tr>\n <tr>\n <th>@S_iRe_N</th>\n <td>141.0</td>\n <td>327</td>\n <td>0.431193</td>\n </tr>\n <tr>\n <th>@maemaestra</th>\n <td>68.0</td>\n <td>202</td>\n <td>0.336634</td>\n </tr>\n <tr>\n <th>@ac_key</th>\n <td>17.0</td>\n <td>59</td>\n <td>0.288136</td>\n </tr>\n <tr>\n <th>@mysterytrick</th>\n <td>52.0</td>\n <td>203</td>\n <td>0.256158</td>\n </tr>\n <tr>\n <th>@osoba</th>\n <td>20.0</td>\n <td>80</td>\n <td>0.250000</td>\n </tr>\n <tr>\n <th>@zero_zaki_ghost</th>\n <td>29.0</td>\n <td>201</td>\n <td>0.144279</td>\n </tr>\n <tr>\n <th>@rio_tc</th>\n <td>177.0</td>\n <td>1640</td>\n <td>0.107927</td>\n </tr>\n <tr>\n <th>@toshi_a</th>\n <td>8.0</td>\n <td>78</td>\n <td>0.102564</td>\n </tr>\n <tr>\n <th>@Eucritical</th>\n <td>21.0</td>\n <td>216</td>\n <td>0.097222</td>\n </tr>\n <tr>\n <th>@highemerly</th>\n <td>13.0</td>\n <td>160</td>\n <td>0.081250</td>\n </tr>\n <tr>\n <th>@4pk</th>\n <td>4.0</td>\n <td>62</td>\n <td>0.064516</td>\n </tr>\n <tr>\n <th>@shijin</th>\n <td>2.0</td>\n 
<td>66</td>\n <td>0.030303</td>\n </tr>\n <tr>\n <th>@yuhina</th>\n <td>5.0</td>\n <td>291</td>\n <td>0.017182</td>\n </tr>\n <tr>\n <th>@Citrine</th>\n <td>1.0</td>\n <td>156</td>\n <td>0.006410</td>\n </tr>\n <tr>\n <th>@desk_crusher</th>\n <td>NaN</td>\n <td>80</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 573,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toots_2021_forprop = pd.DataFrame({\n",
" 'empty': toots_2021_empty_reply,\n",
" 'normal': toots_2021_reply\n",
"})\n",
"toots_2021_forprop['prop'] = toots_2021_forprop['empty'] / toots_2021_forprop['normal']\n",
"toots_2021_forprop[toots_2021_forprop['normal'] > toots_2021_forprop['normal'].mean()].sort_values('prop',\n",
" ascending=False)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 574,
"outputs": [
{
"data": {
"text/plain": "9772"
},
"execution_count": 574,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "2452"
},
"execution_count": 574,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "5956"
},
"execution_count": 574,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "1533"
},
"execution_count": 574,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# リプライ総数と空リプ総数を出す\n",
"# 2020\n",
"toots_2020_reply.sum()\n",
"toots_2020_empty_reply.sum()\n",
"# 2021\n",
"toots_2021_reply.sum()\n",
"toots_2021_empty_reply.sum()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 575,
"outputs": [
{
"data": {
"text/plain": ":str_iiyo: 82\n:str_uoo: 38\n:str_popopopa: 30\n:str_erait: 28\n:str_guaa: 23\n ..\n:adobe_acrobat: 1\n:autocad: 1\n:ansible: 1\n:brobsword: 1\n:str_eee: 1\nName: counts, Length: 140, dtype: int64"
},
"execution_count": 575,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# カスタム絵文字\n",
"# 2020\n",
"toot_2020_all_filter = toots_2020['content'].str.contains('')\n",
"conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\":[\\d\\w]+:\")\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 576,
"outputs": [
{
"data": {
"text/plain": ":str_iiyo: 51\n:str_popopopa: 44\n:pui: 23\n:str_senbeidehanai: 17\n:stra_fold: 17\n ..\n:str_arigato: 1\n:str_yabai: 1\n:str_wayo: 1\n:str_logbo: 1\n:str_damedayo: 1\nName: counts, Length: 105, dtype: int64"
},
"execution_count": 576,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toot_2021_all_filter = toots_2021['content'].str.contains('')\n",
"conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\":[\\d\\w]+:\")\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 577,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"593\n",
"359\n"
]
}
],
"source": [
"# カスタム絵文字、総数\n",
"# 2020\n",
"toot_2020_all_filter = toots_2020['content'].str.contains('')\n",
"print(conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\":[\\d\\w]+:\").sum())\n",
"# 2021\n",
"toot_2021_all_filter = toots_2021['content'].str.contains('')\n",
"print(conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\":[\\d\\w]+:\").sum())\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 578,
"outputs": [
{
"data": {
"text/plain": "せんべいではないです 460\nName: counts, dtype: int64"
},
"execution_count": 578,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# せんべいではないです\n",
"# 2020\n",
"toot_2020_all_filter = toots_2020['content'].str.contains('')\n",
"conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\"せんべいではないです\")\n",
"\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 579,
"outputs": [
{
"data": {
"text/plain": "せんべいではないです 383\nName: counts, dtype: int64"
},
"execution_count": 579,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toot_2021_all_filter = toots_2021['content'].str.contains('')\n",
"conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\"せんべいではないです\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 580,
"outputs": [
{
"data": {
"text/plain": "あ 341\nえ 58\na 43\n夜 38\n、 38\n ... \n0 1\n肉 1\n∀ 1\n味 1\n外 1\nName: counts, Length: 241, dtype: int64"
},
"execution_count": 580,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 1文字トゥート\n",
"# 2020\n",
"toot_2020_all_filter = toots_2020['content'].str.len() == 1\n",
"# toots_2020['content'][toot_2020_all_filter].value_counts()\n",
"conditional_freq_series(toots_2020['content'], toot_2020_all_filter, r\".+\")\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 581,
"outputs": [
{
"data": {
"text/plain": "あ 462\nえ 112\nう 51\n? 30\n昼 21\n ... \nカ 1\n水 1\nシ 1\n神 1\nコ 1\nName: counts, Length: 183, dtype: int64"
},
"execution_count": 581,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2021\n",
"toot_2021_all_filter = toots_2021['content'].str.len() == 1\n",
"conditional_freq_series(toots_2021['content'], toot_2021_all_filter, r\".+\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 581,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 581,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 581,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 581,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 581,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment