Skip to content

Instantly share code, notes, and snippets.

@slate-dev
Last active January 24, 2022 08:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slate-dev/8f59772a790e5a3ed70788fab70d5343 to your computer and use it in GitHub Desktop.
Save slate-dev/8f59772a790e5a3ed70788fab70d5343 to your computer and use it in GitHub Desktop.

What does Twitter Say about Self-Regulated Learning? Mapping Tweets from 2011 to 2021

Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f1fc655e-15e1-4ae6-8866-79964ceebff3",
"metadata": {},
"outputs": [],
"source": [
"# This script generates the word clouds.\n\n",
"import numpy as np\n",
"import pandas as pd\n",
"from wordcloud import WordCloud\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee70fafd-3437-487c-9249-608bdfd20f76",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_feather('tweets_lemmatization.fz').explode('lemma_tokens')\n",
"df = df.loc[df['lemma_tokens'].str.len() > 3]\n",
"df['created_at'] = pd.to_datetime(df['created_at'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5754367-cc5e-4f3e-81e2-b83f8ee987cf",
"metadata": {},
"outputs": [],
"source": [
"replacement_mapping_dict = {\n",
" \"selfregulated\": \"self-regulated learning\",\n",
" \"selfregulate\": \"self-regulated learning\",\n",
" \"selfregulation\": \"self-regulated learning\",\n",
" \"regulation\": \"self-regulated learning\",\n",
" \"self\": \"self-regulated learning\",\n",
" \"regulate\": \"self-regulated learning\",\n",
" \"learning\": \"self-regulated learning\",\n",
" \"learn\": \"self-regulated learning\"\n",
"}\n",
"df[\"lemma_tokens\"].replace(to_replace = replacement_mapping_dict, inplace = True)\n",
"df = df.loc[~df['lemma_tokens'].isin(['self-regulated learning', 'student', 'help'])]\n",
"df = df.loc[~df['lemma_tokens'].str.contains('\\d', regex=True)]\n",
"df['year'] = df['created_at'].dt.year\n",
"df = df[['year', 'lemma_tokens']]\n",
"\n",
"df = (df\n",
" .groupby(['year', 'lemma_tokens'])\n",
" .size()\n",
" .to_frame('size')\n",
" .sort_values(by=['year', 'size'], ascending=False)\n",
" .reset_index()\n",
" .groupby(['year'])\n",
" .head(100)\n",
" .sort_values(by=['year', 'size'], ascending=True)\n",
")\n",
"\n",
"print(df)\n",
"\n",
"years = df['year'].drop_duplicates().sort_values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "378cf99e-76d2-40a4-98fc-7ef96bfb03d6",
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('wordcloud.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c972f04-af2c-4bda-aa7a-5510a70d9d43",
"metadata": {},
"outputs": [],
"source": [
"wordclouds = []\n",
"min = df['size'].min()\n",
"max = df['size'].max()\n",
"d = np.log(10 + max/min)\n",
"for year in years:\n",
" conf = df.loc[df['year'] == year]\n",
" data = {}\n",
" prev = 0\n",
" for r in conf.iterrows():\n",
" key = r[1]['lemma_tokens']\n",
" count = r[1]['size']\n",
" prev = prev + np.log(10 + count - prev)\n",
" data[key] = np.log2(2 + count)\n",
" width = 1240\n",
" height = 584\n",
" if (year == 2021): \n",
" width = 2480\n",
" height = 584\n",
" wordcloud = WordCloud(relative_scaling=0.5, width=width,height=height, max_words=100, background_color=\"white\").generate_from_frequencies(data)\n",
" wordclouds.append([year, wordcloud])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "68dedbe8-6d47-43f4-b61c-6dec1b483626",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"# Lay out the previously generated word clouds as a grid of subplots\n",
"# (two per row; 2021 gets a full-width row) and save it as wordcloud.png.\n",
"fig = plt.figure(figsize=(8.3, 11.7), dpi=300, facecolor=\"white\")\n",
"plt.axis(\"off\")\n",
"\n",
"x = 1\n",
"y = 0\n",
"j = 1 + len(wordclouds) / 2\n",
"for w in wordclouds:\n",
" c = 2\n",
" n = x\n",
" if (w[0] == 2021): \n",
" c = 1\n",
" n = 6\n",
" plt.subplot(int(j), c, n).set_title(w[0], y = -0.18, fontsize=12)\n",
" plt.plot()\n",
" plt.axis(\"off\")\n",
" x = x + 1\n",
" plt.imshow(w[1], interpolation=\"bilinear\")\n",
"# plt.tight_layout(h_pad = 1.5, w_pad = 0)\n",
"# plt.tight_layout(rect=(0,0,0.75,1), pad = 0)\n",
"plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0.2)\n",
"plt.savefig('wordcloud.png', pad_inches = 0, bbox_inches='tight', facecolor=fig.get_facecolor(), edgecolor='none')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment