alexeygrigorev/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Help with moving data from Slack to DataTalks.Club website!

Fork the DataTalks.Club website repo
Run the code in this notebook

Check this video if you need any help (https://www.loom.com/share/e7a25dff9fba42e6b1bc6991e1c7e7cd)


Create a PR with code changes


## export-book-of-the-week.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import json\n",
    "import yaml\n",
    "\n",
    "from zipfile import ZipFile\n",
    "from datetime import datetime, timedelta\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import clipboard\n",
    "import frontmatter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Some prep work\n",
    "\n",
    "Defining some variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Local checkout of the DataTalks.Club website repo (change this to your own path).\n",
    "# Later cells read scripts/emojis.csv and the _books/202*.md files from it.\n",
    "dtc_website_dir = Path('c:/Users/alexe/git/datatalksclub.github.io')\n",
    "\n",
    "# Slack export archive; it is opened with ZipFile further down.\n",
    "dump_zip_file = 'DataTalks.Club Slack export Sep 19 2020 - Sep 19 2022.zip'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Loading emojis:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lookup table built from the `code` and `emoji` columns of scripts/emojis.csv\n",
    "emojis_csv = dtc_website_dir / 'scripts' / 'emojis.csv'\n",
    "df_emojis = pd.read_csv(emojis_csv)\n",
    "emoji_map = {code: emoji for code, emoji in zip(df_emojis['code'], df_emojis['emoji'])}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finding books without answers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Book pages whose file name starts with 202 (named by date, e.g. 20210315-...).\n",
    "# Sorted so that books are processed in the same order on every run;\n",
    "# the order in which glob yields files is not specified.\n",
    "books_dir = dtc_website_dir / '_books'\n",
    "book_files = sorted(books_dir.glob('202*.md'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect (path, front matter dict) for every finished book that has no\n",
    "# 'archive' key in its front matter yet.\n",
    "no_archive = []\n",
    "\n",
    "# this one file is always skipped\n",
    "skipped_book = '20210315-database-internals.md'\n",
    "# taken once so every book is compared against the same moment\n",
    "now = datetime.today()\n",
    "\n",
    "for book_file in book_files:\n",
    "    if book_file.name == skipped_book:\n",
    "        continue\n",
    "\n",
    "    post = frontmatter.load(book_file)\n",
    "\n",
    "    # looking only at events that finished\n",
    "    if now < post['end']:\n",
    "        continue\n",
    "\n",
    "    if 'archive' not in post.keys():\n",
    "        no_archive.append((book_file, post.to_dict()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Some helper functions\n",
    "\n",
    "We'll need them for parsing the Slack dump"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def repl_user_callback(match):\n",
    "    \"\"\"Return the name of the user referenced by a `user_pattern` match.\n",
    "\n",
    "    Group 1 is everything between '<@' and '>'. If it carries a label after\n",
    "    '|', only the part before it is used as the user id. Ids missing from\n",
    "    `users` fall back to the label (or the raw id) instead of raising KeyError.\n",
    "    \"\"\"\n",
    "    user_id, _, label = match.group(1).partition('|')\n",
    "    user = users.get(user_id)\n",
    "    if user is None:\n",
    "        return label or user_id\n",
    "    return user['name']\n",
    "\n",
    "\n",
    "# <@ID> - user mention\n",
    "user_pattern = re.compile(r'<@(.+?)>')\n",
    "# <http...|label> - link with a label; prepare_text applies it before link_pattern,\n",
    "# which would otherwise swallow the label into the url\n",
    "link_pattern_text = re.compile(r'<(http.+?)\\|(.+?)>')\n",
    "# <http...> - bare link\n",
    "link_pattern = re.compile(r'<(http.+?)>')\n",
    "# :code: optionally followed by a skin tone modifier; only group 1 is used by\n",
    "# replace_emoji_callback, so the modifier is dropped. The modifier is matched\n",
    "# by name so that an adjacent emoji such as :smile::v: is not swallowed.\n",
    "emoji_pattern = re.compile(r':([-+0-9_a-z]+):(:skin-tone-[2-6]:)?')\n",
    "\n",
    "\n",
    "def replace_emoji_callback(match):\n",
    "    \"\"\"Return the `emoji_map` entry for the code in group 1 of the match.\n",
    "\n",
    "    A code that is not in the map is reported on stdout and returned\n",
    "    as `:code:`.\n",
    "    \"\"\"\n",
    "    code = match.group(1)\n",
    "    if code in emoji_map:\n",
    "        return emoji_map[code]\n",
    "    print('cannot find %s' % code)\n",
    "    return \":%s:\" % code\n",
    "\n",
    "\n",
    "def prepare_text(text):\n",
    "    \"\"\"Turn the raw text of a Slack message into markdown-style text.\n",
    "\n",
    "    Cleans up non-breaking spaces, bullets and doubled newlines, then\n",
    "    resolves user mentions, emoji codes and links, in that order.\n",
    "    \"\"\"\n",
    "    text = text.replace('\\xa0', ' ')\n",
    "    # '\\u2022' is the bullet character; 'ΓÇó' is the same bullet after its\n",
    "    # UTF-8 bytes were decoded with the wrong code page, so accept both\n",
    "    text = text.replace('\\u2022', '-').replace('ΓÇó', '-')\n",
    "    text = text.replace('\\n\\n', '\\n')\n",
    "    text = user_pattern.sub(repl_user_callback, text)\n",
    "    text = emoji_pattern.sub(replace_emoji_callback, text)\n",
    "    text = link_pattern_text.sub(r'[\\2](\\1)', text)\n",
    "    text = link_pattern.sub(r'[\\1](\\1)', text)\n",
    "    return text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_docs_zipfile(zipfile, files):\n",
    "    \"\"\"Read several JSON members of an open zip archive into one flat list.\n",
    "\n",
    "    zipfile: object whose `open(name)` returns a readable file\n",
    "    files: member names; the parsed content of each member is appended\n",
    "        element by element, in the order given\n",
    "    \"\"\"\n",
    "    merged = []\n",
    "    for member in files:\n",
    "        with zipfile.open(member) as handle:\n",
    "            merged += json.load(handle)\n",
    "    return merged\n",
    "\n",
    "\n",
    "def clean_user(d):\n",
    "    \"\"\"Reduce a user record to the two fields used later: name and image.\n",
    "\n",
    "    The profile's display name is used unless it is empty, in which case\n",
    "    the real name is taken. The image is the profile's `image_72` value.\n",
    "    \"\"\"\n",
    "    profile = d['profile']\n",
    "    display_name = profile['display_name']\n",
    "    name = profile['real_name'] if len(display_name) == 0 else display_name\n",
    "    return {'name': name, 'image': profile['image_72']}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading slack data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, load the users:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stays open: the cells below read users.json and the channel files from it.\n",
    "dump = ZipFile(dump_zip_file, 'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# users.json sits at the top level of the export archive\n",
    "with dump.open('users.json') as users_file:\n",
    "    all_users = json.load(users_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# user id -> {'name': ..., 'image': ...}\n",
    "users = {}\n",
    "\n",
    "for user_doc in all_users:\n",
    "    users[user_doc['id']] = clean_user(user_doc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's find all the json files in the dump of the book-of-the-week channel:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# archive members that start with the channel name and are json files\n",
    "all_files = [\n",
    "    member\n",
    "    for member in dump.namelist()\n",
    "    if member.startswith('book-of-the-week') and member.endswith('.json')\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "scrolled": true
   },
   "source": [
    "And load all messages (we'll need them for making threads):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every message of the channel, not only the week being processed:\n",
    "# the reply lookup in the processing section is built from this list.\n",
    "all_messages_docs = load_docs_zipfile(dump, all_files)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Processing\n",
    "\n",
    "Keep running this until you run out of books =)\n",
    "\n",
    "Now let's take one of the books:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "pop from empty list",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_3032/1075073259.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mbook_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbook\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mno_archive\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbook_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparts\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mIndexError\u001b[0m: pop from empty list"
     ]
    }
   ],
   "source": [
    "# Takes the next unprocessed book off the list. Once the list is empty this\n",
    "# raises IndexError ('pop from empty list'), i.e. there are no books left.\n",
    "book_file, book = no_archive.pop()\n",
    "print(book_file.parts[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2022-01-09 00:00:00\n",
      "2022-01-16 00:00:00\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['book-of-the-week/2022-01-10.json', 'book-of-the-week/2022-01-16.json']"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Window of export files to read: from the day before the book's start\n",
    "# up to and including the day seven days after that.\n",
    "start = book['start'] - timedelta(days=1)\n",
    "end = start + timedelta(days=7)\n",
    "print(start, end, sep='\\n')\n",
    "\n",
    "\n",
    "def _export_date(member):\n",
    "    \"\"\"Date encoded in the file name of an export member (YYYY-MM-DD.json).\"\"\"\n",
    "    return datetime.strptime(Path(member).name, '%Y-%m-%d.json')\n",
    "\n",
    "\n",
    "question_files = [member for member in all_files if start <= _export_date(member) <= end]\n",
    "\n",
    "question_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "question_messages_docs = load_docs_zipfile(dump, question_files)\n",
    "\n",
    "# messages that are never exported as a thread\n",
    "skipped_subtypes = ('thread_broadcast', 'channel_join')\n",
    "skipped_authors = ('Francis Terence Amit',)\n",
    "skipped_phrases = ('The lucky winners', 'Please send me your emails in DM')\n",
    "\n",
    "# thread starters of the selected week\n",
    "top_messages = [\n",
    "    d for d in question_messages_docs\n",
    "    if 'parent_user_id' not in d and d.get('subtype') not in skipped_subtypes\n",
    "]\n",
    "\n",
    "# Replies are looked up by (user, ts) across the whole channel. Messages\n",
    "# that lack one of the two keys cannot be looked up and are left out\n",
    "# instead of aborting the cell with a KeyError.\n",
    "replies_idx = {\n",
    "    (d['user'], d['ts']): d\n",
    "    for d in all_messages_docs\n",
    "    if 'user' in d and 'ts' in d\n",
    "}\n",
    "\n",
    "\n",
    "def is_weekly_announcement(text):\n",
    "    \"\"\"True for the post that introduces the book of the week.\"\"\"\n",
    "    return 'Hello, everyone!' in text and 'The book of this week is' in text\n",
    "\n",
    "\n",
    "threads = []\n",
    "\n",
    "for top_message in top_messages:\n",
    "    user_id = top_message.get('user')\n",
    "    if user_id is None or user_id == 'USLACKBOT':\n",
    "        continue\n",
    "\n",
    "    top_name = users[user_id]['name']\n",
    "    if top_name in skipped_authors:\n",
    "        continue\n",
    "\n",
    "    top_text = prepare_text(top_message['text']).strip()\n",
    "\n",
    "    if is_weekly_announcement(top_text):\n",
    "        continue\n",
    "    if any(phrase in top_text for phrase in skipped_phrases):\n",
    "        continue\n",
    "\n",
    "    replies = []\n",
    "\n",
    "    for p in top_message.get('replies', []):\n",
    "        reply = replies_idx.get((p['user'], p['ts']))\n",
    "        if reply is None:\n",
    "            # the thread refers to a message that is not in the loaded files\n",
    "            print('cannot find reply %s from %s' % (p['ts'], p['user']))\n",
    "            continue\n",
    "\n",
    "        replies.append({\n",
    "            'name': users[p['user']]['name'],\n",
    "            'text': prepare_text(reply['text']).strip(),\n",
    "        })\n",
    "\n",
    "    # key order matters: the yaml dump below keeps it (sort_keys=False)\n",
    "    threads.append({'name': top_name, 'text': top_text, 'replies': replies})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# sort_keys=False keeps the name / text / replies order of each thread\n",
    "yaml_snippet = yaml.dump({'archive': threads}, sort_keys=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "archive:\n",
      "- name: Alexey Grigorev\n",
      "  text: I didn't get a confirmation from the author for this week, but we'll have\n",
      "    something next week!\n",
      "  replies:\n",
      "  - name: Varun Nayyar\n",
      "    text: \"Why not call someone back again? If possible?\\U0001F914 Maybe some people\\\n",
      "      \\ can get their questions in now i\n"
     ]
    }
   ],
   "source": [
    "# quick look at the first 300 characters of the snippet\n",
    "print(yaml_snippet[:300])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the whole snippet on the clipboard - presumably to paste it into the\n",
    "# front matter of the book file opened by the next cell.\n",
    "clipboard.copy(yaml_snippet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Runs the `code` shell command on the current book file (presumably the\n",
    "# VS Code launcher - it has to be on PATH).\n",
    "!code {book_file}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import re\n",
	"import json\n",
	"import yaml\n",
	"\n",
	"from zipfile import ZipFile\n",
	"from datetime import datetime, timedelta\n",
	"from pathlib import Path\n",
	"\n",
	"import pandas as pd\n",
	"\n",
	"import clipboard\n",
	"import frontmatter"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Some prep work\n",
	"\n",
	"Defining some variables"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"dtc_website_dir = Path('c:/Users/alexe/git/datatalksclub.github.io')\n",
	"\n",
	"dump_zip_file = 'DataTalks.Club Slack export Sep 19 2020 - Sep 19 2022.zip'"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Loading emojis:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"df_emojis = pd.read_csv(dtc_website_dir / 'scripts' / 'emojis.csv')\n",
	"emoji_map = dict(zip(df_emojis.code, df_emojis.emoji))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Finding books without answers:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"dtc_website_dir / '_books'\n",
	"book_files = list((dtc_website_dir / '_books').glob('202*.md'))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Collect (path, front matter dict) for every finished book that has no\n",
	"# 'archive' key in its front matter yet.\n",
	"no_archive = []\n",
	"\n",
	"# this one file is always skipped\n",
	"skipped_book = '20210315-database-internals.md'\n",
	"# taken once so every book is compared against the same moment\n",
	"now = datetime.today()\n",
	"\n",
	"for book_file in book_files:\n",
	"    if book_file.name == skipped_book:\n",
	"        continue\n",
	"\n",
	"    post = frontmatter.load(book_file)\n",
	"\n",
	"    # looking only at events that finished\n",
	"    if now < post['end']:\n",
	"        continue\n",
	"\n",
	"    if 'archive' not in post.keys():\n",
	"        no_archive.append((book_file, post.to_dict()))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Some helper functions\n",
	"\n",
	"We'll need them for parsing the Slack dump"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"def repl_user_callback(match):\n",
	"    \"\"\"Return the name of the user referenced by a `user_pattern` match.\n",
	"\n",
	"    Group 1 is everything between '<@' and '>'. If it carries a label after\n",
	"    '|', only the part before it is used as the user id. Ids missing from\n",
	"    `users` fall back to the label (or the raw id) instead of raising KeyError.\n",
	"    \"\"\"\n",
	"    user_id, _, label = match.group(1).partition('|')\n",
	"    user = users.get(user_id)\n",
	"    if user is None:\n",
	"        return label or user_id\n",
	"    return user['name']\n",
	"\n",
	"\n",
	"# <@ID> - user mention\n",
	"user_pattern = re.compile(r'<@(.+?)>')\n",
	"# <http...|label> - link with a label; prepare_text applies it before link_pattern,\n",
	"# which would otherwise swallow the label into the url\n",
	"link_pattern_text = re.compile(r'<(http.+?)\\|(.+?)>')\n",
	"# <http...> - bare link\n",
	"link_pattern = re.compile(r'<(http.+?)>')\n",
	"# :code: optionally followed by a skin tone modifier; only group 1 is used by\n",
	"# replace_emoji_callback, so the modifier is dropped. The modifier is matched\n",
	"# by name so that an adjacent emoji such as :smile::v: is not swallowed.\n",
	"emoji_pattern = re.compile(r':([-+0-9_a-z]+):(:skin-tone-[2-6]:)?')\n",
	"\n",
	"\n",
	"def replace_emoji_callback(match):\n",
	"    \"\"\"Return the `emoji_map` entry for the code in group 1 of the match.\n",
	"\n",
	"    A code that is not in the map is reported on stdout and returned\n",
	"    as `:code:`.\n",
	"    \"\"\"\n",
	"    code = match.group(1)\n",
	"    if code in emoji_map:\n",
	"        return emoji_map[code]\n",
	"    print('cannot find %s' % code)\n",
	"    return \":%s:\" % code\n",
	"\n",
	"\n",
	"def prepare_text(text):\n",
	"    \"\"\"Turn the raw text of a Slack message into markdown-style text.\n",
	"\n",
	"    Cleans up non-breaking spaces, bullets and doubled newlines, then\n",
	"    resolves user mentions, emoji codes and links, in that order.\n",
	"    \"\"\"\n",
	"    text = text.replace('\\xa0', ' ')\n",
	"    # '\\u2022' is the bullet character; 'ΓÇó' is the same bullet after its\n",
	"    # UTF-8 bytes were decoded with the wrong code page, so accept both\n",
	"    text = text.replace('\\u2022', '-').replace('ΓÇó', '-')\n",
	"    text = text.replace('\\n\\n', '\\n')\n",
	"    text = user_pattern.sub(repl_user_callback, text)\n",
	"    text = emoji_pattern.sub(replace_emoji_callback, text)\n",
	"    text = link_pattern_text.sub(r'[\\2](\\1)', text)\n",
	"    text = link_pattern.sub(r'[\\1](\\1)', text)\n",
	"    return text"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"def load_docs_zipfile(zipfile, files):\n",
	"    \"\"\"Read several JSON members of an open zip archive into one flat list.\n",
	"\n",
	"    zipfile: object whose `open(name)` returns a readable file\n",
	"    files: member names; the parsed content of each member is appended\n",
	"        element by element, in the order given\n",
	"    \"\"\"\n",
	"    merged = []\n",
	"    for member in files:\n",
	"        with zipfile.open(member) as handle:\n",
	"            merged += json.load(handle)\n",
	"    return merged\n",
	"\n",
	"\n",
	"def clean_user(d):\n",
	"    \"\"\"Reduce a user record to the two fields used later: name and image.\n",
	"\n",
	"    The profile's display name is used unless it is empty, in which case\n",
	"    the real name is taken. The image is the profile's `image_72` value.\n",
	"    \"\"\"\n",
	"    profile = d['profile']\n",
	"    display_name = profile['display_name']\n",
	"    name = profile['real_name'] if len(display_name) == 0 else display_name\n",
	"    return {'name': name, 'image': profile['image_72']}"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Loading slack data"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"First, load the users:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"dump = ZipFile(dump_zip_file, 'r')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"with dump.open('users.json') as f_in:\n",
	" all_users = json.load(f_in)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"users = {d['id']: clean_user(d) for d in all_users}"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Now let's find all the json files in the dump of the book-of-the-week channel:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"# archive members that start with the channel name and are json files\n",
	"all_files = [\n",
	"    member\n",
	"    for member in dump.namelist()\n",
	"    if member.startswith('book-of-the-week') and member.endswith('.json')\n",
	"]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"scrolled": true
	},
	"source": [
	"And load all messages (we'll need them for making threads):"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"all_messages_docs = load_docs_zipfile(dump, all_files)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Processing\n",
	"\n",
	"Keep running this until you run out of books =)\n",
	"\n",
	"Now let's take one of the books:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 64,
	"metadata": {},
	"outputs": [
	{
	"ename": "IndexError",
	"evalue": "pop from empty list",
	"output_type": "error",
	"traceback": [
	"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
	"\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
	"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_3032/1075073259.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mbook_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbook\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mno_archive\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbook_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparts\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
	"\u001b[1;31mIndexError\u001b[0m: pop from empty list"
	]
	}
	],
	"source": [
	"book_file, book = no_archive.pop()\n",
	"print(book_file.parts[-1])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 58,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2022-01-09 00:00:00\n",
	"2022-01-16 00:00:00\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"['book-of-the-week/2022-01-10.json', 'book-of-the-week/2022-01-16.json']"
	]
	},
	"execution_count": 58,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Window of export files to read: from the day before the book's start\n",
	"# up to and including the day seven days after that.\n",
	"start = book['start'] - timedelta(days=1)\n",
	"end = start + timedelta(days=7)\n",
	"print(start, end, sep='\\n')\n",
	"\n",
	"\n",
	"def _export_date(member):\n",
	"    \"\"\"Date encoded in the file name of an export member (YYYY-MM-DD.json).\"\"\"\n",
	"    return datetime.strptime(Path(member).name, '%Y-%m-%d.json')\n",
	"\n",
	"\n",
	"question_files = [member for member in all_files if start <= _export_date(member) <= end]\n",
	"\n",
	"question_files"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 59,
	"metadata": {},
	"outputs": [],
	"source": [
	"question_messages_docs = load_docs_zipfile(dump, question_files)\n",
	"\n",
	"# messages that are never exported as a thread\n",
	"skipped_subtypes = ('thread_broadcast', 'channel_join')\n",
	"skipped_authors = ('Francis Terence Amit',)\n",
	"skipped_phrases = ('The lucky winners', 'Please send me your emails in DM')\n",
	"\n",
	"# thread starters of the selected week\n",
	"top_messages = [\n",
	"    d for d in question_messages_docs\n",
	"    if 'parent_user_id' not in d and d.get('subtype') not in skipped_subtypes\n",
	"]\n",
	"\n",
	"# Replies are looked up by (user, ts) across the whole channel. Messages\n",
	"# that lack one of the two keys cannot be looked up and are left out\n",
	"# instead of aborting the cell with a KeyError.\n",
	"replies_idx = {\n",
	"    (d['user'], d['ts']): d\n",
	"    for d in all_messages_docs\n",
	"    if 'user' in d and 'ts' in d\n",
	"}\n",
	"\n",
	"\n",
	"def is_weekly_announcement(text):\n",
	"    \"\"\"True for the post that introduces the book of the week.\"\"\"\n",
	"    return 'Hello, everyone!' in text and 'The book of this week is' in text\n",
	"\n",
	"\n",
	"threads = []\n",
	"\n",
	"for top_message in top_messages:\n",
	"    user_id = top_message.get('user')\n",
	"    if user_id is None or user_id == 'USLACKBOT':\n",
	"        continue\n",
	"\n",
	"    top_name = users[user_id]['name']\n",
	"    if top_name in skipped_authors:\n",
	"        continue\n",
	"\n",
	"    top_text = prepare_text(top_message['text']).strip()\n",
	"\n",
	"    if is_weekly_announcement(top_text):\n",
	"        continue\n",
	"    if any(phrase in top_text for phrase in skipped_phrases):\n",
	"        continue\n",
	"\n",
	"    replies = []\n",
	"\n",
	"    for p in top_message.get('replies', []):\n",
	"        reply = replies_idx.get((p['user'], p['ts']))\n",
	"        if reply is None:\n",
	"            # the thread refers to a message that is not in the loaded files\n",
	"            print('cannot find reply %s from %s' % (p['ts'], p['user']))\n",
	"            continue\n",
	"\n",
	"        replies.append({\n",
	"            'name': users[p['user']]['name'],\n",
	"            'text': prepare_text(reply['text']).strip(),\n",
	"        })\n",
	"\n",
	"    # key order matters: the yaml dump below keeps it (sort_keys=False)\n",
	"    threads.append({'name': top_name, 'text': top_text, 'replies': replies})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 60,
	"metadata": {
	"scrolled": false
	},
	"outputs": [],
	"source": [
	"yaml_snippet = yaml.dump({'archive': threads}, sort_keys=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 61,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"archive:\n",
	"- name: Alexey Grigorev\n",
	" text: I didn't get a confirmation from the author for this week, but we'll have\n",
	" something next week!\n",
	" replies:\n",
	" - name: Varun Nayyar\n",
	" text: \"Why not call someone back again? If possible?\\U0001F914 Maybe some people\\\n",
	" \\ can get their questions in now i\n"
	]
	}
	],
	"source": [
	"print(yaml_snippet[:300])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 62,
	"metadata": {},
	"outputs": [],
	"source": [
	"clipboard.copy(yaml_snippet)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 63,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n"
	]
	}
	],
	"source": [
	"!code {book_file}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.9.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}