Skip to content

Instantly share code, notes, and snippets.

@cast42
Created March 21, 2022 17:46
Show Gist options
  • Save cast42/95768e00a36aaa995f107241af93554b to your computer and use it in GitHub Desktop.
Save cast42/95768e00a36aaa995f107241af93554b to your computer and use it in GitHub Desktop.
Section_2_1_fake_news.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Section_2_1_fake_news.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyNvU0Q+Gjeg/AR+UFWfuAKN",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/cast42/95768e00a36aaa995f107241af93554b/section_2_1_fake_news.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zc4EM76NbZFS",
"outputId": "bf900634-5bdb-47a9-a127-c53c9a535ab2"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pyreadr in /usr/local/lib/python3.7/dist-packages (0.4.4)\n",
"Requirement already satisfied: pandas>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from pyreadr) (1.3.5)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.0->pyreadr) (2018.9)\n",
"Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.0->pyreadr) (1.21.5)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.0->pyreadr) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=1.2.0->pyreadr) (1.15.0)\n",
"Requirement already satisfied: wget in /usr/local/lib/python3.7/dist-packages (3.2)\n"
]
}
],
"source": [
"# Colab setup: install the packages needed to download and read the R data file.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install pyreadr\n",
"%pip install wget"
]
},
{
"cell_type": "code",
"source": [
"import wget\n",
"import pandas as pd\n",
"import pyreadr\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
],
"metadata": {
"id": "A4LNehG6e76-"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Download the fake_news dataset (an R .rda file) into the working directory.\n",
"# The URL is quoted so the shell does not treat the '?' as a glob character.\n",
"!wget -O fake_news.rda 'https://github.com/bayes-rules/bayesrules/blob/master/data/fake_news.rda?raw=true'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DB4n18uZmUWd",
"outputId": "41f0fdd2-88ec-4503-8ed9-43983f02a294"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2022-03-21 11:35:36-- https://github.com/bayes-rules/bayesrules/blob/master/data/fake_news.rda?raw=true\n",
"Resolving github.com (github.com)... 140.82.112.4\n",
"Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://github.com/bayes-rules/bayesrules/raw/master/data/fake_news.rda [following]\n",
"--2022-03-21 11:35:37-- https://github.com/bayes-rules/bayesrules/raw/master/data/fake_news.rda\n",
"Reusing existing connection to github.com:443.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/bayes-rules/bayesrules/master/data/fake_news.rda [following]\n",
"--2022-03-21 11:35:37-- https://raw.githubusercontent.com/bayes-rules/bayesrules/master/data/fake_news.rda\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 163161 (159K) [application/octet-stream]\n",
"Saving to: ‘fake_news.rda’\n",
"\n",
"fake_news.rda 100%[===================>] 159.34K --.-KB/s in 0.02s \n",
"\n",
"2022-03-21 11:35:37 (10.1 MB/s) - ‘fake_news.rda’ saved [163161/163161]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Pure-Python alternative to the shell download in the previous cell:\n",
"# fake_news_url = r'https://github.com/bayes-rules/bayesrules/blob/master/data/fake_news.rda?raw=true'\n",
"# wget.download(fake_news_url, 'fake_news.rda')\n",
"# Read the file from the working directory (where it was saved) instead of the\n",
"# Colab-only absolute path /content, so the notebook also runs outside Colab.\n",
"result = pyreadr.read_r('fake_news.rda')"
],
"metadata": {
"id": "C-Y4aoWTeUG7"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df = result['fake_news']"
],
"metadata": {
"id": "JQv_DgLiqW1k"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 734
},
"id": "-062YnYsrdvC",
"outputId": "c083c5ff-764d-4dc7-a56c-c32cbab75782"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" title \\\n",
"0 Clinton's Exploited Haiti Earthquake ‘to Steal... \n",
"1 Donald Trump Jr. Just Pinned A Tweet So Stupid... \n",
"2 Michelle Obama NOT Leaving The White House – H... \n",
"3 Trump hits Clinton on Islamic State: ‘It is ti... \n",
"4 Australia Voted To Ban Muslims And Liberals Ar... \n",
"\n",
" text \\\n",
"0 0 SHARES Facebook Twitter\\n\\nBernard Sansaricq... \n",
"1 \\n\\nPosted by Frank Wilkenmeyer on 19 Sep 2016... \n",
"2 Michelle Obama NOT Leaving The White House – H... \n",
"3 “Crooked Hillary has been fighting ISIS, or wh... \n",
"4 When Donald Trump said that he wanted to ban M... \n",
"\n",
" url authors \\\n",
"0 http://freedomdaily.com/former-haitian-senate-... NaN \n",
"1 http://winningdemocrats.com/donald-trump-jr-ju... NaN \n",
"2 http://rightwingnews.com/top-news/michelle-oba... Sierra Marlee \n",
"3 http://politi.co/2cEsAnL Jack Shafer,Nolan D \n",
"4 https://goo.gl/p7jFZG Blair Patterson \n",
"\n",
" type title_words text_words title_char text_char title_caps ... \\\n",
"0 fake 17 219 110 1444 0 ... \n",
"1 real 18 509 95 3016 0 ... \n",
"2 fake 16 494 96 2881 1 ... \n",
"3 real 11 268 60 1674 0 ... \n",
"4 fake 9 479 54 2813 0 ... \n",
"\n",
" disgust fear joy sadness surprise trust negative positive \\\n",
"0 2.54 3.81 1.27 4.66 2.12 2.97 8.47 3.81 \n",
"1 1.90 1.90 1.71 1.33 1.14 4.17 4.74 4.93 \n",
"2 0.98 1.57 1.96 0.78 1.18 3.73 3.33 5.49 \n",
"3 1.79 4.30 0.36 1.79 1.79 2.51 6.09 2.15 \n",
"4 0.41 0.82 1.23 0.82 0.82 2.46 2.66 4.30 \n",
"\n",
" text_syllables text_syllables_per_word \n",
"0 395 1.803653 \n",
"1 845 1.660118 \n",
"2 806 1.631579 \n",
"3 461 1.720149 \n",
"4 761 1.588727 \n",
"\n",
"[5 rows x 30 columns]"
],
"text/html": [
"\n",
" <div id=\"df-c6a4e85c-8511-4741-9a9e-dd4faaacdf79\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>text</th>\n",
" <th>url</th>\n",
" <th>authors</th>\n",
" <th>type</th>\n",
" <th>title_words</th>\n",
" <th>text_words</th>\n",
" <th>title_char</th>\n",
" <th>text_char</th>\n",
" <th>title_caps</th>\n",
" <th>...</th>\n",
" <th>disgust</th>\n",
" <th>fear</th>\n",
" <th>joy</th>\n",
" <th>sadness</th>\n",
" <th>surprise</th>\n",
" <th>trust</th>\n",
" <th>negative</th>\n",
" <th>positive</th>\n",
" <th>text_syllables</th>\n",
" <th>text_syllables_per_word</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Clinton's Exploited Haiti Earthquake ‘to Steal...</td>\n",
" <td>0 SHARES Facebook Twitter\\n\\nBernard Sansaricq...</td>\n",
" <td>http://freedomdaily.com/former-haitian-senate-...</td>\n",
" <td>NaN</td>\n",
" <td>fake</td>\n",
" <td>17</td>\n",
" <td>219</td>\n",
" <td>110</td>\n",
" <td>1444</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>2.54</td>\n",
" <td>3.81</td>\n",
" <td>1.27</td>\n",
" <td>4.66</td>\n",
" <td>2.12</td>\n",
" <td>2.97</td>\n",
" <td>8.47</td>\n",
" <td>3.81</td>\n",
" <td>395</td>\n",
" <td>1.803653</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Donald Trump Jr. Just Pinned A Tweet So Stupid...</td>\n",
" <td>\\n\\nPosted by Frank Wilkenmeyer on 19 Sep 2016...</td>\n",
" <td>http://winningdemocrats.com/donald-trump-jr-ju...</td>\n",
" <td>NaN</td>\n",
" <td>real</td>\n",
" <td>18</td>\n",
" <td>509</td>\n",
" <td>95</td>\n",
" <td>3016</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1.90</td>\n",
" <td>1.90</td>\n",
" <td>1.71</td>\n",
" <td>1.33</td>\n",
" <td>1.14</td>\n",
" <td>4.17</td>\n",
" <td>4.74</td>\n",
" <td>4.93</td>\n",
" <td>845</td>\n",
" <td>1.660118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Michelle Obama NOT Leaving The White House – H...</td>\n",
" <td>Michelle Obama NOT Leaving The White House – H...</td>\n",
" <td>http://rightwingnews.com/top-news/michelle-oba...</td>\n",
" <td>Sierra Marlee</td>\n",
" <td>fake</td>\n",
" <td>16</td>\n",
" <td>494</td>\n",
" <td>96</td>\n",
" <td>2881</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.98</td>\n",
" <td>1.57</td>\n",
" <td>1.96</td>\n",
" <td>0.78</td>\n",
" <td>1.18</td>\n",
" <td>3.73</td>\n",
" <td>3.33</td>\n",
" <td>5.49</td>\n",
" <td>806</td>\n",
" <td>1.631579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Trump hits Clinton on Islamic State: ‘It is ti...</td>\n",
" <td>“Crooked Hillary has been fighting ISIS, or wh...</td>\n",
" <td>http://politi.co/2cEsAnL</td>\n",
" <td>Jack Shafer,Nolan D</td>\n",
" <td>real</td>\n",
" <td>11</td>\n",
" <td>268</td>\n",
" <td>60</td>\n",
" <td>1674</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1.79</td>\n",
" <td>4.30</td>\n",
" <td>0.36</td>\n",
" <td>1.79</td>\n",
" <td>1.79</td>\n",
" <td>2.51</td>\n",
" <td>6.09</td>\n",
" <td>2.15</td>\n",
" <td>461</td>\n",
" <td>1.720149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Australia Voted To Ban Muslims And Liberals Ar...</td>\n",
" <td>When Donald Trump said that he wanted to ban M...</td>\n",
" <td>https://goo.gl/p7jFZG</td>\n",
" <td>Blair Patterson</td>\n",
" <td>fake</td>\n",
" <td>9</td>\n",
" <td>479</td>\n",
" <td>54</td>\n",
" <td>2813</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0.41</td>\n",
" <td>0.82</td>\n",
" <td>1.23</td>\n",
" <td>0.82</td>\n",
" <td>0.82</td>\n",
" <td>2.46</td>\n",
" <td>2.66</td>\n",
" <td>4.30</td>\n",
" <td>761</td>\n",
" <td>1.588727</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 30 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c6a4e85c-8511-4741-9a9e-dd4faaacdf79')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-c6a4e85c-8511-4741-9a9e-dd4faaacdf79 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-c6a4e85c-8511-4741-9a9e-dd4faaacdf79');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"# Count (n) and proportion (percent) of articles per type, side by side.\n",
"pd.concat(\n",
"    [\n",
"        df['type'].value_counts().rename('n'),\n",
"        df['type'].value_counts(normalize=True).rename('percent'),\n",
"    ],\n",
"    axis=1,\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 112
},
"id": "qFA_-sfzrs9o",
"outputId": "b16560f5-621d-429a-a1f7-93ce92663686"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" n percent\n",
"real 90 0.6\n",
"fake 60 0.4"
],
"text/html": [
"\n",
" <div id=\"df-31eb3ce0-91d1-43a5-bbd3-2849b2ec9244\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n</th>\n",
" <th>percent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>real</th>\n",
" <td>90</td>\n",
" <td>0.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fake</th>\n",
" <td>60</td>\n",
" <td>0.4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-31eb3ce0-91d1-43a5-bbd3-2849b2ec9244')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-31eb3ce0-91d1-43a5-bbd3-2849b2ec9244 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-31eb3ce0-91d1-43a5-bbd3-2849b2ec9244');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"pd.crosstab(df['title_has_excl'], df['type'], margins=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "U_f7EOJSsceo",
"outputId": "6000c7b2-322d-4041-b266-56b87b010d9a"
},
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"type fake real All\n",
"title_has_excl \n",
"False 44 88 132\n",
"True 16 2 18\n",
"All 60 90 150"
],
"text/html": [
"\n",
" <div id=\"df-1f628f27-09e0-4ba4-b642-48ea3a6d64bb\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>type</th>\n",
" <th>fake</th>\n",
" <th>real</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>title_has_excl</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>False</th>\n",
" <td>44</td>\n",
" <td>88</td>\n",
" <td>132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>True</th>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>60</td>\n",
" <td>90</td>\n",
" <td>150</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1f628f27-09e0-4ba4-b642-48ea3a6d64bb')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-1f628f27-09e0-4ba4-b642-48ea3a6d64bb button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-1f628f27-09e0-4ba4-b642-48ea3a6d64bb');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"article = pd.Series([\"real\", \"fake\"], dtype=\"category\")"
],
"metadata": {
"id": "CSD3J_nAtKy_"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"prior = pd.Series([0.6, 0.4])"
],
"metadata": {
"id": "Fdcm9bzXt5WW"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df_sample = pd.DataFrame({'type': article, 'weights' : prior})"
],
"metadata": {
"id": "druWCfh8uw3a"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df_sample.sample(3, replace=True, random_state=1301, weights='weights')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"id": "SE2DSMdyuBRB",
"outputId": "9335f1a6-53f8-4c26-f83c-0f0a910263b1"
},
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" type weights\n",
"0 real 0.6\n",
"1 fake 0.4\n",
"0 real 0.6"
],
"text/html": [
"\n",
" <div id=\"df-5c75dedd-b3a3-483b-8d42-271e5f344537\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>weights</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>real</td>\n",
" <td>0.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fake</td>\n",
" <td>0.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>real</td>\n",
" <td>0.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5c75dedd-b3a3-483b-8d42-271e5f344537')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-5c75dedd-b3a3-483b-8d42-271e5f344537 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-5c75dedd-b3a3-483b-8d42-271e5f344537');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"article_sim = df_sample.sample(10_000, replace=True, random_state=84735, weights='weights')"
],
"metadata": {
"id": "sjay1_dbuHvg"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Bar chart of how many of the simulated articles are real vs fake.\n",
"ax = article_sim['type'].value_counts().plot.bar(rot=0)\n",
"ax.set(\n",
"    xlabel='type',\n",
"    title='FIGURE 2.2: A bar plot of the fake vs real status of 10,000 simulated articles.',\n",
");"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"id": "5mn1iRE7vhzZ",
"outputId": "c57bff56-795d-4e13-acd0-bc2d0be9ba1f"
},
"execution_count": 14,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
"article_sim['type'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BblZxGL_vrdZ",
"outputId": "2be398f7-3977-4df8-f434-77f1ae3f140a"
},
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"real 6062\n",
"fake 3938\n",
"Name: type, dtype: int64"
]
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"source": [
"# Likelihood that a title contains an exclamation point, given the article type:\n",
"# 16/60 = 0.2667 for fake and 2/90 = 0.0222 for real (counts from the crosstab above).\n",
"# 'type' is categorical, so .map() alone would give a categorical column; cast to\n",
"# float so data_model can be used in arithmetic.\n",
"article_sim['data_model'] = article_sim['type'].map({'fake': 0.2667, 'real': 0.0222}).astype(float)\n",
"article_sim.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "J0-OLIKowG5Z",
"outputId": "6aed8059-7438-4ec8-dfdc-325be0a76a0f"
},
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" type weights data_model\n",
"0 real 0.6 0.0222\n",
"1 fake 0.4 0.2667\n",
"1 fake 0.4 0.2667\n",
"1 fake 0.4 0.2667\n",
"1 fake 0.4 0.2667"
],
"text/html": [
"\n",
" <div id=\"df-60313778-4173-46c0-983f-a504e8f2d5e8\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>weights</th>\n",
" <th>data_model</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>real</td>\n",
" <td>0.6</td>\n",
" <td>0.0222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fake</td>\n",
" <td>0.4</td>\n",
" <td>0.2667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fake</td>\n",
" <td>0.4</td>\n",
" <td>0.2667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fake</td>\n",
" <td>0.4</td>\n",
" <td>0.2667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fake</td>\n",
" <td>0.4</td>\n",
" <td>0.2667</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-60313778-4173-46c0-983f-a504e8f2d5e8')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-60313778-4173-46c0-983f-a504e8f2d5e8 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-60313778-4173-46c0-983f-a504e8f2d5e8');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 16
}
]
},
{
"cell_type": "code",
"source": [
"import random"
],
"metadata": {
"id": "2d9dPV9exsgl"
},
"execution_count": 17,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Seed the generator so re-running this cell reproduces the same simulated 'usage' column.\n",
"random.seed(84735)\n",
"\n",
"# Probability that an article uses an exclamation mark, given its type.\n",
"P_USAGE_GIVEN_TYPE = {'fake': 0.2667, 'real': 0.0222}\n",
"\n",
"for article_type, p_yes in P_USAGE_GIVEN_TYPE.items():\n",
"    # Rows of article_sim that belong to this article type.\n",
"    is_type = article_sim['type'] == article_type\n",
"    # random.choices treats weights as relative, so the conditional\n",
"    # probabilities can be passed directly; draw one label per matching row.\n",
"    article_sim.loc[is_type, 'usage'] = random.choices(\n",
"        ['no', 'yes'], weights=[1 - p_yes, p_yes], k=int(is_type.sum())\n",
"    )"
],
"metadata": {
"id": "w3Hjyq7TA65m"
},
"execution_count": 18,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Contingency table of exclamation-mark usage (rows) by article type (columns), with totals.\n",
"pd.crosstab(index=article_sim['usage'], columns=article_sim['type'], margins=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 175
},
"id": "O4csnKnyySAh",
"outputId": "f6a62234-8a8a-452b-fd09-b983b8b55b3b"
},
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"type fake real All\n",
"usage \n",
"no 2874 5929 8803\n",
"yes 1064 133 1197\n",
"All 3938 6062 10000"
],
"text/html": [
"\n",
" <div id=\"df-b5190e4e-1aeb-4ca0-a42d-9937cad498fa\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>type</th>\n",
" <th>fake</th>\n",
" <th>real</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>usage</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>no</th>\n",
" <td>2874</td>\n",
" <td>5929</td>\n",
" <td>8803</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yes</th>\n",
" <td>1064</td>\n",
" <td>133</td>\n",
" <td>1197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>3938</td>\n",
" <td>6062</td>\n",
" <td>10000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b5190e4e-1aeb-4ca0-a42d-9937cad498fa')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-b5190e4e-1aeb-4ca0-a42d-9937cad498fa button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-b5190e4e-1aeb-4ca0-a42d-9937cad498fa');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(8, 4.5))\n",
"# Left panel: exclamation-mark usage within each article type (stacked counts).\n",
"pd.crosstab(article_sim['type'], article_sim['usage']).plot(kind='bar', stacked=True, rot=0, ax=axes[0])\n",
"# Right panel: overall usage counts across all articles.\n",
"sns.countplot(x='usage', data=article_sim, ax=axes[1])\n",
"fig.suptitle('FIGURE 2.3: Bar plots of exclamation point usage, both within fake vs real news and overall.')\n",
"# tight_layout recomputes the subplot parameters, so a preceding subplots_adjust\n",
"# call has no effect; rect keeps the panels out of the top strip used by the suptitle.\n",
"fig.tight_layout(rect=(0, 0, 1, 0.95));"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 324
},
"id": "YaXrLDKByeaR",
"outputId": "3e9ea604-3920-429e-ab2c-0c2eb58ed5c4"
},
"execution_count": 20,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 576x324 with 2 Axes>"
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
"# ax = article_sim['usage'].value_counts().plot.bar(rot=0);\n",
"# ax.set_xlabel('usage');"
],
"metadata": {
"id": "HyZ7H4-kz10L"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# sns.catplot(x='usage', kind='count', data=article_sim);"
],
"metadata": {
"id": "sFn6iRUS1mBS"
},
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Grouped count plot: one bar per article type within each usage category.\n",
"sns.catplot(data=article_sim, kind='count', x='usage', hue='type')\n",
"# Title the axes that catplot just drew on (the current axes).\n",
"plt.gca().set_title('FIGURE 2.4: Bar plots of real vs fake news, broken down by exclamation point usage.');"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 385
},
"id": "dJX10TgL1YSI",
"outputId": "77ea1b7c-ab2c-4ce4-c7a8-c46cfa5c9db7"
},
"execution_count": 23,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 417.25x360 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "PYNFr8UN20-N"
},
"execution_count": 23,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment