PiotrZakrzewski/IMDB-5000-ETL.ipynb

## IMDB-5000-ETL.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LICENSE                \u001b[1m\u001b[36mmacrobase.egg-info\u001b[m\u001b[m/    setup.py\r\n",
      "README.md              \u001b[1m\u001b[36mmb_lite\u001b[m\u001b[m/               test.csv\r\n",
      "Showcase.ipynb         movies.csv             tmdb_5000_credits.csv\r\n",
      "\u001b[1m\u001b[36mbuild\u001b[m\u001b[m/                 movies_etl.ipynb       tmdb_5000_movies.csv\r\n",
      "\u001b[1m\u001b[36mdist\u001b[m\u001b[m/                  requirements-dev.txt\r\n",
      "\u001b[1m\u001b[36menv\u001b[m\u001b[m/                   requirements.txt\r\n"
     ]
    }
   ],
   "source": [
    "ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"tmdb_5000_movies.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',\n",
       "       'original_title', 'overview', 'popularity', 'production_companies',\n",
       "       'production_countries', 'release_date', 'revenue', 'runtime',\n",
       "       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',\n",
       "       'vote_count'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "875.581305"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max(df[\"popularity\"]) - min(df[\"popularity\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       150.437577\n",
       "1       139.082615\n",
       "2       107.376788\n",
       "3       112.312950\n",
       "4        43.926995\n",
       "           ...    \n",
       "4798     14.269792\n",
       "4799      0.642552\n",
       "4800      1.444476\n",
       "4801      0.857008\n",
       "4802      1.929883\n",
       "Name: popularity, Length: 4803, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"popularity\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_reduced = df[[\"title\" ,\"spoken_languages\" ,\"production_companies\", \"production_countries\", \"genres\", \"keywords\", \"original_language\", \"popularity\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       United States of America\n",
       "1       United States of America\n",
       "2       United States of America\n",
       "3       United States of America\n",
       "4       United States of America\n",
       "                  ...           \n",
       "4798    United States of America\n",
       "4799                         N/A\n",
       "4800    United States of America\n",
       "4801    United States of America\n",
       "4802    United States of America\n",
       "Name: production_countries, Length: 4803, dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "from collections import Counter\n",
    "def de_json_column(dataframe, column_name):\n",
    "    all_jsons = dataframe[column_name]\n",
    "    all_names = []\n",
    "    for jsn in all_jsons:\n",
    "        jsn = json.loads(jsn)\n",
    "        for el in jsn:\n",
    "            all_names.append(el[\"name\"])\n",
    "    names_count = Counter(all_names)\n",
    "    def dejson(x):\n",
    "        jsn = json.loads(x)\n",
    "        highest = 'N/A'\n",
    "        c = 0 \n",
    "        for el in jsn:\n",
    "            name = el[\"name\"]\n",
    "            current_c = names_count[name]\n",
    "            if current_c > c:\n",
    "                highest = name\n",
    "                c = current_c\n",
    "        return highest\n",
    "    return dataframe[column_name].map(dejson)\n",
    "de_json_column(df, 'production_countries')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>popularity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Avatar</td>\n",
       "      <td>English</td>\n",
       "      <td>Twentieth Century Fox Film Corporation</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Action</td>\n",
       "      <td>3d</td>\n",
       "      <td>en</td>\n",
       "      <td>150.437577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>English</td>\n",
       "      <td>Walt Disney Pictures</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Action</td>\n",
       "      <td>aftercreditsstinger</td>\n",
       "      <td>en</td>\n",
       "      <td>139.082615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spectre</td>\n",
       "      <td>English</td>\n",
       "      <td>Columbia Pictures</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Action</td>\n",
       "      <td>based on novel</td>\n",
       "      <td>en</td>\n",
       "      <td>107.376788</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>English</td>\n",
       "      <td>Warner Bros.</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Drama</td>\n",
       "      <td>superhero</td>\n",
       "      <td>en</td>\n",
       "      <td>112.312950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>John Carter</td>\n",
       "      <td>English</td>\n",
       "      <td>Walt Disney Pictures</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Action</td>\n",
       "      <td>based on novel</td>\n",
       "      <td>en</td>\n",
       "      <td>43.926995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4798</th>\n",
       "      <td>El Mariachi</td>\n",
       "      <td>Español</td>\n",
       "      <td>Columbia Pictures</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Thriller</td>\n",
       "      <td>united states–mexico barrier</td>\n",
       "      <td>es</td>\n",
       "      <td>14.269792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4799</th>\n",
       "      <td>Newlyweds</td>\n",
       "      <td>N/A</td>\n",
       "      <td>N/A</td>\n",
       "      <td>N/A</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>N/A</td>\n",
       "      <td>en</td>\n",
       "      <td>0.642552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4800</th>\n",
       "      <td>Signed, Sealed, Delivered</td>\n",
       "      <td>English</td>\n",
       "      <td>Muse Entertainment Enterprises</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Drama</td>\n",
       "      <td>investigation</td>\n",
       "      <td>en</td>\n",
       "      <td>1.444476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4801</th>\n",
       "      <td>Shanghai Calling</td>\n",
       "      <td>English</td>\n",
       "      <td>N/A</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>N/A</td>\n",
       "      <td>N/A</td>\n",
       "      <td>en</td>\n",
       "      <td>0.857008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4802</th>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>English</td>\n",
       "      <td>rusty bear entertainment</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>obsession</td>\n",
       "      <td>en</td>\n",
       "      <td>1.929883</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4803 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         title spoken_languages  \\\n",
       "0                                       Avatar          English   \n",
       "1     Pirates of the Caribbean: At World's End          English   \n",
       "2                                      Spectre          English   \n",
       "3                        The Dark Knight Rises          English   \n",
       "4                                  John Carter          English   \n",
       "...                                        ...              ...   \n",
       "4798                               El Mariachi          Español   \n",
       "4799                                 Newlyweds              N/A   \n",
       "4800                 Signed, Sealed, Delivered          English   \n",
       "4801                          Shanghai Calling          English   \n",
       "4802                         My Date with Drew          English   \n",
       "\n",
       "                        production_companies      production_countries  \\\n",
       "0     Twentieth Century Fox Film Corporation  United States of America   \n",
       "1                       Walt Disney Pictures  United States of America   \n",
       "2                          Columbia Pictures  United States of America   \n",
       "3                               Warner Bros.  United States of America   \n",
       "4                       Walt Disney Pictures  United States of America   \n",
       "...                                      ...                       ...   \n",
       "4798                       Columbia Pictures  United States of America   \n",
       "4799                                     N/A                       N/A   \n",
       "4800          Muse Entertainment Enterprises  United States of America   \n",
       "4801                                     N/A  United States of America   \n",
       "4802                rusty bear entertainment  United States of America   \n",
       "\n",
       "           genres                      keywords original_language  popularity  \n",
       "0          Action                            3d                en  150.437577  \n",
       "1          Action           aftercreditsstinger                en  139.082615  \n",
       "2          Action                based on novel                en  107.376788  \n",
       "3           Drama                     superhero                en  112.312950  \n",
       "4          Action                based on novel                en   43.926995  \n",
       "...           ...                           ...               ...         ...  \n",
       "4798     Thriller  united states–mexico barrier                es   14.269792  \n",
       "4799       Comedy                           N/A                en    0.642552  \n",
       "4800        Drama                 investigation                en    1.444476  \n",
       "4801          N/A                           N/A                en    0.857008  \n",
       "4802  Documentary                     obsession                en    1.929883  \n",
       "\n",
       "[4803 rows x 8 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.options.mode.chained_assignment = None\n",
    "df_reduced[\"spoken_languages\"] = de_json_column(df_reduced, \"spoken_languages\")\n",
    "df_reduced[\"production_countries\"] = de_json_column(df_reduced, \"production_countries\")\n",
    "df_reduced[\"production_companies\"] = de_json_column(df_reduced, \"production_companies\")\n",
    "df_reduced[\"keywords\"] = de_json_column(df_reduced, \"keywords\" )\n",
    "df_reduced[\"genres\"] = de_json_column(df_reduced, \"genres\" )\n",
    "df_reduced"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_reduced.to_csv(\"movies.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"LICENSE \u001b[1m\u001b[36mmacrobase.egg-info\u001b[m\u001b[m/ setup.py\r\n",
	"README.md \u001b[1m\u001b[36mmb_lite\u001b[m\u001b[m/ test.csv\r\n",
	"Showcase.ipynb movies.csv tmdb_5000_credits.csv\r\n",
	"\u001b[1m\u001b[36mbuild\u001b[m\u001b[m/ movies_etl.ipynb tmdb_5000_movies.csv\r\n",
	"\u001b[1m\u001b[36mdist\u001b[m\u001b[m/ requirements-dev.txt\r\n",
	"\u001b[1m\u001b[36menv\u001b[m\u001b[m/ requirements.txt\r\n"
	]
	}
	],
	"source": [
	"ls"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.read_csv(\"tmdb_5000_movies.csv\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',\n",
	" 'original_title', 'overview', 'popularity', 'production_companies',\n",
	" 'production_countries', 'release_date', 'revenue', 'runtime',\n",
	" 'spoken_languages', 'status', 'tagline', 'title', 'vote_average',\n",
	" 'vote_count'],\n",
	" dtype='object')"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df.columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"875.581305"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"max(df[\"popularity\"]) - min(df[\"popularity\"])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0 150.437577\n",
	"1 139.082615\n",
	"2 107.376788\n",
	"3 112.312950\n",
	"4 43.926995\n",
	" ... \n",
	"4798 14.269792\n",
	"4799 0.642552\n",
	"4800 1.444476\n",
	"4801 0.857008\n",
	"4802 1.929883\n",
	"Name: popularity, Length: 4803, dtype: float64"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df[\"popularity\"]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"df_reduced = df[[\"title\" ,\"spoken_languages\" ,\"production_companies\", \"production_countries\", \"genres\", \"keywords\", \"original_language\", \"popularity\"]]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0 United States of America\n",
	"1 United States of America\n",
	"2 United States of America\n",
	"3 United States of America\n",
	"4 United States of America\n",
	" ... \n",
	"4798 United States of America\n",
	"4799 N/A\n",
	"4800 United States of America\n",
	"4801 United States of America\n",
	"4802 United States of America\n",
	"Name: production_countries, Length: 4803, dtype: object"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import json\n",
	"from collections import Counter\n",
	"def de_json_column(dataframe, column_name):\n",
	" all_jsons = dataframe[column_name]\n",
	" all_names = []\n",
	" for jsn in all_jsons:\n",
	" jsn = json.loads(jsn)\n",
	" for el in jsn:\n",
	" all_names.append(el[\"name\"])\n",
	" names_count = Counter(all_names)\n",
	" def dejson(x):\n",
	" jsn = json.loads(x)\n",
	" highest = 'N/A'\n",
	" c = 0 \n",
	" for el in jsn:\n",
	" name = el[\"name\"]\n",
	" current_c = names_count[name]\n",
	" if current_c > c:\n",
	" highest = name\n",
	" c = current_c\n",
	" return highest\n",
	" return dataframe[column_name].map(dejson)\n",
	"de_json_column(df, 'production_countries')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>title</th>\n",
	" <th>spoken_languages</th>\n",
	" <th>production_companies</th>\n",
	" <th>production_countries</th>\n",
	" <th>genres</th>\n",
	" <th>keywords</th>\n",
	" <th>original_language</th>\n",
	" <th>popularity</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>Avatar</td>\n",
	" <td>English</td>\n",
	" <td>Twentieth Century Fox Film Corporation</td>\n",
	" <td>United States of America</td>\n",
	" <td>Action</td>\n",
	" <td>3d</td>\n",
	" <td>en</td>\n",
	" <td>150.437577</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Pirates of the Caribbean: At World's End</td>\n",
	" <td>English</td>\n",
	" <td>Walt Disney Pictures</td>\n",
	" <td>United States of America</td>\n",
	" <td>Action</td>\n",
	" <td>aftercreditsstinger</td>\n",
	" <td>en</td>\n",
	" <td>139.082615</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Spectre</td>\n",
	" <td>English</td>\n",
	" <td>Columbia Pictures</td>\n",
	" <td>United States of America</td>\n",
	" <td>Action</td>\n",
	" <td>based on novel</td>\n",
	" <td>en</td>\n",
	" <td>107.376788</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>The Dark Knight Rises</td>\n",
	" <td>English</td>\n",
	" <td>Warner Bros.</td>\n",
	" <td>United States of America</td>\n",
	" <td>Drama</td>\n",
	" <td>superhero</td>\n",
	" <td>en</td>\n",
	" <td>112.312950</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>John Carter</td>\n",
	" <td>English</td>\n",
	" <td>Walt Disney Pictures</td>\n",
	" <td>United States of America</td>\n",
	" <td>Action</td>\n",
	" <td>based on novel</td>\n",
	" <td>en</td>\n",
	" <td>43.926995</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>...</th>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4798</th>\n",
	" <td>El Mariachi</td>\n",
	" <td>Español</td>\n",
	" <td>Columbia Pictures</td>\n",
	" <td>United States of America</td>\n",
	" <td>Thriller</td>\n",
	" <td>united states–mexico barrier</td>\n",
	" <td>es</td>\n",
	" <td>14.269792</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4799</th>\n",
	" <td>Newlyweds</td>\n",
	" <td>N/A</td>\n",
	" <td>N/A</td>\n",
	" <td>N/A</td>\n",
	" <td>Comedy</td>\n",
	" <td>N/A</td>\n",
	" <td>en</td>\n",
	" <td>0.642552</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4800</th>\n",
	" <td>Signed, Sealed, Delivered</td>\n",
	" <td>English</td>\n",
	" <td>Muse Entertainment Enterprises</td>\n",
	" <td>United States of America</td>\n",
	" <td>Drama</td>\n",
	" <td>investigation</td>\n",
	" <td>en</td>\n",
	" <td>1.444476</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4801</th>\n",
	" <td>Shanghai Calling</td>\n",
	" <td>English</td>\n",
	" <td>N/A</td>\n",
	" <td>United States of America</td>\n",
	" <td>N/A</td>\n",
	" <td>N/A</td>\n",
	" <td>en</td>\n",
	" <td>0.857008</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4802</th>\n",
	" <td>My Date with Drew</td>\n",
	" <td>English</td>\n",
	" <td>rusty bear entertainment</td>\n",
	" <td>United States of America</td>\n",
	" <td>Documentary</td>\n",
	" <td>obsession</td>\n",
	" <td>en</td>\n",
	" <td>1.929883</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"<p>4803 rows × 8 columns</p>\n",
	"</div>"
	],
	"text/plain": [
	" title spoken_languages \\\n",
	"0 Avatar English \n",
	"1 Pirates of the Caribbean: At World's End English \n",
	"2 Spectre English \n",
	"3 The Dark Knight Rises English \n",
	"4 John Carter English \n",
	"... ... ... \n",
	"4798 El Mariachi Español \n",
	"4799 Newlyweds N/A \n",
	"4800 Signed, Sealed, Delivered English \n",
	"4801 Shanghai Calling English \n",
	"4802 My Date with Drew English \n",
	"\n",
	" production_companies production_countries \\\n",
	"0 Twentieth Century Fox Film Corporation United States of America \n",
	"1 Walt Disney Pictures United States of America \n",
	"2 Columbia Pictures United States of America \n",
	"3 Warner Bros. United States of America \n",
	"4 Walt Disney Pictures United States of America \n",
	"... ... ... \n",
	"4798 Columbia Pictures United States of America \n",
	"4799 N/A N/A \n",
	"4800 Muse Entertainment Enterprises United States of America \n",
	"4801 N/A United States of America \n",
	"4802 rusty bear entertainment United States of America \n",
	"\n",
	" genres keywords original_language popularity \n",
	"0 Action 3d en 150.437577 \n",
	"1 Action aftercreditsstinger en 139.082615 \n",
	"2 Action based on novel en 107.376788 \n",
	"3 Drama superhero en 112.312950 \n",
	"4 Action based on novel en 43.926995 \n",
	"... ... ... ... ... \n",
	"4798 Thriller united states–mexico barrier es 14.269792 \n",
	"4799 Comedy N/A en 0.642552 \n",
	"4800 Drama investigation en 1.444476 \n",
	"4801 N/A N/A en 0.857008 \n",
	"4802 Documentary obsession en 1.929883 \n",
	"\n",
	"[4803 rows x 8 columns]"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"pd.options.mode.chained_assignment = None\n",
	"df_reduced[\"spoken_languages\"] = de_json_column(df_reduced, \"spoken_languages\")\n",
	"df_reduced[\"production_countries\"] = de_json_column(df_reduced, \"production_countries\")\n",
	"df_reduced[\"production_companies\"] = de_json_column(df_reduced, \"production_companies\")\n",
	"df_reduced[\"keywords\"] = de_json_column(df_reduced, \"keywords\" )\n",
	"df_reduced[\"genres\"] = de_json_column(df_reduced, \"genres\" )\n",
	"df_reduced"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"df_reduced.to_csv(\"movies.csv\", index=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}