Skip to content

Instantly share code, notes, and snippets.

@shindishella
Created February 27, 2020 02:58
Show Gist options
  • Save shindishella/862636402282b2742c811367730851e2 to your computer and use it in GitHub Desktop.
Save shindishella/862636402282b2742c811367730851e2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import pandas and numpy\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class \n",
"0 7.9 Good \n",
"1 7.1 Good \n",
"2 6.8 Good \n",
"3 8.5 Excellent \n",
"4 6.6 Good "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#import dataset\n",
"movie = pd.read_csv(\"movie_metadata.csv\")\n",
"movie.head() #head of data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>156.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>258000000.0</td>\n",
" <td>336530303.0</td>\n",
" <td>383056</td>\n",
" <td>1902.0</td>\n",
" <td>392.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>24000.0</td>\n",
" <td>11000.0</td>\n",
" <td>4000.0</td>\n",
" <td>46055</td>\n",
" <td>6.2</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>100.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>260000000.0</td>\n",
" <td>200807262.0</td>\n",
" <td>294810</td>\n",
" <td>387.0</td>\n",
" <td>324.0</td>\n",
" <td>29000</td>\n",
" <td>15.0</td>\n",
" <td>799.0</td>\n",
" <td>553.0</td>\n",
" <td>284.0</td>\n",
" <td>2036</td>\n",
" <td>7.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>141.0</td>\n",
" <td>4.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>458991599.0</td>\n",
" <td>462669</td>\n",
" <td>1117.0</td>\n",
" <td>635.0</td>\n",
" <td>118000</td>\n",
" <td>0.0</td>\n",
" <td>26000.0</td>\n",
" <td>21000.0</td>\n",
" <td>19000.0</td>\n",
" <td>92000</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>153.0</td>\n",
" <td>3.0</td>\n",
" <td>UK</td>\n",
" <td>250000000.0</td>\n",
" <td>301956980.0</td>\n",
" <td>321795</td>\n",
" <td>973.0</td>\n",
" <td>375.0</td>\n",
" <td>10000</td>\n",
" <td>282.0</td>\n",
" <td>25000.0</td>\n",
" <td>11000.0</td>\n",
" <td>10000.0</td>\n",
" <td>58753</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>183.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>330249062.0</td>\n",
" <td>371639</td>\n",
" <td>3018.0</td>\n",
" <td>673.0</td>\n",
" <td>197000</td>\n",
" <td>0.0</td>\n",
" <td>15000.0</td>\n",
" <td>4000.0</td>\n",
" <td>2000.0</td>\n",
" <td>24450</td>\n",
" <td>6.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>11</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>209000000.0</td>\n",
" <td>200069408.0</td>\n",
" <td>240396</td>\n",
" <td>2367.0</td>\n",
" <td>434.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>18000.0</td>\n",
" <td>10000.0</td>\n",
" <td>903.0</td>\n",
" <td>29991</td>\n",
" <td>6.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>12</td>\n",
" <td>106.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>200000000.0</td>\n",
" <td>168368427.0</td>\n",
" <td>330784</td>\n",
" <td>1243.0</td>\n",
" <td>403.0</td>\n",
" <td>0</td>\n",
" <td>395.0</td>\n",
" <td>451.0</td>\n",
" <td>412.0</td>\n",
" <td>393.0</td>\n",
" <td>2023</td>\n",
" <td>6.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>13</td>\n",
" <td>151.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>423032628.0</td>\n",
" <td>522040</td>\n",
" <td>1832.0</td>\n",
" <td>313.0</td>\n",
" <td>5000</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48486</td>\n",
" <td>7.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>14</td>\n",
" <td>150.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>215000000.0</td>\n",
" <td>89289910.0</td>\n",
" <td>181792</td>\n",
" <td>711.0</td>\n",
" <td>450.0</td>\n",
" <td>48000</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>2000.0</td>\n",
" <td>1000.0</td>\n",
" <td>45757</td>\n",
" <td>6.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>15</td>\n",
" <td>143.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>291021565.0</td>\n",
" <td>548573</td>\n",
" <td>2536.0</td>\n",
" <td>733.0</td>\n",
" <td>118000</td>\n",
" <td>0.0</td>\n",
" <td>15000.0</td>\n",
" <td>3000.0</td>\n",
" <td>748.0</td>\n",
" <td>20495</td>\n",
" <td>7.2</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>16</td>\n",
" <td>150.0</td>\n",
" <td>4.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>141614023.0</td>\n",
" <td>149922</td>\n",
" <td>438.0</td>\n",
" <td>258.0</td>\n",
" <td>0</td>\n",
" <td>80.0</td>\n",
" <td>22000.0</td>\n",
" <td>216.0</td>\n",
" <td>201.0</td>\n",
" <td>22697</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>17</td>\n",
" <td>173.0</td>\n",
" <td>3.0</td>\n",
" <td>USA</td>\n",
" <td>220000000.0</td>\n",
" <td>623279547.0</td>\n",
" <td>995415</td>\n",
" <td>1722.0</td>\n",
" <td>703.0</td>\n",
" <td>123000</td>\n",
" <td>0.0</td>\n",
" <td>26000.0</td>\n",
" <td>21000.0</td>\n",
" <td>19000.0</td>\n",
" <td>87697</td>\n",
" <td>8.1</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>18</td>\n",
" <td>136.0</td>\n",
" <td>4.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>241063875.0</td>\n",
" <td>370704</td>\n",
" <td>484.0</td>\n",
" <td>448.0</td>\n",
" <td>58000</td>\n",
" <td>252.0</td>\n",
" <td>40000.0</td>\n",
" <td>11000.0</td>\n",
" <td>1000.0</td>\n",
" <td>54083</td>\n",
" <td>6.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>19</td>\n",
" <td>106.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>179020854.0</td>\n",
" <td>268154</td>\n",
" <td>341.0</td>\n",
" <td>451.0</td>\n",
" <td>40000</td>\n",
" <td>188.0</td>\n",
" <td>10000.0</td>\n",
" <td>816.0</td>\n",
" <td>718.0</td>\n",
" <td>12572</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>20</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>New Zealand</td>\n",
" <td>250000000.0</td>\n",
" <td>255108370.0</td>\n",
" <td>354228</td>\n",
" <td>802.0</td>\n",
" <td>422.0</td>\n",
" <td>65000</td>\n",
" <td>0.0</td>\n",
" <td>5000.0</td>\n",
" <td>972.0</td>\n",
" <td>773.0</td>\n",
" <td>9152</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"5 6 156.0 0.0 USA 258000000.0 \n",
"6 7 100.0 1.0 USA 260000000.0 \n",
"7 8 141.0 4.0 USA 250000000.0 \n",
"8 9 153.0 3.0 UK 250000000.0 \n",
"9 10 183.0 0.0 USA 250000000.0 \n",
"10 11 169.0 0.0 USA 209000000.0 \n",
"11 12 106.0 1.0 UK 200000000.0 \n",
"12 13 151.0 2.0 USA 225000000.0 \n",
"13 14 150.0 1.0 USA 215000000.0 \n",
"14 15 143.0 0.0 USA 225000000.0 \n",
"15 16 150.0 4.0 USA 225000000.0 \n",
"16 17 173.0 3.0 USA 220000000.0 \n",
"17 18 136.0 4.0 USA 250000000.0 \n",
"18 19 106.0 1.0 USA 225000000.0 \n",
"19 20 164.0 0.0 New Zealand 250000000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews \\\n",
"0 760505847.0 886204 3054.0 \n",
"1 309404152.0 471220 1238.0 \n",
"2 200074175.0 275868 994.0 \n",
"3 448130642.0 1144337 2701.0 \n",
"4 73058679.0 212204 738.0 \n",
"5 336530303.0 383056 1902.0 \n",
"6 200807262.0 294810 387.0 \n",
"7 458991599.0 462669 1117.0 \n",
"8 301956980.0 321795 973.0 \n",
"9 330249062.0 371639 3018.0 \n",
"10 200069408.0 240396 2367.0 \n",
"11 168368427.0 330784 1243.0 \n",
"12 423032628.0 522040 1832.0 \n",
"13 89289910.0 181792 711.0 \n",
"14 291021565.0 548573 2536.0 \n",
"15 141614023.0 149922 438.0 \n",
"16 623279547.0 995415 1722.0 \n",
"17 241063875.0 370704 484.0 \n",
"18 179020854.0 268154 341.0 \n",
"19 255108370.0 354228 802.0 \n",
"\n",
" num_critic_for_reviews movie_facebook_likes director_facebook_likes \\\n",
"0 723.0 33000 0.0 \n",
"1 302.0 0 563.0 \n",
"2 602.0 85000 0.0 \n",
"3 813.0 164000 22000.0 \n",
"4 462.0 24000 475.0 \n",
"5 392.0 0 0.0 \n",
"6 324.0 29000 15.0 \n",
"7 635.0 118000 0.0 \n",
"8 375.0 10000 282.0 \n",
"9 673.0 197000 0.0 \n",
"10 434.0 0 0.0 \n",
"11 403.0 0 395.0 \n",
"12 313.0 5000 563.0 \n",
"13 450.0 48000 563.0 \n",
"14 733.0 118000 0.0 \n",
"15 258.0 0 80.0 \n",
"16 703.0 123000 0.0 \n",
"17 448.0 58000 252.0 \n",
"18 451.0 40000 188.0 \n",
"19 422.0 65000 0.0 \n",
"\n",
" actor_1_facebook_likes actor_2_facebook_likes actor_3_facebook_likes \\\n",
"0 1000.0 936.0 855.0 \n",
"1 40000.0 5000.0 1000.0 \n",
"2 11000.0 393.0 161.0 \n",
"3 27000.0 23000.0 23000.0 \n",
"4 640.0 632.0 530.0 \n",
"5 24000.0 11000.0 4000.0 \n",
"6 799.0 553.0 284.0 \n",
"7 26000.0 21000.0 19000.0 \n",
"8 25000.0 11000.0 10000.0 \n",
"9 15000.0 4000.0 2000.0 \n",
"10 18000.0 10000.0 903.0 \n",
"11 451.0 412.0 393.0 \n",
"12 40000.0 5000.0 1000.0 \n",
"13 40000.0 2000.0 1000.0 \n",
"14 15000.0 3000.0 748.0 \n",
"15 22000.0 216.0 201.0 \n",
"16 26000.0 21000.0 19000.0 \n",
"17 40000.0 11000.0 1000.0 \n",
"18 10000.0 816.0 718.0 \n",
"19 5000.0 972.0 773.0 \n",
"\n",
" cast_total_facebook_likes imdb_score imdb_score_class \n",
"0 4834 7.9 Good \n",
"1 48350 7.1 Good \n",
"2 11700 6.8 Good \n",
"3 106759 8.5 Excellent \n",
"4 1873 6.6 Good \n",
"5 46055 6.2 Good \n",
"6 2036 7.8 Good \n",
"7 92000 7.5 Good \n",
"8 58753 7.5 Good \n",
"9 24450 6.9 Good \n",
"10 29991 6.1 Good \n",
"11 2023 6.7 Good \n",
"12 48486 7.3 Good \n",
"13 45757 6.5 Good \n",
"14 20495 7.2 Good \n",
"15 22697 6.6 Good \n",
"16 87697 8.1 Excellent \n",
"17 54083 6.7 Good \n",
"18 12572 6.8 Good \n",
"19 9152 7.5 Good "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.head(n=20)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5037</th>\n",
" <td>5038</td>\n",
" <td>87.0</td>\n",
" <td>2.0</td>\n",
" <td>Canada</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>629</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>84</td>\n",
" <td>2.0</td>\n",
" <td>637.0</td>\n",
" <td>470.0</td>\n",
" <td>318.0</td>\n",
" <td>2283</td>\n",
" <td>7.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>5039</td>\n",
" <td>43.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>73839</td>\n",
" <td>359.0</td>\n",
" <td>43.0</td>\n",
" <td>32000</td>\n",
" <td>NaN</td>\n",
" <td>841.0</td>\n",
" <td>593.0</td>\n",
" <td>319.0</td>\n",
" <td>1753</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>5040</td>\n",
" <td>76.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1400.0</td>\n",
" <td>NaN</td>\n",
" <td>38</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>5041</td>\n",
" <td>100.0</td>\n",
" <td>5.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>10443.0</td>\n",
" <td>1255</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>660</td>\n",
" <td>0.0</td>\n",
" <td>946.0</td>\n",
" <td>719.0</td>\n",
" <td>489.0</td>\n",
" <td>2386</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>5042</td>\n",
" <td>90.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1100.0</td>\n",
" <td>85222.0</td>\n",
" <td>4285</td>\n",
" <td>84.0</td>\n",
" <td>43.0</td>\n",
" <td>456</td>\n",
" <td>16.0</td>\n",
" <td>86.0</td>\n",
" <td>23.0</td>\n",
" <td>16.0</td>\n",
" <td>163</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget gross \\\n",
"5037 5038 87.0 2.0 Canada NaN NaN \n",
"5038 5039 43.0 1.0 USA NaN NaN \n",
"5039 5040 76.0 0.0 USA 1400.0 NaN \n",
"5040 5041 100.0 5.0 USA NaN 10443.0 \n",
"5041 5042 90.0 0.0 USA 1100.0 85222.0 \n",
"\n",
" num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"5037 629 6.0 1.0 \n",
"5038 73839 359.0 43.0 \n",
"5039 38 3.0 13.0 \n",
"5040 1255 9.0 14.0 \n",
"5041 4285 84.0 43.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"5037 84 2.0 637.0 \n",
"5038 32000 NaN 841.0 \n",
"5039 16 0.0 0.0 \n",
"5040 660 0.0 946.0 \n",
"5041 456 16.0 86.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes \\\n",
"5037 470.0 318.0 \n",
"5038 593.0 319.0 \n",
"5039 0.0 0.0 \n",
"5040 719.0 489.0 \n",
"5041 23.0 16.0 \n",
"\n",
" cast_total_facebook_likes imdb_score imdb_score_class \n",
"5037 2283 7.7 Good \n",
"5038 1753 7.5 Good \n",
"5039 0 6.3 Good \n",
"5040 2386 6.3 Good \n",
"5041 163 6.6 Good "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.tail() #tail of data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5022</th>\n",
" <td>5023</td>\n",
" <td>88.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>15000.0</td>\n",
" <td>76382.0</td>\n",
" <td>1194</td>\n",
" <td>8.0</td>\n",
" <td>22.0</td>\n",
" <td>324</td>\n",
" <td>38.0</td>\n",
" <td>331.0</td>\n",
" <td>212.0</td>\n",
" <td>211.0</td>\n",
" <td>1546</td>\n",
" <td>6.2</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5023</th>\n",
" <td>5024</td>\n",
" <td>78.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>20000.0</td>\n",
" <td>NaN</td>\n",
" <td>1771</td>\n",
" <td>35.0</td>\n",
" <td>42.0</td>\n",
" <td>835</td>\n",
" <td>91.0</td>\n",
" <td>407.0</td>\n",
" <td>91.0</td>\n",
" <td>86.0</td>\n",
" <td>674</td>\n",
" <td>4.0</td>\n",
" <td>Bad</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5024</th>\n",
" <td>5025</td>\n",
" <td>108.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>10000.0</td>\n",
" <td>180483.0</td>\n",
" <td>16792</td>\n",
" <td>183.0</td>\n",
" <td>73.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>462.0</td>\n",
" <td>143.0</td>\n",
" <td>105.0</td>\n",
" <td>760</td>\n",
" <td>6.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5025</th>\n",
" <td>5026</td>\n",
" <td>110.0</td>\n",
" <td>1.0</td>\n",
" <td>France</td>\n",
" <td>4500.0</td>\n",
" <td>136007.0</td>\n",
" <td>3924</td>\n",
" <td>39.0</td>\n",
" <td>81.0</td>\n",
" <td>171</td>\n",
" <td>107.0</td>\n",
" <td>576.0</td>\n",
" <td>133.0</td>\n",
" <td>45.0</td>\n",
" <td>776</td>\n",
" <td>6.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5026</th>\n",
" <td>5027</td>\n",
" <td>90.0</td>\n",
" <td>0.0</td>\n",
" <td>Iran</td>\n",
" <td>10000.0</td>\n",
" <td>673780.0</td>\n",
" <td>4555</td>\n",
" <td>26.0</td>\n",
" <td>64.0</td>\n",
" <td>697</td>\n",
" <td>397.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5027</th>\n",
" <td>5028</td>\n",
" <td>83.0</td>\n",
" <td>0.0</td>\n",
" <td>Ireland</td>\n",
" <td>10000.0</td>\n",
" <td>NaN</td>\n",
" <td>57</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>105</td>\n",
" <td>18.0</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>15</td>\n",
" <td>6.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5028</th>\n",
" <td>5029</td>\n",
" <td>111.0</td>\n",
" <td>0.0</td>\n",
" <td>Japan</td>\n",
" <td>1000000.0</td>\n",
" <td>94596.0</td>\n",
" <td>6318</td>\n",
" <td>50.0</td>\n",
" <td>78.0</td>\n",
" <td>817</td>\n",
" <td>62.0</td>\n",
" <td>89.0</td>\n",
" <td>13.0</td>\n",
" <td>6.0</td>\n",
" <td>115</td>\n",
" <td>7.4</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5029</th>\n",
" <td>5030</td>\n",
" <td>84.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>156</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>22</td>\n",
" <td>5.0</td>\n",
" <td>21.0</td>\n",
" <td>20.0</td>\n",
" <td>12.0</td>\n",
" <td>62</td>\n",
" <td>6.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5030</th>\n",
" <td>5031</td>\n",
" <td>82.0</td>\n",
" <td>NaN</td>\n",
" <td>USA</td>\n",
" <td>200000.0</td>\n",
" <td>NaN</td>\n",
" <td>133</td>\n",
" <td>8.0</td>\n",
" <td>13.0</td>\n",
" <td>424</td>\n",
" <td>120.0</td>\n",
" <td>785.0</td>\n",
" <td>98.0</td>\n",
" <td>84.0</td>\n",
" <td>1111</td>\n",
" <td>5.4</td>\n",
" <td>Fair</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5031</th>\n",
" <td>5032</td>\n",
" <td>98.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>438</td>\n",
" <td>14.0</td>\n",
" <td>10.0</td>\n",
" <td>20</td>\n",
" <td>3.0</td>\n",
" <td>789.0</td>\n",
" <td>194.0</td>\n",
" <td>152.0</td>\n",
" <td>1186</td>\n",
" <td>6.4</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5032</th>\n",
" <td>5033</td>\n",
" <td>77.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>7000.0</td>\n",
" <td>424760.0</td>\n",
" <td>72639</td>\n",
" <td>371.0</td>\n",
" <td>143.0</td>\n",
" <td>19000</td>\n",
" <td>291.0</td>\n",
" <td>291.0</td>\n",
" <td>45.0</td>\n",
" <td>8.0</td>\n",
" <td>368</td>\n",
" <td>7.0</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5033</th>\n",
" <td>5034</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>Philippines</td>\n",
" <td>7000.0</td>\n",
" <td>70071.0</td>\n",
" <td>589</td>\n",
" <td>35.0</td>\n",
" <td>35.0</td>\n",
" <td>74</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5034</th>\n",
" <td>5035</td>\n",
" <td>81.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>7000.0</td>\n",
" <td>2040920.0</td>\n",
" <td>52055</td>\n",
" <td>130.0</td>\n",
" <td>56.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>121.0</td>\n",
" <td>20.0</td>\n",
" <td>6.0</td>\n",
" <td>147</td>\n",
" <td>6.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5035</th>\n",
" <td>5036</td>\n",
" <td>84.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>3250.0</td>\n",
" <td>NaN</td>\n",
" <td>36</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>2.0</td>\n",
" <td>45.0</td>\n",
" <td>44.0</td>\n",
" <td>2.0</td>\n",
" <td>93</td>\n",
" <td>7.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5036</th>\n",
" <td>5037</td>\n",
" <td>95.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>9000.0</td>\n",
" <td>4584.0</td>\n",
" <td>1338</td>\n",
" <td>14.0</td>\n",
" <td>14.0</td>\n",
" <td>413</td>\n",
" <td>0.0</td>\n",
" <td>296.0</td>\n",
" <td>205.0</td>\n",
" <td>133.0</td>\n",
" <td>690</td>\n",
" <td>6.4</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5037</th>\n",
" <td>5038</td>\n",
" <td>87.0</td>\n",
" <td>2.0</td>\n",
" <td>Canada</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>629</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>84</td>\n",
" <td>2.0</td>\n",
" <td>637.0</td>\n",
" <td>470.0</td>\n",
" <td>318.0</td>\n",
" <td>2283</td>\n",
" <td>7.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>5039</td>\n",
" <td>43.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>73839</td>\n",
" <td>359.0</td>\n",
" <td>43.0</td>\n",
" <td>32000</td>\n",
" <td>NaN</td>\n",
" <td>841.0</td>\n",
" <td>593.0</td>\n",
" <td>319.0</td>\n",
" <td>1753</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>5040</td>\n",
" <td>76.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1400.0</td>\n",
" <td>NaN</td>\n",
" <td>38</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>5041</td>\n",
" <td>100.0</td>\n",
" <td>5.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>10443.0</td>\n",
" <td>1255</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>660</td>\n",
" <td>0.0</td>\n",
" <td>946.0</td>\n",
" <td>719.0</td>\n",
" <td>489.0</td>\n",
" <td>2386</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>5042</td>\n",
" <td>90.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1100.0</td>\n",
" <td>85222.0</td>\n",
" <td>4285</td>\n",
" <td>84.0</td>\n",
" <td>43.0</td>\n",
" <td>456</td>\n",
" <td>16.0</td>\n",
" <td>86.0</td>\n",
" <td>23.0</td>\n",
" <td>16.0</td>\n",
" <td>163</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"5022 5023 88.0 2.0 USA 15000.0 \n",
"5023 5024 78.0 0.0 USA 20000.0 \n",
"5024 5025 108.0 2.0 USA 10000.0 \n",
"5025 5026 110.0 1.0 France 4500.0 \n",
"5026 5027 90.0 0.0 Iran 10000.0 \n",
"5027 5028 83.0 0.0 Ireland 10000.0 \n",
"5028 5029 111.0 0.0 Japan 1000000.0 \n",
"5029 5030 84.0 2.0 USA NaN \n",
"5030 5031 82.0 NaN USA 200000.0 \n",
"5031 5032 98.0 1.0 USA NaN \n",
"5032 5033 77.0 0.0 USA 7000.0 \n",
"5033 5034 80.0 0.0 Philippines 7000.0 \n",
"5034 5035 81.0 0.0 USA 7000.0 \n",
"5035 5036 84.0 0.0 USA 3250.0 \n",
"5036 5037 95.0 1.0 USA 9000.0 \n",
"5037 5038 87.0 2.0 Canada NaN \n",
"5038 5039 43.0 1.0 USA NaN \n",
"5039 5040 76.0 0.0 USA 1400.0 \n",
"5040 5041 100.0 5.0 USA NaN \n",
"5041 5042 90.0 0.0 USA 1100.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews \\\n",
"5022 76382.0 1194 8.0 \n",
"5023 NaN 1771 35.0 \n",
"5024 180483.0 16792 183.0 \n",
"5025 136007.0 3924 39.0 \n",
"5026 673780.0 4555 26.0 \n",
"5027 NaN 57 1.0 \n",
"5028 94596.0 6318 50.0 \n",
"5029 NaN 156 3.0 \n",
"5030 NaN 133 8.0 \n",
"5031 NaN 438 14.0 \n",
"5032 424760.0 72639 371.0 \n",
"5033 70071.0 589 35.0 \n",
"5034 2040920.0 52055 130.0 \n",
"5035 NaN 36 1.0 \n",
"5036 4584.0 1338 14.0 \n",
"5037 NaN 629 6.0 \n",
"5038 NaN 73839 359.0 \n",
"5039 NaN 38 3.0 \n",
"5040 10443.0 1255 9.0 \n",
"5041 85222.0 4285 84.0 \n",
"\n",
" num_critic_for_reviews movie_facebook_likes director_facebook_likes \\\n",
"5022 22.0 324 38.0 \n",
"5023 42.0 835 91.0 \n",
"5024 73.0 0 0.0 \n",
"5025 81.0 171 107.0 \n",
"5026 64.0 697 397.0 \n",
"5027 12.0 105 18.0 \n",
"5028 78.0 817 62.0 \n",
"5029 NaN 22 5.0 \n",
"5030 13.0 424 120.0 \n",
"5031 10.0 20 3.0 \n",
"5032 143.0 19000 291.0 \n",
"5033 35.0 74 0.0 \n",
"5034 56.0 0 0.0 \n",
"5035 NaN 4 2.0 \n",
"5036 14.0 413 0.0 \n",
"5037 1.0 84 2.0 \n",
"5038 43.0 32000 NaN \n",
"5039 13.0 16 0.0 \n",
"5040 14.0 660 0.0 \n",
"5041 43.0 456 16.0 \n",
"\n",
" actor_1_facebook_likes actor_2_facebook_likes actor_3_facebook_likes \\\n",
"5022 331.0 212.0 211.0 \n",
"5023 407.0 91.0 86.0 \n",
"5024 462.0 143.0 105.0 \n",
"5025 576.0 133.0 45.0 \n",
"5026 5.0 0.0 0.0 \n",
"5027 10.0 5.0 0.0 \n",
"5028 89.0 13.0 6.0 \n",
"5029 21.0 20.0 12.0 \n",
"5030 785.0 98.0 84.0 \n",
"5031 789.0 194.0 152.0 \n",
"5032 291.0 45.0 8.0 \n",
"5033 0.0 0.0 0.0 \n",
"5034 121.0 20.0 6.0 \n",
"5035 45.0 44.0 2.0 \n",
"5036 296.0 205.0 133.0 \n",
"5037 637.0 470.0 318.0 \n",
"5038 841.0 593.0 319.0 \n",
"5039 0.0 0.0 0.0 \n",
"5040 946.0 719.0 489.0 \n",
"5041 86.0 23.0 16.0 \n",
"\n",
" cast_total_facebook_likes imdb_score imdb_score_class \n",
"5022 1546 6.2 Good \n",
"5023 674 4.0 Bad \n",
"5024 760 6.1 Good \n",
"5025 776 6.9 Good \n",
"5026 5 7.5 Good \n",
"5027 15 6.7 Good \n",
"5028 115 7.4 Good \n",
"5029 62 6.1 Good \n",
"5030 1111 5.4 Fair \n",
"5031 1186 6.4 Good \n",
"5032 368 7.0 Good \n",
"5033 0 6.3 Good \n",
"5034 147 6.9 Good \n",
"5035 93 7.8 Good \n",
"5036 690 6.4 Good \n",
"5037 2283 7.7 Good \n",
"5038 1753 7.5 Good \n",
"5039 0 6.3 Good \n",
"5040 2386 6.3 Good \n",
"5041 163 6.6 Good "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.tail(n=20)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5042.000000</td>\n",
" <td>5028.000000</td>\n",
" <td>5029.000000</td>\n",
" <td>4.551000e+03</td>\n",
" <td>4.159000e+03</td>\n",
" <td>5.042000e+03</td>\n",
" <td>5022.000000</td>\n",
" <td>4993.000000</td>\n",
" <td>5042.000000</td>\n",
" <td>4938.000000</td>\n",
" <td>5035.000000</td>\n",
" <td>5029.000000</td>\n",
" <td>5020.000000</td>\n",
" <td>5042.000000</td>\n",
" <td>5042.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2521.500000</td>\n",
" <td>107.201074</td>\n",
" <td>1.371446</td>\n",
" <td>3.975262e+07</td>\n",
" <td>4.846841e+07</td>\n",
" <td>8.368475e+04</td>\n",
" <td>272.770808</td>\n",
" <td>140.194272</td>\n",
" <td>7527.457160</td>\n",
" <td>686.621709</td>\n",
" <td>6561.323932</td>\n",
" <td>1652.080533</td>\n",
" <td>645.009761</td>\n",
" <td>9700.959143</td>\n",
" <td>6.442007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1455.644359</td>\n",
" <td>25.197441</td>\n",
" <td>2.013683</td>\n",
" <td>2.061149e+08</td>\n",
" <td>6.845299e+07</td>\n",
" <td>1.384940e+05</td>\n",
" <td>377.982886</td>\n",
" <td>121.601675</td>\n",
" <td>19322.070537</td>\n",
" <td>2813.602405</td>\n",
" <td>15021.977635</td>\n",
" <td>4042.774685</td>\n",
" <td>1665.041728</td>\n",
" <td>18165.101925</td>\n",
" <td>1.125189</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>7.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.180000e+02</td>\n",
" <td>1.620000e+02</td>\n",
" <td>5.000000e+00</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1261.250000</td>\n",
" <td>93.000000</td>\n",
" <td>0.000000</td>\n",
" <td>6.000000e+06</td>\n",
" <td>5.340988e+06</td>\n",
" <td>8.599250e+03</td>\n",
" <td>65.000000</td>\n",
" <td>50.000000</td>\n",
" <td>0.000000</td>\n",
" <td>7.000000</td>\n",
" <td>614.500000</td>\n",
" <td>281.000000</td>\n",
" <td>133.000000</td>\n",
" <td>1411.250000</td>\n",
" <td>5.800000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2521.500000</td>\n",
" <td>103.000000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000e+07</td>\n",
" <td>2.551750e+07</td>\n",
" <td>3.437100e+04</td>\n",
" <td>156.000000</td>\n",
" <td>110.000000</td>\n",
" <td>166.000000</td>\n",
" <td>49.000000</td>\n",
" <td>988.000000</td>\n",
" <td>595.000000</td>\n",
" <td>371.500000</td>\n",
" <td>3091.000000</td>\n",
" <td>6.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3781.750000</td>\n",
" <td>118.000000</td>\n",
" <td>2.000000</td>\n",
" <td>4.500000e+07</td>\n",
" <td>6.230944e+07</td>\n",
" <td>9.634700e+04</td>\n",
" <td>326.000000</td>\n",
" <td>195.000000</td>\n",
" <td>3000.000000</td>\n",
" <td>194.750000</td>\n",
" <td>11000.000000</td>\n",
" <td>918.000000</td>\n",
" <td>636.000000</td>\n",
" <td>13758.750000</td>\n",
" <td>7.200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>5042.000000</td>\n",
" <td>511.000000</td>\n",
" <td>43.000000</td>\n",
" <td>1.221550e+10</td>\n",
" <td>7.605058e+08</td>\n",
" <td>1.689764e+06</td>\n",
" <td>5060.000000</td>\n",
" <td>813.000000</td>\n",
" <td>349000.000000</td>\n",
" <td>23000.000000</td>\n",
" <td>640000.000000</td>\n",
" <td>137000.000000</td>\n",
" <td>23000.000000</td>\n",
" <td>656730.000000</td>\n",
" <td>9.500000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster budget \\\n",
"count 5042.000000 5028.000000 5029.000000 4.551000e+03 \n",
"mean 2521.500000 107.201074 1.371446 3.975262e+07 \n",
"std 1455.644359 25.197441 2.013683 2.061149e+08 \n",
"min 1.000000 7.000000 0.000000 2.180000e+02 \n",
"25% 1261.250000 93.000000 0.000000 6.000000e+06 \n",
"50% 2521.500000 103.000000 1.000000 2.000000e+07 \n",
"75% 3781.750000 118.000000 2.000000 4.500000e+07 \n",
"max 5042.000000 511.000000 43.000000 1.221550e+10 \n",
"\n",
" gross num_voted_users num_user_for_reviews \\\n",
"count 4.159000e+03 5.042000e+03 5022.000000 \n",
"mean 4.846841e+07 8.368475e+04 272.770808 \n",
"std 6.845299e+07 1.384940e+05 377.982886 \n",
"min 1.620000e+02 5.000000e+00 1.000000 \n",
"25% 5.340988e+06 8.599250e+03 65.000000 \n",
"50% 2.551750e+07 3.437100e+04 156.000000 \n",
"75% 6.230944e+07 9.634700e+04 326.000000 \n",
"max 7.605058e+08 1.689764e+06 5060.000000 \n",
"\n",
" num_critic_for_reviews movie_facebook_likes director_facebook_likes \\\n",
"count 4993.000000 5042.000000 4938.000000 \n",
"mean 140.194272 7527.457160 686.621709 \n",
"std 121.601675 19322.070537 2813.602405 \n",
"min 1.000000 0.000000 0.000000 \n",
"25% 50.000000 0.000000 7.000000 \n",
"50% 110.000000 166.000000 49.000000 \n",
"75% 195.000000 3000.000000 194.750000 \n",
"max 813.000000 349000.000000 23000.000000 \n",
"\n",
" actor_1_facebook_likes actor_2_facebook_likes actor_3_facebook_likes \\\n",
"count 5035.000000 5029.000000 5020.000000 \n",
"mean 6561.323932 1652.080533 645.009761 \n",
"std 15021.977635 4042.774685 1665.041728 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 614.500000 281.000000 133.000000 \n",
"50% 988.000000 595.000000 371.500000 \n",
"75% 11000.000000 918.000000 636.000000 \n",
"max 640000.000000 137000.000000 23000.000000 \n",
"\n",
" cast_total_facebook_likes imdb_score \n",
"count 5042.000000 5042.000000 \n",
"mean 9700.959143 6.442007 \n",
"std 18165.101925 1.125189 \n",
"min 0.000000 1.600000 \n",
"25% 1411.250000 5.800000 \n",
"50% 3091.000000 6.600000 \n",
"75% 13758.750000 7.200000 \n",
"max 656730.000000 9.500000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Descriptive statistics for the numeric columns\n",
"movie.describe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 14\n",
"facenumber_in_poster 13\n",
"country 4\n",
"budget 491\n",
"gross 883\n",
"num_voted_users 0\n",
"num_user_for_reviews 20\n",
"num_critic_for_reviews 49\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 104\n",
"actor_1_facebook_likes 7\n",
"actor_2_facebook_likes 13\n",
"actor_3_facebook_likes 22\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#number of NA for each variable\n",
"np.sum(movie.isnull()) "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 14\n",
"facenumber_in_poster 13\n",
"country 4\n",
"budget 0\n",
"gross 883\n",
"num_voted_users 0\n",
"num_user_for_reviews 20\n",
"num_critic_for_reviews 49\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 104\n",
"actor_1_facebook_likes 7\n",
"actor_2_facebook_likes 13\n",
"actor_3_facebook_likes 22\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing budget values with a fixed value (the budget mean reported by describe())\n",
"movie['budget']=movie['budget'].fillna(3.975262e+07)\n",
"np.sum(movie.isnull())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 0\n",
"facenumber_in_poster 13\n",
"country 4\n",
"budget 0\n",
"gross 883\n",
"num_voted_users 0\n",
"num_user_for_reviews 20\n",
"num_critic_for_reviews 49\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 0\n",
"actor_1_facebook_likes 7\n",
"actor_2_facebook_likes 13\n",
"actor_3_facebook_likes 22\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing value with mean\n",
"movie['duration']=movie['duration'].fillna((movie['duration'].mean()))\n",
"\n",
"# Fill missing value with Median\n",
"movie['director_facebook_likes']=movie['director_facebook_likes'].fillna((movie['director_facebook_likes'].median()))\n",
"\n",
"np.sum(movie.isnull())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 0\n",
"facenumber_in_poster 0\n",
"country 4\n",
"budget 0\n",
"gross 0\n",
"num_voted_users 0\n",
"num_user_for_reviews 0\n",
"num_critic_for_reviews 0\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 0\n",
"actor_1_facebook_likes 0\n",
"actor_2_facebook_likes 0\n",
"actor_3_facebook_likes 0\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing values in these numeric columns with each column's own mean.\n",
"# fillna() given a Series of means matches every mean to the column of the same name,\n",
"# so one call replaces a separate fillna statement per column.\n",
"mean_fill_cols = [\n",
"    'budget',\n",
"    'actor_1_facebook_likes',\n",
"    'actor_2_facebook_likes',\n",
"    'actor_3_facebook_likes',\n",
"    'gross',\n",
"    'facenumber_in_poster',\n",
"    'num_user_for_reviews',\n",
"    'num_critic_for_reviews',\n",
"]\n",
"movie[mean_fill_cols] = movie[mean_fill_cols].fillna(movie[mean_fill_cols].mean())\n",
"\n",
"# Count the missing values left in every column\n",
"np.sum(movie.isnull())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"23000000.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#calculate median from a variable\n",
"movie['budget'].median()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Mean of director_facebook_likes for each country\n",
"rata=movie.groupby(['country'])['director_facebook_likes'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\asus\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: The signature of `Series.to_csv` was aligned to that of `DataFrame.to_csv`, and argument 'header' will change its default value from False to True: please pass an explicit value to suppress this warning.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Write the per-country means to CSV. header is passed explicitly because the default\n",
"# for Series.to_csv changed from False to True across pandas versions.\n",
"rata.to_csv(\"rata.csv\", header=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 USA\n",
"dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#mode from a variable\n",
"movie['country'].mode()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import mode\n",
"\n",
"# Fill missing value with mode for qualitative/categorical data\n",
"movie['country']=movie['country'].fillna(movie['country'].mode()[0])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 0\n",
"facenumber_in_poster 0\n",
"country 0\n",
"budget 0\n",
"gross 0\n",
"num_voted_users 0\n",
"num_user_for_reviews 0\n",
"num_critic_for_reviews 0\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 0\n",
"actor_1_facebook_likes 0\n",
"actor_2_facebook_likes 0\n",
"actor_3_facebook_likes 0\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"#export data to csv\n",
"movie.to_csv(\"hasil_movie.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"#Noisy data\n",
"#Outlier identification using boxplot, scatterplot\n",
"#Identifying outliers with boxplot\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAAEKCAYAAAAl5S8KAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADohJREFUeJzt3W9sXXd9x/HPJ/VI24RtXZJJUUu5gPmTiGyDeIN1qA2l7ZJGaxGUiQmoJ6WrSKXkwcSDSXhK2vnRpm3qKtgaMoTLtPKnQlvGaLYU6EoL7XBK09B00Au4kC4aqUu6xesyXH/34Bxn1zeO77Vz77lfx++XZOX63uP8fj/5nndOzvU9dkQIANB7y3o9AQBAgSADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiibz4br169Omq1WpemAgDnp4MHDz4fEWtabTevINdqNY2Oji58VgCwBNl+tp3tOGUBAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0AS8/qdegt1yy236MSJE7rqqqu0Y8eOKoYEgEWnkiAfO3ZMExMTqtfrVQwHAIsSpywAIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASVQS5FOnTkmSnnvuOd11111VDAkAi04lQZ6ampIkvfTSS6rX61UMCQCLDqcsACAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJBEpUGemJjQoUOHtGnTpjM+6vW66vW6tm7dqnq9LkkaHx/X9u3bddttt2l0dHTGY83Gx8e1c+dO1et17dy5U+Pj411dy/R4s41T9Vw6Ya71AEtZlftGmiPk4eFhDQ8Pa2JiQsPDw5KkkZERPf300zpy5Ih2794947FmIyMjOnz4sIaHh3X48GHdc889XZ3v9HizjVP1XDphrvUAS1mV+0aaII+NjWlsbOz07YMHD+r+++8//fjJkydPP9Z8lDw+Pq79+/crIjQ2NqaI0P79+7v2L1rjeM3jVD2XTphrPcBSVvW+kSbIzXbt2qXJyclZH2s+Sh4ZGdHU1NSM+15++eWu/YvWOF7zOFXPpRPmWg+wlFW9b7QMsu1bbY/aHj1+/HhXJ9Po5MmTiohZH5s+kp72wAMPnBHvyclJHThwoCtzaxyveZyq59IJc60HWMqq3jdaBjki9kTEQEQMrFmzpquTabRy5UrZnvWxWq024/NrrrlGfX19M+7r6+vTtdde25W5NY7XPE7Vc+mEudYDLGVV7xtpT1ncfvvtZ4Rt2tDQ0IzPBwcHtWzZzKVccMEFuvnmm7syt8bxmsepei6dMNd6gKWs6n0jTZBrtdrpI99araaNGzdqy5Ytpx9fuXLl6cf6+/tnfO2qVau0efNm2VatVpNtbd68WatWrerKXBvHax6n6rl0wlzrAZayqveNNEEeGhrS0NCQVqxYcfoIeHBwUOvWrdP69eu1e/fuGY81Gxwc1IYNGzQ0NKQNGzZ0/V+y6fFmG6fquXTCXOsBlrIq9w2f7Y
Wz2QwMDMTo6Oi8B7n66qs1NTWlFStWqL+/X3feeee8/w4AWKxsH4yIgVbbpTlCBoCljiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkUUmQly0rhrnooovU399fxZAAsOhUEuTly5dLki699FLt2LGjiiEBYNHhlAUAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACTRV8Uga9eu1YkTJ9Tf31/FcACwKFUS5L1791YxDAAsapyyAIAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEk4Itrf2D4u6dkFjrVa0vML/NpMWEce58MaJNaRTTfW8eqIWNNqo3kF+VzYHo2IgUoG6yLWkcf5sAaJdWTTy3VwygIAkiDIAJBElUHeU+FY3cQ68jgf1iCxjmx6to7KziEDAObGKQsASKLjQba92fZ3bNdt/8Esjy+3/dny8cds1zo9h3PVxhp+3/YR20/a/rLtV/dinq20WkfDdjfZDtspXyFvZx22f7v8njxl+2+rnmM72nheXW77q7a/VT63ru/FPOdi+5O2f2z722d53Lb/olzjk7bfWvUc29HGOj5Qzv9J21+3/cuVTCwiOvYh6QJJ35P0WkmvkHRI0vqmbW6T9Ffl7fdL+mwn51DRGt4p6eLy9vZsa2h3HeV2r5T0kKRHJQ30et4L/H68XtK3JF1Sfv6LvZ73AtexR9L28vZ6SWO9nvcs67hS0lslffssj18v6X5JlvR2SY/1es4LXMcVDc+nLVWto9NHyL8mqR4R34+I/5X0GUk3Nm1zo6SR8vZ9kt5l2x2ex7louYaI+GpE/Hf56aOSLqt4ju1o53shSX8k6Y8l/U+Vk5uHdtbxe5I+FhE/kaSI+HHFc2xHO+sIST9b3v45Sf9e4fzaEhEPSXphjk1ulHRPFB6V9PO211Yzu/a1WkdEfH36+aQK9/FOB/lSST9q+Pxoed+s20TEpKQXJa3q8DzORTtraLRNxRFBNi3XYfstkl4VEV+scmLz1M734w2S3mD7EduP2t5c2eza1846dkv6oO2jkr4kaUc1U+uo+e4/i0Fl+3hfh/++2Y50m3+Mo51teqnt+dn+oKQBSVd1dUYLM+c6bC+T9OeSfreqCS1QO9+PPhWnLTapOJL5mu03R8SJLs9tPtpZx+9I+lRE/KntX5f06XIdU92fXsdk37/nxfY7VQT5HVWM1+kj5KOSXtXw+WU6879dp7ex3afiv2Zz/Reoau2sQbavkfRRSTdExKmK5jYfrdbxSklvlvSg7TEV5/v2JXxhr93n1N9HxE8j4geSvqMi0Jm0s45tkj4nSRHxDUkXqriuwmLS1v6zGNj+JUl7Jd0YEeNVjNnpIH9T0uttv8b2K1S8aLevaZt9kgbL2zdJ+kqUZ86TaLmG8r/6d6uIccbzlVKLdUTEixGxOiJqEVFTcZ7shogY7c10z6qd59TfqXihVbZXqziF8f1KZ9laO+v4oaR3SZLtdSqCfLzSWZ67fZJuLn/a4u2SXoyIY72e1HzZvlzSFyR9KCK+W9nAXXj18npJ31
XxivJHy/vuULGzS8WT7POS6pL+VdJre/lq6wLX8ICk/5D0RPmxr9dzXsg6mrZ9UAl/yqLN74cl/ZmkI5IOS3p/r+e8wHWsl/SIip/AeELSdb2e8yxruFfSMUk/VXE0vE3ShyV9uOF78bFyjYcTP6darWOvpJ807OOjVcyLd+oBQBK8Uw8AkiDIAJAEQQaAJAgyACRBkAEsea0uNtS07ZW2H7c9afumpscGbT9Tfgye7e84G4KMnrNda2dHmOPrx8qfP17I177b9vqFjo3zxqcktfuW+x+qeIfrjKsK2v4FSbskvU3FtUt22b5kPpMgyFjq3q3i53+xhMUsFxuy/Trb+20ftP01228qtx2LiCclNb+l/TclHYiIF6K4MNEBtR95SQQZefTZHimvP3uf7Ysbj3xtD9h+sLy9yvY/l9cNvlsN10+w/Ye2/832Adv32v5Ief8ZO5ftKyTdIOlPbD9h+3XVLxuJ7ZG0IyI2SvqIpI+32P6cL6zU6YsLAQv1RknbIuIR259Ucd3ss9kl6eGIuMP2Vkm3SkW0Jb1X0ltUPLcfl3Sw/Jo9Kt6F9Yztt0n6eERcbXufpC9GxH3dWRYWI9srVVwT+fMNVwde3urLZrlvXu+8I8jI4kcR8Uh5+28k7Zxj2yslvUeSIuIfbU9ft/YdKi4y9JIk2f6H8s+F7FxY2pZJOhERvzKPrzmq4oqD0y5TcUmCeQ0KZNB8JBGSJvX/z9ELW2wvzX6EIjXsXA0f6xY+VZzvIuI/Jf3A9vuk07+aqtWvcfonSdfZvqR8Me+68r62EWRkcXl5DWCpuC7ww5LGJG0s73tvw7YPSfqAJNneImn6leyHJf2W7QvLo+KtUsud679UXIoUS5jteyV9Q9IbbR+1vU3Fc2yb7UOSnlL5G15s/2r5SwTeJ+lu209JUkS8oOI38Hyz/LijvK/9eXBxIfSai190+yUVob1C0jOSPqQixn+t4sp6j6m4ctgm26tUXK1rtaR/UXH6YmNEPG97t4qgP6vi0pUPRsQnbL9G0l9KWivpZyR9pjwH/RuSPiHplKSbIuJ7lSwamAVBxnnF9sqIOGn7YhWBvzUiHu/1vIB28KIezjd7yjd6XChphBhjMeEIGQCS4EU9AEiCIANAEgQZAJIgyACQBEEGgCQIMgAk8X/ighccCHUO/wAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Box plot of budget to spot outliers. The column is passed as x= explicitly because\n",
"# newer seaborn releases read the first positional argument as data= rather than x=.\n",
"sns.boxplot(x=movie['budget'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Waktu')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEICAYAAABF82P+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHGZJREFUeJzt3X+clXWd9/HXmxkddECQjJGfTpbtwk1lRa5t1j1At5Sp1JqbY2sS3LJuOms3VpC0d3Xf0Q12r615Vyw1mj9g+mG5mrreushUrmmhmzk6laT8GCHNUH5LMH72j+saOgwXzDBzrjnnwPv5eJzHua7vdZ3r+pzhcN7ne/1URGBmZtbdoFIXYGZm5ckBYWZmmRwQZmaWyQFhZmaZHBBmZpbJAWFmZpkcEFaxJH1L0hdKXUcxSXqnpF+Xuo6+kHSVpG+Wug4rHgeE9ZukNZJ2Stom6UVJd0kaV+q6CkkKSa87yPSZkjrT97BN0jOSbpD0+oGsKyJ+EhF/lsN66tN1Pdqt/QRJf5S0pr/riIgvRsR/7+9yrHw4IKxYzomIIcAo4DnguhLX0xc/Td/DMODdwE7gEUmT+rIwSdXFLK5Iaru9nwuBZ0pVjJU3B4QVVUS8DNwKTOxqkzRM0k2Sfi9praTPSBqUTvu6pFsL5l0saYUSDZI60k0XL6Q9lQ8faN2SLpG0WtImSXdIGp22/zid5bG0d/ChHt5DZ0T8NiI+BvwI+Fy6nAZJHd3WuUbSu9Phz0m6VdItkrYAMyWdJumnkl6StFHS/5N09IHq6r4OSRMktaavf0LSuQXTviXpq2mPbaukhyW99mDvDbgZuLhg/CPATd3eU+Y6JZ0u6XeSqgrm/YCkXxa8/1sKpp0u6cF0OY9JauihNiszDggrKknHAh8CHipovo7kV/nJwH8l+VL6aDrtSuCN6SaedwKzgYvjT9eAORE4ARhD8sW2VNJ+m2AkTQX+D/DXJL2YtcC3ASLiXelsb4qIIRHxnUN4Sz8A3nkI888gCcjhwDKgE/gf6Xt4OzAN+Fhv6pJ0FPBD4F5gJNAELOv2/huBzwPHA6uBhT3UdwtwgaQqSROAocDDvVlnRDwEbAemFizvQmB595VIGgPcBXwBGAF8Avi+pFf3UJ+VEQeEFcu/SHoJ2AL8N+BLAOmvzQ8Bn46IrRGxBvhH4CKAiNgB/A1wDcmXV1NEdHRb9j9ExK6I+BHJl85fZ6z/w8D1EfFoROwCPg28XVJ9P9/XBpIvuN76aUT8S0S8EhE7I+KRiHgoIvak7/2fSUKyN04HhgCLIuKPEXE/cCdJKHT5QUT8LCL2kATSqT0sswP4NckmtIvp1nvoxTpbuoYlDQXOStu6+xvg7oi4O/1b3AesSue3CuGAsGJ5f0QMB2qAy4EfSer69X80yS/6LmtJegQARMTPgKcBAd/tttwXI2J7t9eOzlj/6MJ1RMQ24A+F6+mjMcCmQ5h/feGIpNdLujPdNLMF+CLJ36Q3RgPrI+KVgrZ9/nbA7wqGd5B8uffkJmAmyRf9Ld2m9bTO5cBfSaoB/gp4NCLWsr+TgPPTzUsvpT8eziDp3VmFcEBYUaXb739AsmnlDOAFYDfJF0aX8cCzXSOSLiMJlg3Ap7ot8nhJtd1euyFj1RsK15G+5lWF6+mjDwA/SYe3A8cWrKMK6L7JpPvlkb8O/Ao4JSKOA64iCcLe2ACM69pfk9rnb9dH3wfeBzyd8eV+0HVGxJMkgfFeDrB5KbUeuDkihhc8aiNiUT9rtwHkgLCiSncuzyDZJt4eEZ0kvYKFkoZKOgmYS/rLNT2M9AskmyQuAj4lqftmks9LOjrdR3E28L2MVS8HPirp1PTX7ReBh9PNOpAcWXVyL99DlaTXSLoOaCDZxg/wG2CwpPel2+o/QxJsBzOUZLPbNkl/Dvxdt+kHq+thklD6lKSj0p2855DuW+mrtEc2Fcg6JLU361wO/D
3wLrL/LSD59z1H0vT07zk43QE/tj+128ByQFix/FDSNpIvw4UkO5qfSKc1kXzpPA08QPIFc72Sw0BvARZHxGMR8RTJL+yb0y95SDahvEjyy3YZcGlE/Kr7yiNiBfAPJL+ONwKvBS4omOVzwI3p5o6sfRiQ7LPoeg+twHHA2yLi8XQdm0l2MH+T5Bf1dpJt+gfzCZJf2luBbwDdd5AfsK6I+CNwLsmv9ReArwEfyXr/hyoiVkXEbzPae7POFpLgvD8iXjjA8teT7LC/Cvg9SY/ik/g7p6LINwyycpX+er0lIvyr06wEnOZmZpbJAWFmZpm8icnMzDK5B2FmZpnK8WJivXbCCSdEfX19qcsw28/27dupra3teUazEnjkkUdeiIgeL3tS0QFRX1/PqlWrSl2G2X5aW1tpaGgodRlmmSRlnf2+H29iMjOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCLMiamlpYdKkSUybNo1JkybR0pJ1Lx2zylDRh7malZOWlhYWLFhAc3MznZ2dVFVVMXv2bAAaGxt7eLVZ+XEPwqxIFi5cSHNzM1OmTKG6upopU6bQ3NzMwoU93SbarDw5IMyKpL29nTPOOGOftjPOOIP29vYSVWTWP7kGhKQ1kh6X9AtJq9K2EZLuk/RU+nx82i5JX5G0WtIvJb0lz9rMim3ChAk88MAD+7Q98MADTJgwoUQVmfXPQPQgpkTEqRExOR2fD6yIiFOAFek4JHewOiV9zCG5l69ZxViwYAGzZ89m5cqV7Nmzh5UrVzJ79mwWLFhQ6tLM+qQUO6lnkNyuEOBGkls7zkvbb4rk+uMPSRouaVREbCxBjWaHrGtHdFNTE+3t7UyYMIGFCxd6B7VVrLx7EAHcK+kRSXPStrquL/30eWTaPobkvrVdOtI2s4rR2NhIW1sbK1asoK2tzeFgFS3vHsQ7ImKDpJHAfZIOdrN1ZbTtdzejNGjmANTV1dHa2lqUQs2Kadu2bf5sWsXLNSAiYkP6/Lyk24DTgOe6Nh1JGgU8n87eAYwrePlYYEPGMpcCSwEmT54cvqSylSNf7tsOB7ltYpJUK2lo1zBwJtAG3AFcnM52MXB7OnwH8JH0aKbTgc3e/2BmVjp59iDqgNskda1neUTcI+nnwHclzQbWAeen898NnAWsBnYAH82xNjMz60FuARERTwNvymj/AzAtoz2Ay/Kqx8zMDo3PpDYzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0y5B4SkKkn/IenOdPw1kh6W9JSk70g6Om2vScdXp9Pr867NzMwObCB6EFcA7QXji4EvR8QpwIvA7LR9NvBiRLwO+HI6n5mZlUiuASFpLPA+4JvpuICpwK3pLDcC70+HZ6TjpNOnpfObmVkJVOe8/H8CPgUMTcdfBbwUEXvS8Q5gTDo8BlgPEBF7JG1O53+hcIGS5gBzAOrq6mhtbc2zfrM+2bZtmz+bVvFyCwhJZwPPR8Qjkhq6mjNmjV5M+1NDxFJgKcDkyZOjoaGh+yxmJdfa2oo/m1bp8uxBvAM4V9JZwGDgOJIexXBJ1WkvYiywIZ2/AxgHdEiqBoYBm3Ksz8zMDiK3fRAR8emIGBsR9cAFwP0R8WFgJfDBdLaLgdvT4TvScdLp90fEfj0IMzMbGKU4D2IeMFfSapJ9DM1pezPwqrR9LjC/BLWZmVkq753UAEREK9CaDj8NnJYxz8vA+QNRj5mZ9cxnUpuZWSYHhJmZZXJAmJlZJgeEmZllckCYmVkmB4SZmWVyQJiZWSYHhJmZZXJAmJlZJgeEmZllckCYmVkmB4
SZmWVyQJiZWSYHhJmZZXJAmJlZJgeEmZllckCYmVkmB4RZEbW0tDBp0iSmTZvGpEmTaGlpKXVJZn3mgDArkpaWFq644gq2b99ORLB9+3auuOIKh4RVLEVEqWvos8mTJ8eqVatKXYYZAOPGjWPPnj0sX76czs5OqqqquPDCC6murmb9+vWlLs9sL0mPRMTknuZzD8KsSDo6Opg5cyZNTU1Mnz6dpqYmZs6cSUdHR6lLM+uT6lIXYHY4ueGGG2hpadnbg2hsbCx1SWZ95h6EWZFUV1eze/fufdp2795NdbV/h1ll8ifXrEi6eg2zZs1i3bp1jB8/nqqqKjo7O0tdmlmfuAdhViQTJ05kzpw51NbWAlBbW8ucOXOYOHFiiSsz6xv3IMyKZMGCBSxYsIDm5ua9vYnZs2ezcOHCUpdm1icOCLMi6doh3dTURHt7OxMmTGDhwoXeUW0Vy+dBmOWgtbWVhoaGUpdhlsnnQZiZWb84IMzMLFNuASFpsKSfSXpM0hOSPp+2v0bSw5KekvQdSUen7TXp+Op0en1etZnlxRfrs8NJnjupdwFTI2KbpKOAByT9KzAX+HJEfFvSEmA28PX0+cWIeJ2kC4DFwIdyrM+sqFpaWjKPYgK8o9oqUm49iEhsS0ePSh8BTAVuTdtvBN6fDs9Ix0mnT5OkvOozK7aFCxfS3NzMlClTqK6uZsqUKTQ3N/swV6tYuR7mKqkKeAR4HfBV4LfASxGxJ52lAxiTDo8B1gNExB5Jm4FXAS90W+YcYA5AXV0dra2teb4Fs15rb2/nvvvu2+dM6sbGRtrb2/05tYo0IIe5ShoO3Ab8T+CGiHhd2j4OuDsi3iDpCWB6RHSk034LnBYRfzjQcn2Yq5WTcePGsXXrVo4//njWrl3LSSedxIsvvsjQoUN9uW8rK2V1mGtEvAS0AqcDwyV19VzGAhvS4Q5gHEA6fRiwaSDqMyuGHTt2sGXLFnbu3AnAzp072bJlCzt27ChxZWZ906uAkDQ+69HDa16d9hyQdAzwbqAdWAl8MJ3tYuD2dPiOdJx0+v1RyWfx2RFn06ZNHHfccRxzzDFI4phjjuG4445j0yb/zrHK1NsexF3AnenzCuBp4F97eM0oYKWkXwI/B+6LiDuBecBcSatJ9jE0p/M3A69K2+cC8w/ljZiVgwULFvDMM8+wYsUKnnnmGRYsWFDqksz6rE/7ICS9BfjbiPjb4pfUe94HYeVEEsOGDdtvH8TmzZtxZ9jKSa77ICLiUeBtfXmt2eFqxIgRbNmyhZdffhlJvPzyy2zZsoURI0aUujSzPunVYa6S5haMDgLeAvw+l4rMKtSxxx7LK6+8wuDBg4kIBg8ezLBhwzj22GNLXZpZn/S2BzG04FFDsi9iRl5FmVWiDRs20NjYyMaNG4kINm7cSGNjIxs2bOj5xWZlqLcnyj0ZEd8rbJB0PvC9A8xvdsQZPXo0LS0tjBo1inXr1jFq1ChaWloYPXp0qUsz65Pe9iA+3cs2syPWjh072Lp1K01NTdx11100NTWxdetWnwdhFeugPQhJ7wXOAsZI+krBpOOAPdmvMjsybdq0ifnz53P99dfvvaPcJz/5SRYtWlTq0sz6pKcexAZgFfAyyTWVuh53ANPzLc2s8kydOpW2tjZWrFhBW1sbU6dOLXVJZn3Wq/MgJL0tIn7ere2ciPhhbpX1gs+DsHIybtw49uzZw/Lly/de7vvCCy+kurra12KyslLs8yD+WdIbChbeCHymr8WZHY6uvvpqOjs7mTVrFmeeeSazZs2is7OTq6++utSlmfVJbwPig8CNkiZIugT4GHBmfmWZVZ7GxkauvfZaamtrkURtbS3XXnutbxZkFatXARERTwMXAN8nCYszI2JznoWZVaIHH3yQ1atX88orr7B69WoefPDBUpdk1mc9HcX0OMld4LqMAKqAhyUREW/MszizStLU1MSSJUtYvHgxEydO5Mknn2TevHkAXHfddSWuzuzQ9XSi3N
kDUoXZYeAb3/gGixcvZu7cubS2tjJ3bnKFmquuusoBYRXpoJuYImJt4QPYSdKj6HqYWWrXrl1ceuml+7Rdeuml7Nq1q0QVmfVPb28YdK6kp4BngB8Ba+j5fhBmR5SamhqWLFmyT9uSJUuoqakpUUVm/dPbazH9b5Lbhf5bRLxZ0hTAh2aYFbjkkku48sorufLKK/dpv/zyy0tUkVn/9PYw190R8QdgkKRBEbESODXHuswqzm9+85tDajcrd73tQbwkaQjwE2CZpOfxtZjM9nHvvfcydOhQbr/99r1nUs+YMYN777231KWZ9clBexCSPi7pbcD7gR3Ax4F7gN8C5+RfnlllWbZsGVOmTKG6upopU6awbNmyUpdk1mc9bWIaC1wLrAPuB/4X8AJwS7rJycwK3HnnnQcdN6skvb1Y39HAZOAvgbenj5ciYmK+5R2cL9Zn5WTIkCFs3759v/ba2lq2bdtWgorMshX7Yn3HkNwDYlj62AA83PfyzA4/J5988iG1m5W7ni61sRT4L8BWkkB4ELgmIl4cgNrMKkpbWxtjx47l2WefJSKQxJgxY2hrayt1aWZ90lMPYjxQA/wOeBboAF7KuyizShQRPPvss4wcORJJjBw5cm9YmFWig/YgIuI9kkTSi/hL4EpgkqRNwE8j4rMDUKNZxYgInnvuOYC9z2aVqsd9EJFoA+4mubzGvwOvBa7IuTazilRfX8/NN99MfX19qUsx65ee9kH8PUnP4R3AbpJw+ClwPfB47tWZVZiqqirWrFnDRRddtHe8s7OzxFWZ9U1PPYh64FbgtIg4OSIuioivRcRjEfFK/uWZVZbOzk7OPfdcbrvtNs4991yHg1W0Xp0HUa58HoSVk2R3XbZK/n9mh59inwdhZmZHmNwCQtI4SSsltUt6QtIVafsISfdJeip9Pj5tl6SvSFot6ZeS3pJXbWZm1rM8exB7gCsjYgLJvSQukzQRmA+siIhTgBXpOMB7gVPSxxzg6znWZmZmPcgtICJiY0Q8mg5vBdqBMcAM4MZ0thtJrhRL2n5TeljtQ8BwSaPyqs/MzA6ut/eD6BdJ9cCbSS7XURcRGyEJEUkj09nGAOsLXtaRtm3stqw5JD0M6urqaG1tzbN0s6Lw59QqUe4Bkd5o6PvAxyNiy0GO9MiasN+hHxGxFFgKyVFMDQ0NRarULD/+nFolyvUoJklHkYTDsoj4Qdr8XNemo/T5+bS9AxhX8PKxJFeNNTOzEsjzKCYBzUB7RFxTMOkO4OJ0+GLg9oL2j6RHM50ObO7aFGVmZgMvz01M7wAuAh6X9Iu07SpgEfBdSbNJ7lR3fjrtbuAsYDXJ7U0/mmNtZmbWg9wCIiIeIHu/AsC0jPkDuCyveszM7ND4TGozM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMA3JHObNKd5AbXRX19ck1K83KgwPCrBd688V9sBDwF79VIm9iMiuSA4WAw8EqlQPCrIgigojgpHl37h02q1QOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMvlifHXHe9Pl72bxzd+7rqZ9/V+7rGHbMUTz22TNzX48dmRwQdsTZvHM3axa9L9d1tLa20tDQkOs6YGBCyI5c3sRkZmaZHBBmZpYpt4CQdL2k5yW1FbSNkHSfpKfS5+PTdkn6iqTVkn4p6S151WVmZr2TZw/iW8B7urXNB1ZExCnAinQc4L3AKeljDvD1HOsyM7NeyG0ndUT8WFJ9t+YZQEM6fCPQCsxL22+K5O4qD0kaLmlURGzMqz47cg2dMJ833Di/5xn768b8VzF0AkC+O9ztyDXQRzHVdX3pR8RGSSPT9jHA+oL5OtK2/QJC0hySXgZ1dXW0trbmWrAdfra2L+Jb76nNdR3btm1jyJAhua4DYOY92/1/wH
JTLoe5Zt3tPfNejRGxFFgKMHny5BiIQwntMHPPXbkfgjpQh7kOxHuxI9dAH8X0nKRRAOnz82l7BzCuYL6xwIYBrs3MzAoMdA/iDuBiYFH6fHtB++WSvg38BbDZ+x8sTwNygtk9A3MmtVlecgsISS0kO6RPkNQBfJYkGL4raTawDjg/nf1u4CxgNbAD+GhedZnlfRY1JAE0EOsxy1OeRzE1HmDStIx5A7gsr1rMzOzQ+UxqMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDArovHjxyOJtYvPRhLjx48vdUlmfeaAMCuS8ePHs379+n3a1q9f75CwilUu94MwK2tS1i1Lemf9+vW9fn1yWTKz8uAehFkvRESPD4Dq6mrq6+sZNGgQ9fX1VFdX9/r1DgcrN+5BmBXRnj17WLNmDcDeZ7NK5R6EmZllckCYmVkmB4SZmWVyQJiZWSYHhFkO+nNYrFm5cECY5cCHrNrhwAFhZmaZHBBmZpbJAWFmZpkcEGZmlskBYVZkVVVVXHPNNVRVVZW6FLN+8bWYzIqss7OTuXPnlroMs35zD8LMzDI5IMzMLJMDwszMMjkgzIrsxBNPZNCgQZx44omlLsWsX7yT2qyIhg4dyvLly+ns7KSqqooZM2awdevWUpdl1idlFRCS3gNcC1QB34yIRSUuyeyQ1NTUMGvWLNatW8f48eOpqalxQFjFKptNTJKqgK8C7wUmAo2SJpa2KrPeq6mpYfr06dTW1gJQW1vL9OnTqampKXFlZn1TTj2I04DVEfE0gKRvAzOAJ0talVkvXXLJJSxZsoTFixczceJEnnzySebNm8ell15a6tLM+qScAmIMsL5gvAP4i+4zSZoDzAGoq6ujtbV1QIoz68l5551HR0cH8+fPZ/fu3Rx11FGcffbZnHfeef6cWkUqp4DIusPKfhfVj4ilwFKAyZMnR0NDQ85lmfVe1+extbUVfzat0pXNPgiSHsO4gvGxwIYS1WJmdsQrp4D4OXCKpNdIOhq4ALijxDWZmR2xymYTU0TskXQ58P9JDnO9PiKeKHFZZmZHrLIJCICIuBu4u9R1mJlZeW1iMjOzMqKI/Q4UqhiSfg+sLXUdZhlOAF4odRFmB3BSRLy6p5kqOiDMypWkVRExudR1mPWHNzGZmVkmB4SZmWVyQJjlY2mpCzDrL++DMDOzTO5BmJlZJgeEmZllckCYZZD0OUmfKMJyhkv6WMH4aEm39ne5ZgPBAWHWT5IOdsma4cDegIiIDRHxwfyrMus/B4RZStICSb+W9G/An6VtrZImp8MnSFqTDs+U9D1JPwTulTRE0gpJj0p6XNKMdLGLgNdK+oWkL0mql9SWLmOwpBvS+f9D0pSCZf9A0j2SnpJ09QD/KcyAMrtYn1mpSHorySXm30zy/+JR4JEeXvZ24I0RsSntRXwgIrZIOgF4SNIdwHxgUkScmq6nvuD1lwFExBsk/TlJ0Lw+nXZqWssu4NeSrouIwjsumuXOAWGWeCdwW0TsAEi/3HtyX0RsSocFfFHSu4BXSG6hW9fD688ArgOIiF9JWgt0BcSKiNic1vIkcBL73pLXLHcOCLM/yTopaA9/2hQ7uNu07QXDHwZeDbw1Inanm6K6z99d1m12u+wqGO7E/1etBLwPwizxY+ADko6RNBQ4J21fA7w1HT7YzuVhwPNpOEwh+cUPsBUYepB1fhgg3bQ0Hvh1n9+BWZE5IMyAiHgU+A7wC+D7wE/SSf8X+DtJD5JcwvtAlgGTJa0i+dL/VbrcPwD/LqlN0pe6veZrQJWkx9N1z4yIXZiVCV9qw8zMMrkHYWZmmRwQZmaWyQFhZmaZHBBmZpbJAWFmZpkcEGZmlskBYWZmmf4Tlt/YqbAS2EwAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"movie.boxplot(column='duration')\n",
"plt.title('Boxplot Duration Movie')\n",
"plt.ylabel('Waktu')"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2.81395543 2.4562438 1.62158335 ... 1.24010965 0.28621199 0.68366935]\n"
]
}
],
"source": [
"from scipy import stats\n",
"z = np.abs(stats.zscore(movie['duration']))\n",
"print(z)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(array([ 9, 22, 24, 25, 31, 110, 112, 146, 164, 274, 278,\n",
" 307, 325, 338, 478, 644, 697, 705, 711, 756, 815, 825,\n",
" 832, 839, 882, 907, 916, 1037, 1123, 1124, 1143, 1159, 1174,\n",
" 1216, 1264, 1373, 1428, 1500, 1522, 1570, 1663, 1709, 1713, 1812,\n",
" 1817, 1841, 1873, 1907, 1946, 1971, 1979, 2048, 2087, 2190, 2240,\n",
" 2256, 2265, 2341, 2344, 2354, 2355, 2465, 2560, 2628, 2643, 2726,\n",
" 2815, 2836, 2969, 3025, 3047, 3074, 3083, 3266, 3302, 3310, 3328,\n",
" 3509, 3649, 3660, 3870, 3950, 3969, 3981, 4076, 4078, 4086, 4280,\n",
" 4326, 4438, 4481, 4633, 4672, 4693, 4707, 4746, 4802, 4936],\n",
" dtype=int64),)\n"
]
}
],
"source": [
"print(np.where(z > 3))"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD8CAYAAACLrvgBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAG75JREFUeJzt3X+Q1PWd5/HnKwPomKwZ0NHTGTwwmXIXYxKwS/G4SuXiCuilhMpqHZ57TGW5o841d9nbOxK4pI788CrmuIoedcYNRlbMeqJBg2yMO0uBqdRdKdI4UUQlTNSVGYxMjh+xIqeA7/ujP4PN0D3zne6e6Wl8Paq65vt9fz/f7/fT0s5rvt/v5/ttRQRmZmZZfKjeHTAzs8bh0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWU2od4dqLVzzz03pk2bVu9umJk1lB07dvw2IlqHa3fahca0adPI5/P17oaZWUOR9A9Z2g17ekrSWkn7Jb1QYtl/khSSzk3zkrRaUo+k5yXNKmrbKWlPenUW1S+XtDOts1qSUn2KpM2p/WZJk7O8ITMzGz1ZrmncB8wfXJQ0FbgGeL2ofC3QkV5LgbtT2ynASuBK4ApgZVEI3J3aDqw3sK/lwJaI6AC2pHkzM6ujYUMjIn4BHCix6A7gK0DxY3IXAPdHwdNAi6QLgHnA5og4EBEHgc3A/LTs7Ih4KgqP270fWFi0rXVpel1R3czM6qSi0VOSrgf6IuK5QYvagL1F872pNlS9t0Qd4PyIeAMg/Tyvkr6amVntjPhCuKSzgK8Bc0stLlGLCuoj7dNSCqe4uOiii0a6upmZZVTJkcbHgOnAc5JeA9qBZyX9IwpHClOL2rYD+4apt5eoA7yZTl+Rfu4v16GIWBMRuYjItbYOO2LMamRjdx9zbt/K9OWPM+f2rWzs7qt3l8xslI04NCJiZ0ScFxHTImIahV/8syLiN8AmYHEaRTUbOJxOLXUBcyVNThfA5wJdadlbkmanUVOLgcfSrjYBA6OsOovqNg5s7O5jxaM76Tt0hAD6Dh1hxaM7HRxmp7ksQ24fBJ4CLpHUK2nJEM1/BrwC9AD3AH8OEBEHgG8D29PrW6kGcAvww7TOr4EnUv124BpJeyiM0rp9ZG/NRtOqrt0cOXr8pNqRo8dZ1bW7Tj0ys7Ew7DWNiLhpmOXTiqYDuLVMu7XA2hL1PPCJEvX/C1w9XP+sPvYdOjKiupmdHvzsKavIhS3NI6qb2enBoWEVWTbvEponNp1Ua57YxLJ5l9SpR2Y2Fk67Z0/Z2Fg4s3A7zaqu3ew7dIQLW5pZNu+SE3UzOz05NKxiC2e2OSTMPmB8esrMzDJzaJiZWWYODTMzy8yhYWZmmTk0zMwsM4eGmZll5tAwM7PMHBpmZpaZQ8PMzDJzaJiZWWYODTMzy8yhYWZmmTk0zMwsM4eGmZll5tAwM7PMHBpmZpbZsKEhaa2k/ZJeKKqtkvSypOcl/URSS9GyFZJ6JO2WNK+oPj/VeiQtL6pPl7RN0h5JD0malOpnpPmetHxard60mZlVJsuRxn3A/EG1zcAnIuKTwK+AFQCSZgCLgEvTOt+X1CSpCbgLuBaYAdyU2gJ8F7gjIjqAg8CSVF8CHIyIjwN3pHZmZlZHw4ZGRPwCODCo9vcRcSzNPg20p+kFwPqIeCciXgV6gCvSqyciXomId4H1wAJJAj4HbEjrrwMWFm1rXZreAFyd2puZWZ3U4prGnwFPpOk2YG/Rst5UK1c/BzhUFEAD9ZO2lZYfTu3NzKxOqgoNSV8DjgEPDJRKNIsK6kNtq1Q/lkrKS8r39/cP3WkzM6tYxaEhqRP4PHBzRAz8Mu8FphY1awf2DVH/LdAiacKg+knbSss/yqDTZAMiYk1E5CIi19raWulbMjOzYVQUGpLmA18Fro+It4sWbQIWpZFP04EO4BlgO9
CRRkpNonCxfFMKmyeBG9L6ncBjRdvqTNM3AFuLwsnMzOpgwnANJD0IfBY4V1IvsJLCaKkzgM3p2vTTEfFvI2KXpIeBFymctro1Io6n7XwJ6AKagLURsSvt4qvAekm3Ad3Aval+L/AjST0UjjAW1eD9mplZFXS6/fGey+Uin8/XuxtmZg1F0o6IyA3XzneEm5lZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCyzYUND0lpJ+yW9UFSbImmzpD3p5+RUl6TVknokPS9pVtE6nan9HkmdRfXLJe1M66yWpKH2YWZm9ZPlSOM+YP6g2nJgS0R0AFvSPMC1QEd6LQXuhkIAACuBK4ErgJVFIXB3ajuw3vxh9mFmZnUybGhExC+AA4PKC4B1aXodsLCofn8UPA20SLoAmAdsjogDEXEQ2AzMT8vOjoinIiKA+wdtq9Q+zMysTiq9pnF+RLwBkH6el+ptwN6idr2pNlS9t0R9qH2YmVmd1PpCuErUooL6yHYqLZWUl5Tv7+8f6epmZpZRpaHxZjq1RPq5P9V7galF7dqBfcPU20vUh9rHKSJiTUTkIiLX2tpa4VsyM7PhVBoam4CBEVCdwGNF9cVpFNVs4HA6tdQFzJU0OV0Anwt0pWVvSZqdRk0tHrStUvswM7M6mTBcA0kPAp8FzpXUS2EU1O3Aw5KWAK8DN6bmPwOuA3qAt4EvAkTEAUnfBrandt+KiIGL67dQGKHVDDyRXgyxDzMzqxMVBi2dPnK5XOTz+Xp3w8ysoUjaERG54dr5jnAzM8vMoWFmZpk5NMzMLDOHhpmZZebQMDOzzBwaZmaWmUPDzMwyc2iYmVlmDg0zM8vMoWFmZpk5NMzMLDOHhpmZZebQMDOzzBwaZmaWmUPDzMwyc2iYmVlmDg0zM8vMoWFmZpk5NMzMLDOHhpmZZVZVaEj6D5J2SXpB0oOSzpQ0XdI2SXskPSRpUmp7RprvScunFW1nRarvljSvqD4/1XokLa+mr2ZmVr2KQ0NSG/DvgVxEfAJoAhYB3wXuiIgO4CCwJK2yBDgYER8H7kjtkDQjrXcpMB/4vqQmSU3AXcC1wAzgptTWzMzqpNrTUxOAZkkTgLOAN4DPARvS8nXAwjS9IM2Tll8tSam+PiLeiYhXgR7givTqiYhXIuJdYH1qa2ZmdVJxaEREH/DfgdcphMVhYAdwKCKOpWa9QFuabgP2pnWPpfbnFNcHrVOufgpJSyXlJeX7+/srfUtmZjaMak5PTabwl/904ELgwxROJQ0WA6uUWTbS+qnFiDURkYuIXGtr63BdNzOzClVzeuqPgVcjoj8ijgKPAv8EaEmnqwDagX1puheYCpCWfxQ4UFwftE65upmZ1Uk1ofE6MFvSWenaxNXAi8CTwA2pTSfwWJrelOZJy7dGRKT6ojS6ajrQATwDbAc60misSRQulm+qor9mZlalCcM3KS0itknaADwLHAO6gTXA48B6Sbel2r1plXuBH0nqoXCEsShtZ5ekhykEzjHg1og4DiDpS0AXhZFZayNiV6X9NTOz6qnwx/7pI5fLRT6fr3c3zMwaiqQdEZEbrp3vCDczs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZlWFhqQWSRskvSzpJUlXSZoiabOkPenn5NRWklZL6pH0vKRZRdvpTO33SOosql8uaWdaZ7UkVdNfMzOrTrVHGv8D+LuI+EPgU8BLwHJgS0R0AFvSPMC1QEd6LQXuBpA0BVgJXAlcAawcCJrUZmnRevOr7K+ZmVWh4tCQdD
bwGeBegIh4NyIOAQuAdanZOmBhml4A3B8FTwMtki4A5gGbI+JARBwENgPz07KzI+KpiAjg/qJtmZlZHVRzpHEx0A/8taRuST+U9GHg/Ih4AyD9PC+1bwP2Fq3fm2pD1XtL1E8haamkvKR8f39/FW/JzMyGUk1oTABmAXdHxEzg97x/KqqUUtcjooL6qcWINRGRi4hca2vr0L02M7OKVRMavUBvRGxL8xsohMib6dQS6ef+ovZTi9ZvB/YNU28vUTczszqpODQi4jfAXkmXpNLVwIvAJmBgBFQn8Fia3gQsTqOoZgOH0+mrLmCupMnpAvhcoCste0vS7DRqanHRtszMrA4mVLn+vwMekDQJeAX4IoUgeljSEuB14MbU9mfAdUAP8HZqS0QckPRtYHtq962IOJCmbwHuA5qBJ9LLzMzqRIWBSaePXC4X+Xy+3t0wM2soknZERG64dr4j3MzMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPLzKFhZmaZVR0akpokdUv6aZqfLmmbpD2SHpI0KdXPSPM9afm0om2sSPXdkuYV1eenWo+k5dX21czMqlOLI40vAy8VzX8XuCMiOoCDwJJUXwIcjIiPA3ekdkiaASwCLgXmA99PQdQE3AVcC8wAbkptzcysTqoKDUntwD8HfpjmBXwO2JCarAMWpukFaZ60/OrUfgGwPiLeiYhXgR7givTqiYhXIuJdYH1qa2ZmdVLtkcadwFeA99L8OcChiDiW5nuBtjTdBuwFSMsPp/Yn6oPWKVc/haSlkvKS8v39/VW+JTMzK6fi0JD0eWB/ROwoLpdoGsMsG2n91GLEmojIRUSutbV1iF6bmVk1JlSx7hzgeknXAWcCZ1M48miRNCEdTbQD+1L7XmAq0CtpAvBR4EBRfUDxOuXqZmZWBxUfaUTEiohoj4hpFC5kb42Im4EngRtSs07gsTS9Kc2Tlm+NiEj1RWl01XSgA3gG2A50pNFYk9I+NlXaXzMzq141RxrlfBVYL+k2oBu4N9XvBX4kqYfCEcYigIjYJelh4EXgGHBrRBwHkPQloAtoAtZGxK5R6K+ZmWWkwh/7p49cLhf5fL7e3TAzayiSdkREbrh2viPczMwyc2iYmVlmDg0zM8vMoWFmZpk5NMzMLDOHhpmZZebQMDOzzEbj5r4PtI3dfazq2s2+Q0e4sKWZZfMuYeHMks9ZNDNrOA6NGtrY3ceKR3dy5OhxAPoOHWHFozsBHBxmdlrw6akaWtW1+0RgDDhy9DirunbXqUdmZrXlI40qfH3jTh7ctpfjETRJHC/zSJZ9h46Mcc/MzEaHQ6NCX9+4k795+vUT8+UCA+DCluax6JKZ2ajz6akKPbht7/CNgOaJTSybd8ko98bMbGw4NCo01JFFsfbJZ/oiuJmdNhwaFWpSqW+jPdWe/b/n5nueGuXemJmNDYdGhW66curwjZL/8+sDo9gTM7Ox49CowMbuPn7ybF+9u2FmNuY8emqEBo+aymra8sdpkrjpyqnctvCyUeiZmdno85HGCGzs7uOBCgJjwPEI/ubp1/n6xp017JWZ2dipODQkTZX0pKSXJO2S9OVUnyJps6Q96efkVJek1ZJ6JD0vaVbRtjpT+z2SOovql0vamdZZLWW8+jxKVnXtphbfqJ51uK6Njo3dfcy5fSvTlz/OnNu3srHbpxrNsqrmSOMY8B8j4o+A2cCtkmYAy4EtEdEBbEnzANcCHem1FLgbCiEDrASuBK4AVg4ETWqztGi9+VX0t2q1urM763Bdq72B54P1HTpC8P7zwRwcZtlUHBoR8UZEPJum3wJeAtqABcC61GwdsDBNLwDuj4KngRZJFwDzgM0RcSAiDgKbgflp2dkR8VREBHB/0bbqYmJTbQ50PlTX46UPNj
8fzKw6NbmmIWkaMBPYBpwfEW9AIViA81KzNqD4vExvqg1V7y1Rr5t3j9fmCOGMCb6UVC/ljhb9fDCzbKr+7SXpI8AjwF9ExO+GalqiFhXUS/VhqaS8pHx/f/9wXa7INd/7ec229f+OvlezbdnIlHsOmJ8PZpZNVaEhaSKFwHggIh5N5TfTqSXSz/2p3gsU3xHXDuwbpt5eon6KiFgTEbmIyLW2tlbzlkra2N3Hnv2/r9n2Kv0F5Qu41Vs27xKaJzadVPPzwcyyq2b0lIB7gZci4ntFizYBAyOgOoHHiuqL0yiq2cDhdPqqC5graXK6AD4X6ErL3pI0O+1rcdG2xtRfPvzLmm2r0l9QvoBbGwtntvGdL1xGW0szAtpamvnOFy7z88HMMqrm5r45wL8Cdkoa+K36n4HbgYclLQFeB25My34GXAf0AG8DXwSIiAOSvg1sT+2+FREDz924BbgPaAaeSK8x914NLmUIqvr616Eu4PoX3sgsnNnm/2ZmFao4NCLif1P6ugPA1SXaB3BrmW2tBdaWqOeBT1Tax1qo1V/yN8++qKo7wX0B18zGAw/jGcayH9fm1FS1d4L7Aq6ZjQcOjWHUcqBTNXeC+wKumY0HDo0hTF/+eE23dzyi4tNdC2e28SeXt534Ho8miT+53OfmzWxsOTTKuPmep2rynKnBlm14rqLg2NjdxyM7+k48guR4BI/s6PPoKTMbUw6NMkbri5OOHo+KHlnhx1+Y2Xjg0KiDSkY8efSUmY0HDo0SptX4WsZgLWdNHPE6zRNL/1OVq5uZjQb/xqmDSp6MfuRY6WFc5epmZqPBoVEHh48cHfE65YLGX81hZmPJ3xFeB5XckNcklfzypqb6fpmhmY0DG7v7WNW1m32HjlT1uKIsfKRRB//sD0f+JN6brpw6orqZfTCM9cNMHRp18OTLI//Oj9sWXsafzr7opJv7/rTK51mZWeMb6+H4Pj1VB5UOk71t4WUOCTM7yVgPx/eRRh34IYNmVitj/TBTh8Ygo/1YDj9k0MxqaawfZurTU0Wu+d7Pa/q1roO1NE/kG9df6ocMmlnNDPw+GavRUw6N5OZ7nhrVwAB4xzfimdkoGMtvo/TpqWS0HlBY7MjR43z1kedHfT9mZqPFRxpj7J1j73HzPU/xwL+5asTrjuUNPI3YHzMbfQ4NRv8BhYNVclSzsbuPZT9+jqPvFe4K7zt0hGU/fg6gJr+oRxoAAzcUDYwPH7ihqFb9GU0OO7PKjfvTU5LmS9otqUfS8lpv/5Mr/67WmxwV39i060RgDDj6XvCNTbuq3vbG7j6WbXjupDtKh/uyqEb9fo9K3quZvW9cH2lIagLuAq4BeoHtkjZFxIu12sfv3jk+fKM6GPzX8KEyDzksVx+Jb/7tLo4eHxRIx4Nv/u2usn+BN+r3e1TyXs3sfeP9SOMKoCciXomId4H1wII692nUlXqWzGg6+Hbp4ClXh7G/oahWKnmvZva+8R4abcDeovneVDutlTr1M96M9Q1FZjY+jPfQKPXc71OeDy5pqaS8pHx//8gfBjjejOQUz4dq8GT0lubS3yRYrg6Fi93f+cJltLU0I6CtpZnvfOGycX+Kp5L3ambvG9fXNCgcWRQ/+7sd2De4UUSsAdYA5HK5hv9aogtbmjOfkvqXV15U9f6+cf2lJ43MApj4IfGN6y8dcr2xvKGoVip9r2ZWMN6PNLYDHZKmS5oELAI21blPVbvzX3x6yOXlTv3M+diUUXk0+sKZbay68VMnHTWsuvFTDRcIWXyQ3qvZaFCM8+8LlXQdcCfQBKyNiP86VPtcLhf5fH5E+6j1fRpzPjblxM17ld4T4HsJzGwsSdoREblh24330BipSkLDzOyDLmtojPfTU2ZmNo44NMzMLDOHhpmZZebQMDOzzBwaZmaW2Wk3ekpSP/APFa5+LvDbGnZnrDVy/xu579DY/W/kvoP7Xyv/OCJah2t02oVGNSTlsww5G68auf+N3Hdo7P43ct
/B/R9rPj1lZmaZOTTMzCwzh8bJ1tS7A1Vq5P43ct+hsfvfyH0H939M+ZqGmZll5iMNMzPLzKGRSJovabekHknLx3jfayXtl/RCUW2KpM2S9qSfk1Ndklanfj4vaVbROp2p/R5JnUX1yyXtTOuslgrPVy+3jxH2faqkJyW9JGmXpC83WP/PlPSMpOdS/7+Z6tMlbUvbfig9mh9JZ6T5nrR8WtG2VqT6bknziuolP1vl9lHBe2iS1C3ppw3Y99fSv+0vJeVTrVE+Oy2SNkh6OX3+r2qUvlclIj7wLwqPXf81cDEwCXgOmDGG+/8MMAt4oaj234DlaXo58N00fR3wBIVvNZwNbEv1KcAr6efkND05LXsGuCqt8wRw7VD7GGHfLwBmpek/AH4FzGig/gv4SJqeCGxL/XoYWJTqfwXckqb/HPirNL0IeChNz0ifmzOA6enz1DTUZ6vcPip4D38J/C/gp0Ntd5z2/TXg3EG1RvnsrAP+dZqeBLQ0St+reY3ZjsbzK/3DdBXNrwBWjHEfpnFyaOwGLkjTFwC70/QPgJsGtwNuAn5QVP9Bql0AvFxUP9Gu3D6qfB+PAdc0Yv+Bs4BngSsp3Gw1YfDnA+gCrkrTE1I7Df7MDLQr99lK65Tcxwj73A5sAT4H/HSo7Y63vqd1X+PU0Bj3nx3gbOBV0nXhRup7tS+fnipoA/YWzfemWj2dHxFvAKSf56V6ub4OVe8tUR9qHxVJpztmUvhrvWH6n07v/BLYD2ym8Nf1oYg4VmKfJ/qZlh8GzqngfZ0zxD5G4k7gK8B7aX6o7Y63vgME8PeSdkhammqN8Nm5GOgH/jqdGvyhpA83SN+r4tAoUInaeB1WVq6vI63XlKSPAI8AfxERvxuqaZn+1K3/EXE8Ij5N4a/2K4A/GmKftep/1e9L0ueB/RGxo7g8xHbHTd+LzImIWcC1wK2SPjNE2/H02ZlA4ZTy3RExE/g9hVNF5YynvlfFoVHQC0wtmm8H9tWpLwPelHQBQPq5P9XL9XWoenuJ+lD7GBFJEykExgMR8Wij9X9ARBwCfk7hnHOLpAkl9nmin2n5R4EDFbyv3w6xj6zmANdLeg1YT+EU1Z0N0ncAImJf+rkf+AmF0G6Ez04v0BsR29L8Bgoh0gh9r4pDo2A70JFGhEyicJFwU537tAkYGEnRSeFawUB9cRqNMRs4nA5Ru4C5kian0RRzKZxnfgN4S9LsNPpi8aBtldpHZmmb9wIvRcT3GrD/rZJa0nQz8MfAS8CTwA1l+j+wzxuArVE4ubwJWKTCCKXpQAeFC5klP1tpnXL7yCQiVkREe0RMS9vdGhE3N0LfASR9WNIfDExT+Dd/gQb47ETEb4C9ki5JpauBFxuh71Ubywso4/lFYXTDryicz/7aGO/7QeAN4CiFvzCWUDhvvAXYk35OSW0F3JX6uRPIFW3nz4Ce9PpiUT1H4X/GXwP/k/dv6iy5jxH2/Z9SOGx+Hvhlel3XQP3/JNCd+v8C8F9S/WIKvzh7gB8DZ6T6mWm+Jy2/uGhbX0t93E0a6TLUZ6vcPir8DH2W90dPNUTf0zaeS69dA9tvoM/Op4F8+uxspDD6qSH6Xs3Ld4SbmVlmPj1lZmaZOTTMzCwzh4aZmWXm0DAzs8wcGmZmlplDw8zMMnNomJlZZg4NMzPL7P8DocU3+s9yM+0AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#scatterplot\n",
"plt.scatter(movie[\"actor_1_facebook_likes\"], movie[\"actor_2_facebook_likes\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" <th>cat_actor1_fb</th>\n",
" <th>scorebin_actor1_fb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class cat_actor1_fb scorebin_actor1_fb \n",
"0 7.9 Good Low (500, 2000] \n",
"1 7.1 Good Very high (10000, 700000] \n",
"2 6.8 Good Very high (10000, 700000] \n",
"3 8.5 Excellent Very high (10000, 700000] \n",
"4 6.6 Good Low (500, 2000] "
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Binning\n",
"bins = [-1, 500, 2000, 5000, 10000, 700000] #bin\n",
"group_names = ['Very low', 'Low', 'Medium', 'High', 'Very high'] #labelling each bin\n",
"\n",
"movie['cat_actor1_fb'] = pd.cut(movie['actor_1_facebook_likes'], bins, labels=group_names)\n",
"movie['scorebin_actor1_fb'] = pd.cut(movie['actor_1_facebook_likes'], bins)\n",
"\n",
"movie.head() # view the dataframe"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" <th>cat_actor1_fb</th>\n",
" <th>scorebin_actor1_fb</th>\n",
" <th>score_code</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class cat_actor1_fb scorebin_actor1_fb score_code \n",
"0 7.9 Good Low (500, 2000] 3 \n",
"1 7.1 Good Very high (10000, 700000] 3 \n",
"2 6.8 Good Very high (10000, 700000] 3 \n",
"3 8.5 Excellent Very high (10000, 700000] 1 \n",
"4 6.6 Good Low (500, 2000] 3 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"LE = LabelEncoder()\n",
"movie['score_code'] = LE.fit_transform(movie['imdb_score_class'])\n",
"movie.head()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\asus\\Anaconda3\\lib\\site-packages\\numpy\\core\\fromnumeric.py:2389: FutureWarning: Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.\n",
" return ptp(axis=axis, out=out, **kwargs)\n"
]
},
{
"data": {
"text/plain": [
"const 0.997801\n",
"duration 0.997994\n",
"facenumber_in_poster 0.998582\n",
"budget 0.000000\n",
"gross 0.000000\n",
"num_voted_users 0.924973\n",
"num_user_for_reviews 0.978588\n",
"num_critic_for_reviews 0.997783\n",
"movie_facebook_likes 0.983995\n",
"director_facebook_likes 0.994986\n",
"actor_1_facebook_likes 0.997458\n",
"actor_2_facebook_likes 0.997804\n",
"actor_3_facebook_likes 0.996341\n",
"cast_total_facebook_likes 0.997823\n",
"imdb_score 0.999620\n",
"score_code 0.000000\n",
"dtype: float64"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Feature Selection with Backward Elimination\n",
"import statsmodels.api as sm\n",
"\n",
"y = movie['score_code']\n",
"x = movie.drop(['movie_code','country','imdb_score_class', 'cat_actor1_fb','scorebin_actor1_fb'],axis=1)\n",
"x = x.astype(int)\n",
"\n",
"\n",
"x_1 = sm.add_constant(x) #Fitting sm.OLS model\n",
"model = sm.OLS(y,x_1).fit()\n",
"model.pvalues"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class \n",
"0 7.9 Good \n",
"1 7.1 Good \n",
"2 6.8 Good \n",
"3 8.5 Excellent \n",
"4 6.6 Good "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Merging to dataset\n",
"# Import data\n",
"movie1 = pd.read_csv(\"movie1.csv\")\n",
"movie2 = pd.read_csv(\"movie2.csv\")\n",
"\n",
"movie_complete = pd.merge(movie1, movie2, on='movie_code')\n",
"\n",
"movie_complete.head()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"# Export the merged data to CSV.\n",
"# index=False: the merged frame's default integer index carries no information\n",
"# (movie_code is already a column) and would otherwise be written as an extra\n",
"# unnamed first column.\n",
"movie_complete.to_csv(\"movie_complete.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment