Skip to content

Instantly share code, notes, and snippets.

@shindishella
Created February 27, 2020 02:58
Show Gist options
  • Save shindishella/862636402282b2742c811367730851e2 to your computer and use it in GitHub Desktop.
Save shindishella/862636402282b2742c811367730851e2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import pandas and NumPy\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class \n",
"0 7.9 Good \n",
"1 7.1 Good \n",
"2 6.8 Good \n",
"3 8.5 Excellent \n",
"4 6.6 Good "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the movie metadata CSV into a DataFrame\n",
"movie = pd.read_csv(\"movie_metadata.csv\")\n",
"movie.head()  # show the first 5 rows"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>156.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>258000000.0</td>\n",
" <td>336530303.0</td>\n",
" <td>383056</td>\n",
" <td>1902.0</td>\n",
" <td>392.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>24000.0</td>\n",
" <td>11000.0</td>\n",
" <td>4000.0</td>\n",
" <td>46055</td>\n",
" <td>6.2</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>100.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>260000000.0</td>\n",
" <td>200807262.0</td>\n",
" <td>294810</td>\n",
" <td>387.0</td>\n",
" <td>324.0</td>\n",
" <td>29000</td>\n",
" <td>15.0</td>\n",
" <td>799.0</td>\n",
" <td>553.0</td>\n",
" <td>284.0</td>\n",
" <td>2036</td>\n",
" <td>7.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>141.0</td>\n",
" <td>4.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>458991599.0</td>\n",
" <td>462669</td>\n",
" <td>1117.0</td>\n",
" <td>635.0</td>\n",
" <td>118000</td>\n",
" <td>0.0</td>\n",
" <td>26000.0</td>\n",
" <td>21000.0</td>\n",
" <td>19000.0</td>\n",
" <td>92000</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>153.0</td>\n",
" <td>3.0</td>\n",
" <td>UK</td>\n",
" <td>250000000.0</td>\n",
" <td>301956980.0</td>\n",
" <td>321795</td>\n",
" <td>973.0</td>\n",
" <td>375.0</td>\n",
" <td>10000</td>\n",
" <td>282.0</td>\n",
" <td>25000.0</td>\n",
" <td>11000.0</td>\n",
" <td>10000.0</td>\n",
" <td>58753</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>183.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>330249062.0</td>\n",
" <td>371639</td>\n",
" <td>3018.0</td>\n",
" <td>673.0</td>\n",
" <td>197000</td>\n",
" <td>0.0</td>\n",
" <td>15000.0</td>\n",
" <td>4000.0</td>\n",
" <td>2000.0</td>\n",
" <td>24450</td>\n",
" <td>6.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>11</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>209000000.0</td>\n",
" <td>200069408.0</td>\n",
" <td>240396</td>\n",
" <td>2367.0</td>\n",
" <td>434.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>18000.0</td>\n",
" <td>10000.0</td>\n",
" <td>903.0</td>\n",
" <td>29991</td>\n",
" <td>6.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>12</td>\n",
" <td>106.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>200000000.0</td>\n",
" <td>168368427.0</td>\n",
" <td>330784</td>\n",
" <td>1243.0</td>\n",
" <td>403.0</td>\n",
" <td>0</td>\n",
" <td>395.0</td>\n",
" <td>451.0</td>\n",
" <td>412.0</td>\n",
" <td>393.0</td>\n",
" <td>2023</td>\n",
" <td>6.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>13</td>\n",
" <td>151.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>423032628.0</td>\n",
" <td>522040</td>\n",
" <td>1832.0</td>\n",
" <td>313.0</td>\n",
" <td>5000</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48486</td>\n",
" <td>7.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>14</td>\n",
" <td>150.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>215000000.0</td>\n",
" <td>89289910.0</td>\n",
" <td>181792</td>\n",
" <td>711.0</td>\n",
" <td>450.0</td>\n",
" <td>48000</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>2000.0</td>\n",
" <td>1000.0</td>\n",
" <td>45757</td>\n",
" <td>6.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>15</td>\n",
" <td>143.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>291021565.0</td>\n",
" <td>548573</td>\n",
" <td>2536.0</td>\n",
" <td>733.0</td>\n",
" <td>118000</td>\n",
" <td>0.0</td>\n",
" <td>15000.0</td>\n",
" <td>3000.0</td>\n",
" <td>748.0</td>\n",
" <td>20495</td>\n",
" <td>7.2</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>16</td>\n",
" <td>150.0</td>\n",
" <td>4.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>141614023.0</td>\n",
" <td>149922</td>\n",
" <td>438.0</td>\n",
" <td>258.0</td>\n",
" <td>0</td>\n",
" <td>80.0</td>\n",
" <td>22000.0</td>\n",
" <td>216.0</td>\n",
" <td>201.0</td>\n",
" <td>22697</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>17</td>\n",
" <td>173.0</td>\n",
" <td>3.0</td>\n",
" <td>USA</td>\n",
" <td>220000000.0</td>\n",
" <td>623279547.0</td>\n",
" <td>995415</td>\n",
" <td>1722.0</td>\n",
" <td>703.0</td>\n",
" <td>123000</td>\n",
" <td>0.0</td>\n",
" <td>26000.0</td>\n",
" <td>21000.0</td>\n",
" <td>19000.0</td>\n",
" <td>87697</td>\n",
" <td>8.1</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>18</td>\n",
" <td>136.0</td>\n",
" <td>4.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>241063875.0</td>\n",
" <td>370704</td>\n",
" <td>484.0</td>\n",
" <td>448.0</td>\n",
" <td>58000</td>\n",
" <td>252.0</td>\n",
" <td>40000.0</td>\n",
" <td>11000.0</td>\n",
" <td>1000.0</td>\n",
" <td>54083</td>\n",
" <td>6.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>19</td>\n",
" <td>106.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>225000000.0</td>\n",
" <td>179020854.0</td>\n",
" <td>268154</td>\n",
" <td>341.0</td>\n",
" <td>451.0</td>\n",
" <td>40000</td>\n",
" <td>188.0</td>\n",
" <td>10000.0</td>\n",
" <td>816.0</td>\n",
" <td>718.0</td>\n",
" <td>12572</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>20</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>New Zealand</td>\n",
" <td>250000000.0</td>\n",
" <td>255108370.0</td>\n",
" <td>354228</td>\n",
" <td>802.0</td>\n",
" <td>422.0</td>\n",
" <td>65000</td>\n",
" <td>0.0</td>\n",
" <td>5000.0</td>\n",
" <td>972.0</td>\n",
" <td>773.0</td>\n",
" <td>9152</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"5 6 156.0 0.0 USA 258000000.0 \n",
"6 7 100.0 1.0 USA 260000000.0 \n",
"7 8 141.0 4.0 USA 250000000.0 \n",
"8 9 153.0 3.0 UK 250000000.0 \n",
"9 10 183.0 0.0 USA 250000000.0 \n",
"10 11 169.0 0.0 USA 209000000.0 \n",
"11 12 106.0 1.0 UK 200000000.0 \n",
"12 13 151.0 2.0 USA 225000000.0 \n",
"13 14 150.0 1.0 USA 215000000.0 \n",
"14 15 143.0 0.0 USA 225000000.0 \n",
"15 16 150.0 4.0 USA 225000000.0 \n",
"16 17 173.0 3.0 USA 220000000.0 \n",
"17 18 136.0 4.0 USA 250000000.0 \n",
"18 19 106.0 1.0 USA 225000000.0 \n",
"19 20 164.0 0.0 New Zealand 250000000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews \\\n",
"0 760505847.0 886204 3054.0 \n",
"1 309404152.0 471220 1238.0 \n",
"2 200074175.0 275868 994.0 \n",
"3 448130642.0 1144337 2701.0 \n",
"4 73058679.0 212204 738.0 \n",
"5 336530303.0 383056 1902.0 \n",
"6 200807262.0 294810 387.0 \n",
"7 458991599.0 462669 1117.0 \n",
"8 301956980.0 321795 973.0 \n",
"9 330249062.0 371639 3018.0 \n",
"10 200069408.0 240396 2367.0 \n",
"11 168368427.0 330784 1243.0 \n",
"12 423032628.0 522040 1832.0 \n",
"13 89289910.0 181792 711.0 \n",
"14 291021565.0 548573 2536.0 \n",
"15 141614023.0 149922 438.0 \n",
"16 623279547.0 995415 1722.0 \n",
"17 241063875.0 370704 484.0 \n",
"18 179020854.0 268154 341.0 \n",
"19 255108370.0 354228 802.0 \n",
"\n",
" num_critic_for_reviews movie_facebook_likes director_facebook_likes \\\n",
"0 723.0 33000 0.0 \n",
"1 302.0 0 563.0 \n",
"2 602.0 85000 0.0 \n",
"3 813.0 164000 22000.0 \n",
"4 462.0 24000 475.0 \n",
"5 392.0 0 0.0 \n",
"6 324.0 29000 15.0 \n",
"7 635.0 118000 0.0 \n",
"8 375.0 10000 282.0 \n",
"9 673.0 197000 0.0 \n",
"10 434.0 0 0.0 \n",
"11 403.0 0 395.0 \n",
"12 313.0 5000 563.0 \n",
"13 450.0 48000 563.0 \n",
"14 733.0 118000 0.0 \n",
"15 258.0 0 80.0 \n",
"16 703.0 123000 0.0 \n",
"17 448.0 58000 252.0 \n",
"18 451.0 40000 188.0 \n",
"19 422.0 65000 0.0 \n",
"\n",
" actor_1_facebook_likes actor_2_facebook_likes actor_3_facebook_likes \\\n",
"0 1000.0 936.0 855.0 \n",
"1 40000.0 5000.0 1000.0 \n",
"2 11000.0 393.0 161.0 \n",
"3 27000.0 23000.0 23000.0 \n",
"4 640.0 632.0 530.0 \n",
"5 24000.0 11000.0 4000.0 \n",
"6 799.0 553.0 284.0 \n",
"7 26000.0 21000.0 19000.0 \n",
"8 25000.0 11000.0 10000.0 \n",
"9 15000.0 4000.0 2000.0 \n",
"10 18000.0 10000.0 903.0 \n",
"11 451.0 412.0 393.0 \n",
"12 40000.0 5000.0 1000.0 \n",
"13 40000.0 2000.0 1000.0 \n",
"14 15000.0 3000.0 748.0 \n",
"15 22000.0 216.0 201.0 \n",
"16 26000.0 21000.0 19000.0 \n",
"17 40000.0 11000.0 1000.0 \n",
"18 10000.0 816.0 718.0 \n",
"19 5000.0 972.0 773.0 \n",
"\n",
" cast_total_facebook_likes imdb_score imdb_score_class \n",
"0 4834 7.9 Good \n",
"1 48350 7.1 Good \n",
"2 11700 6.8 Good \n",
"3 106759 8.5 Excellent \n",
"4 1873 6.6 Good \n",
"5 46055 6.2 Good \n",
"6 2036 7.8 Good \n",
"7 92000 7.5 Good \n",
"8 58753 7.5 Good \n",
"9 24450 6.9 Good \n",
"10 29991 6.1 Good \n",
"11 2023 6.7 Good \n",
"12 48486 7.3 Good \n",
"13 45757 6.5 Good \n",
"14 20495 7.2 Good \n",
"15 22697 6.6 Good \n",
"16 87697 8.1 Excellent \n",
"17 54083 6.7 Good \n",
"18 12572 6.8 Good \n",
"19 9152 7.5 Good "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.head(n=20)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5037</th>\n",
" <td>5038</td>\n",
" <td>87.0</td>\n",
" <td>2.0</td>\n",
" <td>Canada</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>629</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>84</td>\n",
" <td>2.0</td>\n",
" <td>637.0</td>\n",
" <td>470.0</td>\n",
" <td>318.0</td>\n",
" <td>2283</td>\n",
" <td>7.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>5039</td>\n",
" <td>43.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>73839</td>\n",
" <td>359.0</td>\n",
" <td>43.0</td>\n",
" <td>32000</td>\n",
" <td>NaN</td>\n",
" <td>841.0</td>\n",
" <td>593.0</td>\n",
" <td>319.0</td>\n",
" <td>1753</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>5040</td>\n",
" <td>76.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1400.0</td>\n",
" <td>NaN</td>\n",
" <td>38</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>5041</td>\n",
" <td>100.0</td>\n",
" <td>5.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>10443.0</td>\n",
" <td>1255</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>660</td>\n",
" <td>0.0</td>\n",
" <td>946.0</td>\n",
" <td>719.0</td>\n",
" <td>489.0</td>\n",
" <td>2386</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>5042</td>\n",
" <td>90.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1100.0</td>\n",
" <td>85222.0</td>\n",
" <td>4285</td>\n",
" <td>84.0</td>\n",
" <td>43.0</td>\n",
" <td>456</td>\n",
" <td>16.0</td>\n",
" <td>86.0</td>\n",
" <td>23.0</td>\n",
" <td>16.0</td>\n",
" <td>163</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget gross \\\n",
"5037 5038 87.0 2.0 Canada NaN NaN \n",
"5038 5039 43.0 1.0 USA NaN NaN \n",
"5039 5040 76.0 0.0 USA 1400.0 NaN \n",
"5040 5041 100.0 5.0 USA NaN 10443.0 \n",
"5041 5042 90.0 0.0 USA 1100.0 85222.0 \n",
"\n",
" num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"5037 629 6.0 1.0 \n",
"5038 73839 359.0 43.0 \n",
"5039 38 3.0 13.0 \n",
"5040 1255 9.0 14.0 \n",
"5041 4285 84.0 43.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"5037 84 2.0 637.0 \n",
"5038 32000 NaN 841.0 \n",
"5039 16 0.0 0.0 \n",
"5040 660 0.0 946.0 \n",
"5041 456 16.0 86.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes \\\n",
"5037 470.0 318.0 \n",
"5038 593.0 319.0 \n",
"5039 0.0 0.0 \n",
"5040 719.0 489.0 \n",
"5041 23.0 16.0 \n",
"\n",
" cast_total_facebook_likes imdb_score imdb_score_class \n",
"5037 2283 7.7 Good \n",
"5038 1753 7.5 Good \n",
"5039 0 6.3 Good \n",
"5040 2386 6.3 Good \n",
"5041 163 6.6 Good "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.tail()  # show the last 5 rows"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5022</th>\n",
" <td>5023</td>\n",
" <td>88.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>15000.0</td>\n",
" <td>76382.0</td>\n",
" <td>1194</td>\n",
" <td>8.0</td>\n",
" <td>22.0</td>\n",
" <td>324</td>\n",
" <td>38.0</td>\n",
" <td>331.0</td>\n",
" <td>212.0</td>\n",
" <td>211.0</td>\n",
" <td>1546</td>\n",
" <td>6.2</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5023</th>\n",
" <td>5024</td>\n",
" <td>78.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>20000.0</td>\n",
" <td>NaN</td>\n",
" <td>1771</td>\n",
" <td>35.0</td>\n",
" <td>42.0</td>\n",
" <td>835</td>\n",
" <td>91.0</td>\n",
" <td>407.0</td>\n",
" <td>91.0</td>\n",
" <td>86.0</td>\n",
" <td>674</td>\n",
" <td>4.0</td>\n",
" <td>Bad</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5024</th>\n",
" <td>5025</td>\n",
" <td>108.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>10000.0</td>\n",
" <td>180483.0</td>\n",
" <td>16792</td>\n",
" <td>183.0</td>\n",
" <td>73.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>462.0</td>\n",
" <td>143.0</td>\n",
" <td>105.0</td>\n",
" <td>760</td>\n",
" <td>6.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5025</th>\n",
" <td>5026</td>\n",
" <td>110.0</td>\n",
" <td>1.0</td>\n",
" <td>France</td>\n",
" <td>4500.0</td>\n",
" <td>136007.0</td>\n",
" <td>3924</td>\n",
" <td>39.0</td>\n",
" <td>81.0</td>\n",
" <td>171</td>\n",
" <td>107.0</td>\n",
" <td>576.0</td>\n",
" <td>133.0</td>\n",
" <td>45.0</td>\n",
" <td>776</td>\n",
" <td>6.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5026</th>\n",
" <td>5027</td>\n",
" <td>90.0</td>\n",
" <td>0.0</td>\n",
" <td>Iran</td>\n",
" <td>10000.0</td>\n",
" <td>673780.0</td>\n",
" <td>4555</td>\n",
" <td>26.0</td>\n",
" <td>64.0</td>\n",
" <td>697</td>\n",
" <td>397.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5027</th>\n",
" <td>5028</td>\n",
" <td>83.0</td>\n",
" <td>0.0</td>\n",
" <td>Ireland</td>\n",
" <td>10000.0</td>\n",
" <td>NaN</td>\n",
" <td>57</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>105</td>\n",
" <td>18.0</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>15</td>\n",
" <td>6.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5028</th>\n",
" <td>5029</td>\n",
" <td>111.0</td>\n",
" <td>0.0</td>\n",
" <td>Japan</td>\n",
" <td>1000000.0</td>\n",
" <td>94596.0</td>\n",
" <td>6318</td>\n",
" <td>50.0</td>\n",
" <td>78.0</td>\n",
" <td>817</td>\n",
" <td>62.0</td>\n",
" <td>89.0</td>\n",
" <td>13.0</td>\n",
" <td>6.0</td>\n",
" <td>115</td>\n",
" <td>7.4</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5029</th>\n",
" <td>5030</td>\n",
" <td>84.0</td>\n",
" <td>2.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>156</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>22</td>\n",
" <td>5.0</td>\n",
" <td>21.0</td>\n",
" <td>20.0</td>\n",
" <td>12.0</td>\n",
" <td>62</td>\n",
" <td>6.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5030</th>\n",
" <td>5031</td>\n",
" <td>82.0</td>\n",
" <td>NaN</td>\n",
" <td>USA</td>\n",
" <td>200000.0</td>\n",
" <td>NaN</td>\n",
" <td>133</td>\n",
" <td>8.0</td>\n",
" <td>13.0</td>\n",
" <td>424</td>\n",
" <td>120.0</td>\n",
" <td>785.0</td>\n",
" <td>98.0</td>\n",
" <td>84.0</td>\n",
" <td>1111</td>\n",
" <td>5.4</td>\n",
" <td>Fair</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5031</th>\n",
" <td>5032</td>\n",
" <td>98.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>438</td>\n",
" <td>14.0</td>\n",
" <td>10.0</td>\n",
" <td>20</td>\n",
" <td>3.0</td>\n",
" <td>789.0</td>\n",
" <td>194.0</td>\n",
" <td>152.0</td>\n",
" <td>1186</td>\n",
" <td>6.4</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5032</th>\n",
" <td>5033</td>\n",
" <td>77.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>7000.0</td>\n",
" <td>424760.0</td>\n",
" <td>72639</td>\n",
" <td>371.0</td>\n",
" <td>143.0</td>\n",
" <td>19000</td>\n",
" <td>291.0</td>\n",
" <td>291.0</td>\n",
" <td>45.0</td>\n",
" <td>8.0</td>\n",
" <td>368</td>\n",
" <td>7.0</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5033</th>\n",
" <td>5034</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>Philippines</td>\n",
" <td>7000.0</td>\n",
" <td>70071.0</td>\n",
" <td>589</td>\n",
" <td>35.0</td>\n",
" <td>35.0</td>\n",
" <td>74</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5034</th>\n",
" <td>5035</td>\n",
" <td>81.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>7000.0</td>\n",
" <td>2040920.0</td>\n",
" <td>52055</td>\n",
" <td>130.0</td>\n",
" <td>56.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>121.0</td>\n",
" <td>20.0</td>\n",
" <td>6.0</td>\n",
" <td>147</td>\n",
" <td>6.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5035</th>\n",
" <td>5036</td>\n",
" <td>84.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>3250.0</td>\n",
" <td>NaN</td>\n",
" <td>36</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>2.0</td>\n",
" <td>45.0</td>\n",
" <td>44.0</td>\n",
" <td>2.0</td>\n",
" <td>93</td>\n",
" <td>7.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5036</th>\n",
" <td>5037</td>\n",
" <td>95.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>9000.0</td>\n",
" <td>4584.0</td>\n",
" <td>1338</td>\n",
" <td>14.0</td>\n",
" <td>14.0</td>\n",
" <td>413</td>\n",
" <td>0.0</td>\n",
" <td>296.0</td>\n",
" <td>205.0</td>\n",
" <td>133.0</td>\n",
" <td>690</td>\n",
" <td>6.4</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5037</th>\n",
" <td>5038</td>\n",
" <td>87.0</td>\n",
" <td>2.0</td>\n",
" <td>Canada</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>629</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>84</td>\n",
" <td>2.0</td>\n",
" <td>637.0</td>\n",
" <td>470.0</td>\n",
" <td>318.0</td>\n",
" <td>2283</td>\n",
" <td>7.7</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>5039</td>\n",
" <td>43.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>73839</td>\n",
" <td>359.0</td>\n",
" <td>43.0</td>\n",
" <td>32000</td>\n",
" <td>NaN</td>\n",
" <td>841.0</td>\n",
" <td>593.0</td>\n",
" <td>319.0</td>\n",
" <td>1753</td>\n",
" <td>7.5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>5040</td>\n",
" <td>76.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1400.0</td>\n",
" <td>NaN</td>\n",
" <td>38</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>16</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>5041</td>\n",
" <td>100.0</td>\n",
" <td>5.0</td>\n",
" <td>USA</td>\n",
" <td>NaN</td>\n",
" <td>10443.0</td>\n",
" <td>1255</td>\n",
" <td>9.0</td>\n",
" <td>14.0</td>\n",
" <td>660</td>\n",
" <td>0.0</td>\n",
" <td>946.0</td>\n",
" <td>719.0</td>\n",
" <td>489.0</td>\n",
" <td>2386</td>\n",
" <td>6.3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>5042</td>\n",
" <td>90.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>1100.0</td>\n",
" <td>85222.0</td>\n",
" <td>4285</td>\n",
" <td>84.0</td>\n",
" <td>43.0</td>\n",
" <td>456</td>\n",
" <td>16.0</td>\n",
" <td>86.0</td>\n",
" <td>23.0</td>\n",
" <td>16.0</td>\n",
" <td>163</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"5022 5023 88.0 2.0 USA 15000.0 \n",
"5023 5024 78.0 0.0 USA 20000.0 \n",
"5024 5025 108.0 2.0 USA 10000.0 \n",
"5025 5026 110.0 1.0 France 4500.0 \n",
"5026 5027 90.0 0.0 Iran 10000.0 \n",
"5027 5028 83.0 0.0 Ireland 10000.0 \n",
"5028 5029 111.0 0.0 Japan 1000000.0 \n",
"5029 5030 84.0 2.0 USA NaN \n",
"5030 5031 82.0 NaN USA 200000.0 \n",
"5031 5032 98.0 1.0 USA NaN \n",
"5032 5033 77.0 0.0 USA 7000.0 \n",
"5033 5034 80.0 0.0 Philippines 7000.0 \n",
"5034 5035 81.0 0.0 USA 7000.0 \n",
"5035 5036 84.0 0.0 USA 3250.0 \n",
"5036 5037 95.0 1.0 USA 9000.0 \n",
"5037 5038 87.0 2.0 Canada NaN \n",
"5038 5039 43.0 1.0 USA NaN \n",
"5039 5040 76.0 0.0 USA 1400.0 \n",
"5040 5041 100.0 5.0 USA NaN \n",
"5041 5042 90.0 0.0 USA 1100.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews \\\n",
"5022 76382.0 1194 8.0 \n",
"5023 NaN 1771 35.0 \n",
"5024 180483.0 16792 183.0 \n",
"5025 136007.0 3924 39.0 \n",
"5026 673780.0 4555 26.0 \n",
"5027 NaN 57 1.0 \n",
"5028 94596.0 6318 50.0 \n",
"5029 NaN 156 3.0 \n",
"5030 NaN 133 8.0 \n",
"5031 NaN 438 14.0 \n",
"5032 424760.0 72639 371.0 \n",
"5033 70071.0 589 35.0 \n",
"5034 2040920.0 52055 130.0 \n",
"5035 NaN 36 1.0 \n",
"5036 4584.0 1338 14.0 \n",
"5037 NaN 629 6.0 \n",
"5038 NaN 73839 359.0 \n",
"5039 NaN 38 3.0 \n",
"5040 10443.0 1255 9.0 \n",
"5041 85222.0 4285 84.0 \n",
"\n",
" num_critic_for_reviews movie_facebook_likes director_facebook_likes \\\n",
"5022 22.0 324 38.0 \n",
"5023 42.0 835 91.0 \n",
"5024 73.0 0 0.0 \n",
"5025 81.0 171 107.0 \n",
"5026 64.0 697 397.0 \n",
"5027 12.0 105 18.0 \n",
"5028 78.0 817 62.0 \n",
"5029 NaN 22 5.0 \n",
"5030 13.0 424 120.0 \n",
"5031 10.0 20 3.0 \n",
"5032 143.0 19000 291.0 \n",
"5033 35.0 74 0.0 \n",
"5034 56.0 0 0.0 \n",
"5035 NaN 4 2.0 \n",
"5036 14.0 413 0.0 \n",
"5037 1.0 84 2.0 \n",
"5038 43.0 32000 NaN \n",
"5039 13.0 16 0.0 \n",
"5040 14.0 660 0.0 \n",
"5041 43.0 456 16.0 \n",
"\n",
" actor_1_facebook_likes actor_2_facebook_likes actor_3_facebook_likes \\\n",
"5022 331.0 212.0 211.0 \n",
"5023 407.0 91.0 86.0 \n",
"5024 462.0 143.0 105.0 \n",
"5025 576.0 133.0 45.0 \n",
"5026 5.0 0.0 0.0 \n",
"5027 10.0 5.0 0.0 \n",
"5028 89.0 13.0 6.0 \n",
"5029 21.0 20.0 12.0 \n",
"5030 785.0 98.0 84.0 \n",
"5031 789.0 194.0 152.0 \n",
"5032 291.0 45.0 8.0 \n",
"5033 0.0 0.0 0.0 \n",
"5034 121.0 20.0 6.0 \n",
"5035 45.0 44.0 2.0 \n",
"5036 296.0 205.0 133.0 \n",
"5037 637.0 470.0 318.0 \n",
"5038 841.0 593.0 319.0 \n",
"5039 0.0 0.0 0.0 \n",
"5040 946.0 719.0 489.0 \n",
"5041 86.0 23.0 16.0 \n",
"\n",
" cast_total_facebook_likes imdb_score imdb_score_class \n",
"5022 1546 6.2 Good \n",
"5023 674 4.0 Bad \n",
"5024 760 6.1 Good \n",
"5025 776 6.9 Good \n",
"5026 5 7.5 Good \n",
"5027 15 6.7 Good \n",
"5028 115 7.4 Good \n",
"5029 62 6.1 Good \n",
"5030 1111 5.4 Fair \n",
"5031 1186 6.4 Good \n",
"5032 368 7.0 Good \n",
"5033 0 6.3 Good \n",
"5034 147 6.9 Good \n",
"5035 93 7.8 Good \n",
"5036 690 6.4 Good \n",
"5037 2283 7.7 Good \n",
"5038 1753 7.5 Good \n",
"5039 0 6.3 Good \n",
"5040 2386 6.3 Good \n",
"5041 163 6.6 Good "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.tail(n=20)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5042.000000</td>\n",
" <td>5028.000000</td>\n",
" <td>5029.000000</td>\n",
" <td>4.551000e+03</td>\n",
" <td>4.159000e+03</td>\n",
" <td>5.042000e+03</td>\n",
" <td>5022.000000</td>\n",
" <td>4993.000000</td>\n",
" <td>5042.000000</td>\n",
" <td>4938.000000</td>\n",
" <td>5035.000000</td>\n",
" <td>5029.000000</td>\n",
" <td>5020.000000</td>\n",
" <td>5042.000000</td>\n",
" <td>5042.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2521.500000</td>\n",
" <td>107.201074</td>\n",
" <td>1.371446</td>\n",
" <td>3.975262e+07</td>\n",
" <td>4.846841e+07</td>\n",
" <td>8.368475e+04</td>\n",
" <td>272.770808</td>\n",
" <td>140.194272</td>\n",
" <td>7527.457160</td>\n",
" <td>686.621709</td>\n",
" <td>6561.323932</td>\n",
" <td>1652.080533</td>\n",
" <td>645.009761</td>\n",
" <td>9700.959143</td>\n",
" <td>6.442007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1455.644359</td>\n",
" <td>25.197441</td>\n",
" <td>2.013683</td>\n",
" <td>2.061149e+08</td>\n",
" <td>6.845299e+07</td>\n",
" <td>1.384940e+05</td>\n",
" <td>377.982886</td>\n",
" <td>121.601675</td>\n",
" <td>19322.070537</td>\n",
" <td>2813.602405</td>\n",
" <td>15021.977635</td>\n",
" <td>4042.774685</td>\n",
" <td>1665.041728</td>\n",
" <td>18165.101925</td>\n",
" <td>1.125189</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>7.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.180000e+02</td>\n",
" <td>1.620000e+02</td>\n",
" <td>5.000000e+00</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1261.250000</td>\n",
" <td>93.000000</td>\n",
" <td>0.000000</td>\n",
" <td>6.000000e+06</td>\n",
" <td>5.340988e+06</td>\n",
" <td>8.599250e+03</td>\n",
" <td>65.000000</td>\n",
" <td>50.000000</td>\n",
" <td>0.000000</td>\n",
" <td>7.000000</td>\n",
" <td>614.500000</td>\n",
" <td>281.000000</td>\n",
" <td>133.000000</td>\n",
" <td>1411.250000</td>\n",
" <td>5.800000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2521.500000</td>\n",
" <td>103.000000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000e+07</td>\n",
" <td>2.551750e+07</td>\n",
" <td>3.437100e+04</td>\n",
" <td>156.000000</td>\n",
" <td>110.000000</td>\n",
" <td>166.000000</td>\n",
" <td>49.000000</td>\n",
" <td>988.000000</td>\n",
" <td>595.000000</td>\n",
" <td>371.500000</td>\n",
" <td>3091.000000</td>\n",
" <td>6.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3781.750000</td>\n",
" <td>118.000000</td>\n",
" <td>2.000000</td>\n",
" <td>4.500000e+07</td>\n",
" <td>6.230944e+07</td>\n",
" <td>9.634700e+04</td>\n",
" <td>326.000000</td>\n",
" <td>195.000000</td>\n",
" <td>3000.000000</td>\n",
" <td>194.750000</td>\n",
" <td>11000.000000</td>\n",
" <td>918.000000</td>\n",
" <td>636.000000</td>\n",
" <td>13758.750000</td>\n",
" <td>7.200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>5042.000000</td>\n",
" <td>511.000000</td>\n",
" <td>43.000000</td>\n",
" <td>1.221550e+10</td>\n",
" <td>7.605058e+08</td>\n",
" <td>1.689764e+06</td>\n",
" <td>5060.000000</td>\n",
" <td>813.000000</td>\n",
" <td>349000.000000</td>\n",
" <td>23000.000000</td>\n",
" <td>640000.000000</td>\n",
" <td>137000.000000</td>\n",
" <td>23000.000000</td>\n",
" <td>656730.000000</td>\n",
" <td>9.500000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster budget \\\n",
"count 5042.000000 5028.000000 5029.000000 4.551000e+03 \n",
"mean 2521.500000 107.201074 1.371446 3.975262e+07 \n",
"std 1455.644359 25.197441 2.013683 2.061149e+08 \n",
"min 1.000000 7.000000 0.000000 2.180000e+02 \n",
"25% 1261.250000 93.000000 0.000000 6.000000e+06 \n",
"50% 2521.500000 103.000000 1.000000 2.000000e+07 \n",
"75% 3781.750000 118.000000 2.000000 4.500000e+07 \n",
"max 5042.000000 511.000000 43.000000 1.221550e+10 \n",
"\n",
" gross num_voted_users num_user_for_reviews \\\n",
"count 4.159000e+03 5.042000e+03 5022.000000 \n",
"mean 4.846841e+07 8.368475e+04 272.770808 \n",
"std 6.845299e+07 1.384940e+05 377.982886 \n",
"min 1.620000e+02 5.000000e+00 1.000000 \n",
"25% 5.340988e+06 8.599250e+03 65.000000 \n",
"50% 2.551750e+07 3.437100e+04 156.000000 \n",
"75% 6.230944e+07 9.634700e+04 326.000000 \n",
"max 7.605058e+08 1.689764e+06 5060.000000 \n",
"\n",
" num_critic_for_reviews movie_facebook_likes director_facebook_likes \\\n",
"count 4993.000000 5042.000000 4938.000000 \n",
"mean 140.194272 7527.457160 686.621709 \n",
"std 121.601675 19322.070537 2813.602405 \n",
"min 1.000000 0.000000 0.000000 \n",
"25% 50.000000 0.000000 7.000000 \n",
"50% 110.000000 166.000000 49.000000 \n",
"75% 195.000000 3000.000000 194.750000 \n",
"max 813.000000 349000.000000 23000.000000 \n",
"\n",
" actor_1_facebook_likes actor_2_facebook_likes actor_3_facebook_likes \\\n",
"count 5035.000000 5029.000000 5020.000000 \n",
"mean 6561.323932 1652.080533 645.009761 \n",
"std 15021.977635 4042.774685 1665.041728 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 614.500000 281.000000 133.000000 \n",
"50% 988.000000 595.000000 371.500000 \n",
"75% 11000.000000 918.000000 636.000000 \n",
"max 640000.000000 137000.000000 23000.000000 \n",
"\n",
" cast_total_facebook_likes imdb_score \n",
"count 5042.000000 5042.000000 \n",
"mean 9700.959143 6.442007 \n",
"std 18165.101925 1.125189 \n",
"min 0.000000 1.600000 \n",
"25% 1411.250000 5.800000 \n",
"50% 3091.000000 6.600000 \n",
"75% 13758.750000 7.200000 \n",
"max 656730.000000 9.500000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Descriptive statistics for the numeric columns\n",
"movie.describe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 14\n",
"facenumber_in_poster 13\n",
"country 4\n",
"budget 491\n",
"gross 883\n",
"num_voted_users 0\n",
"num_user_for_reviews 20\n",
"num_critic_for_reviews 49\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 104\n",
"actor_1_facebook_likes 7\n",
"actor_2_facebook_likes 13\n",
"actor_3_facebook_likes 22\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values (NA) in each column.\n",
"# Use the DataFrame method rather than np.sum(): np.sum(df) passes axis=None,\n",
"# which newer pandas versions deprecate in favour of a scalar result.\n",
"movie.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 14\n",
"facenumber_in_poster 13\n",
"country 4\n",
"budget 0\n",
"gross 883\n",
"num_voted_users 0\n",
"num_user_for_reviews 20\n",
"num_critic_for_reviews 49\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 104\n",
"actor_1_facebook_likes 7\n",
"actor_2_facebook_likes 13\n",
"actor_3_facebook_likes 22\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing budget values with a fixed constant\n",
"# (3.975262e+07 is the budget mean as printed by describe() above)\n",
"movie['budget']=movie['budget'].fillna(3.975262e+07)\n",
"\n",
"# Remaining missing values per column (DataFrame method instead of np.sum,\n",
"# whose axis=None behaviour is deprecated in newer pandas)\n",
"movie.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 0\n",
"facenumber_in_poster 13\n",
"country 4\n",
"budget 0\n",
"gross 883\n",
"num_voted_users 0\n",
"num_user_for_reviews 20\n",
"num_critic_for_reviews 49\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 0\n",
"actor_1_facebook_likes 7\n",
"actor_2_facebook_likes 13\n",
"actor_3_facebook_likes 22\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing duration values with the column mean\n",
"movie['duration'] = movie['duration'].fillna(movie['duration'].mean())\n",
"\n",
"# Fill missing director_facebook_likes values with the column median\n",
"movie['director_facebook_likes'] = movie['director_facebook_likes'].fillna(movie['director_facebook_likes'].median())\n",
"\n",
"# Remaining missing values per column (DataFrame method instead of np.sum,\n",
"# whose axis=None behaviour is deprecated in newer pandas)\n",
"movie.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 0\n",
"facenumber_in_poster 0\n",
"country 4\n",
"budget 0\n",
"gross 0\n",
"num_voted_users 0\n",
"num_user_for_reviews 0\n",
"num_critic_for_reviews 0\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 0\n",
"actor_1_facebook_likes 0\n",
"actor_2_facebook_likes 0\n",
"actor_3_facebook_likes 0\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# budget is not handled here: it was already filled with a constant in an earlier\n",
"# cell and has no missing values left, so a mean fill would be a no-op.\n",
"# Fill every remaining numeric column with its own column mean in one vectorized call.\n",
"mean_fill_columns = [\n",
"    'actor_1_facebook_likes',\n",
"    'actor_2_facebook_likes',\n",
"    'actor_3_facebook_likes',\n",
"    'gross',\n",
"    'facenumber_in_poster',\n",
"    'num_user_for_reviews',\n",
"    'num_critic_for_reviews',\n",
"]\n",
"# DataFrame.fillna with a Series fills each column with the value at the matching label\n",
"movie[mean_fill_columns] = movie[mean_fill_columns].fillna(movie[mean_fill_columns].mean())\n",
"\n",
"# Remaining missing values per column (DataFrame method instead of np.sum,\n",
"# whose axis=None behaviour is deprecated in newer pandas)\n",
"movie.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"23000000.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Median of the budget column (computed after the missing values were filled in above)\n",
"movie['budget'].median()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Mean director_facebook_likes for each country\n",
"rata=movie.groupby(['country'])['director_facebook_likes'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\asus\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: The signature of `Series.to_csv` was aligned to that of `DataFrame.to_csv`, and argument 'header' will change its default value from False to True: please pass an explicit value to suppress this warning.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Export the per-country means; pass header explicitly so the file layout does not\n",
"# depend on the pandas version (the Series.to_csv default changed from False to True)\n",
"rata.to_csv(\"rata.csv\", header=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 USA\n",
"dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#mode from a variable\n",
"movie['country'].mode()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import mode\n",
"\n",
"# Fill missing value with mode for qualitative/categorical data\n",
"movie['country']=movie['country'].fillna(movie['country'].mode()[0])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"movie_code 0\n",
"duration 0\n",
"facenumber_in_poster 0\n",
"country 0\n",
"budget 0\n",
"gross 0\n",
"num_voted_users 0\n",
"num_user_for_reviews 0\n",
"num_critic_for_reviews 0\n",
"movie_facebook_likes 0\n",
"director_facebook_likes 0\n",
"actor_1_facebook_likes 0\n",
"actor_2_facebook_likes 0\n",
"actor_3_facebook_likes 0\n",
"cast_total_facebook_likes 0\n",
"imdb_score 0\n",
"imdb_score_class 0\n",
"dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"#export data to csv\n",
"movie.to_csv(\"hasil_movie.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"#Noisy data\n",
"#Outlier identification using boxplot, scatterplot\n",
"#Identifying outliers with boxplot\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAAEKCAYAAAAl5S8KAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADohJREFUeJzt3W9sXXd9x/HPJ/VI24RtXZJJUUu5gPmTiGyDeIN1qA2l7ZJGaxGUiQmoJ6WrSKXkwcSDSXhK2vnRpm3qKtgaMoTLtPKnQlvGaLYU6EoL7XBK09B00Au4kC4aqUu6xesyXH/34Bxn1zeO77Vz77lfx++XZOX63uP8fj/5nndOzvU9dkQIANB7y3o9AQBAgSADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiibz4br169Omq1WpemAgDnp4MHDz4fEWtabTevINdqNY2Oji58VgCwBNl+tp3tOGUBAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0AS8/qdegt1yy236MSJE7rqqqu0Y8eOKoYEgEWnkiAfO3ZMExMTqtfrVQwHAIsSpywAIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASVQS5FOnTkmSnnvuOd11111VDAkAi04lQZ6ampIkvfTSS6rX61UMCQCLDqcsACAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJBEpUGemJjQoUOHtGnTpjM+6vW66vW6tm7dqnq9LkkaHx/X9u3bddttt2l0dHTGY83Gx8e1c+dO1et17dy5U+Pj411dy/R4s41T9Vw6Ya71AEtZlftGmiPk4eFhDQ8Pa2JiQsPDw5KkkZERPf300zpy5Ih2794947FmIyMjOnz4sIaHh3X48GHdc889XZ3v9HizjVP1XDphrvUAS1mV+0aaII+NjWlsbOz07YMHD+r+++8//fjJkydPP9Z8lDw+Pq79+/crIjQ2NqaI0P79+7v2L1rjeM3jVD2XTphrPcBSVvW+kSbIzXbt2qXJyclZH2s+Sh4ZGdHU1NSM+15++eWu/YvWOF7zOFXPpRPmWg+wlFW9b7QMsu1bbY/aHj1+/HhXJ9Po5MmTiohZH5s+kp72wAMPnBHvyclJHThwoCtzaxyveZyq59IJc60HWMqq3jdaBjki9kTEQEQMrFmzpquTabRy5UrZnvWxWq024/NrrrlGfX19M+7r6+vTtdde25W5NY7XPE7Vc+mEudYDLGVV7xtpT1ncfvvtZ4Rt2tDQ0IzPBwcHtWzZzKVccMEFuvnmm7syt8bxmsepei6dMNd6gKWs6n0jTZBrtdrpI99araaNGzdqy5Ytpx9fuXLl6cf6+/tnfO2qVau0efNm2VatVpNtbd68WatWrerKXBvHax6n6rl0wlzrAZayqveNNEEeGhrS0NCQVqxYcfoIeHBwUOvWrdP69eu1e/fuGY81Gxwc1IYNGzQ0NKQNGzZ0/V+y6fFmG6fquXTCXOsBlrIq9w2f7Y
Wz2QwMDMTo6Oi8B7n66qs1NTWlFStWqL+/X3feeee8/w4AWKxsH4yIgVbbpTlCBoCljiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkUUmQly0rhrnooovU399fxZAAsOhUEuTly5dLki699FLt2LGjiiEBYNHhlAUAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEkQZABIgiADQBIEGQCSIMgAkARBBoAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACTRV8Uga9eu1YkTJ9Tf31/FcACwKFUS5L1791YxDAAsapyyAIAkCDIAJEGQASAJggwASRBkAEiCIANAEgQZAJIgyACQBEEGgCQIMgAkQZABIAmCDABJEGQASIIgA0ASBBkAkiDIAJAEQQaAJAgyACRBkAEgCYIMAEk4Itrf2D4u6dkFjrVa0vML/NpMWEce58MaJNaRTTfW8eqIWNNqo3kF+VzYHo2IgUoG6yLWkcf5sAaJdWTTy3VwygIAkiDIAJBElUHeU+FY3cQ68jgf1iCxjmx6to7KziEDAObGKQsASKLjQba92fZ3bNdt/8Esjy+3/dny8cds1zo9h3PVxhp+3/YR20/a/rLtV/dinq20WkfDdjfZDtspXyFvZx22f7v8njxl+2+rnmM72nheXW77q7a/VT63ru/FPOdi+5O2f2z722d53Lb/olzjk7bfWvUc29HGOj5Qzv9J21+3/cuVTCwiOvYh6QJJ35P0WkmvkHRI0vqmbW6T9Ffl7fdL+mwn51DRGt4p6eLy9vZsa2h3HeV2r5T0kKRHJQ30et4L/H68XtK3JF1Sfv6LvZ73AtexR9L28vZ6SWO9nvcs67hS0lslffssj18v6X5JlvR2SY/1es4LXMcVDc+nLVWto9NHyL8mqR4R34+I/5X0GUk3Nm1zo6SR8vZ9kt5l2x2ex7louYaI+GpE/Hf56aOSLqt4ju1o53shSX8k6Y8l/U+Vk5uHdtbxe5I+FhE/kaSI+HHFc2xHO+sIST9b3v45Sf9e4fzaEhEPSXphjk1ulHRPFB6V9PO211Yzu/a1WkdEfH36+aQK9/FOB/lSST9q+Pxoed+s20TEpKQXJa3q8DzORTtraLRNxRFBNi3XYfstkl4VEV+scmLz1M734w2S3mD7EduP2t5c2eza1846dkv6oO2jkr4kaUc1U+uo+e4/i0Fl+3hfh/++2Y50m3+Mo51teqnt+dn+oKQBSVd1dUYLM+c6bC+T9OeSfreqCS1QO9+PPhWnLTapOJL5mu03R8SJLs9tPtpZx+9I+lRE/KntX5f06XIdU92fXsdk37/nxfY7VQT5HVWM1+kj5KOSXtXw+WU6879dp7ex3afiv2Zz/Reoau2sQbavkfRRSTdExKmK5jYfrdbxSklvlvSg7TEV5/v2JXxhr93n1N9HxE8j4geSvqMi0Jm0s45tkj4nSRHxDUkXqriuwmLS1v6zGNj+JUl7Jd0YEeNVjNnpIH9T0uttv8b2K1S8aLevaZt9kgbL2zdJ+kqUZ86TaLmG8r/6d6uIccbzlVKLdUTEixGxOiJqEVFTcZ7shogY7c10z6qd59TfqXihVbZXqziF8f1KZ9laO+v4oaR3SZLtdSqCfLzSWZ67fZJuLn/a4u2SXoyIY72e1HzZvlzSFyR9KCK+W9nAXXj18npJ31
XxivJHy/vuULGzS8WT7POS6pL+VdJre/lq6wLX8ICk/5D0RPmxr9dzXsg6mrZ9UAl/yqLN74cl/ZmkI5IOS3p/r+e8wHWsl/SIip/AeELSdb2e8yxruFfSMUk/VXE0vE3ShyV9uOF78bFyjYcTP6darWOvpJ807OOjVcyLd+oBQBK8Uw8AkiDIAJAEQQaAJAgyACRBkAEsea0uNtS07ZW2H7c9afumpscGbT9Tfgye7e84G4KMnrNda2dHmOPrx8qfP17I177b9vqFjo3zxqcktfuW+x+qeIfrjKsK2v4FSbskvU3FtUt22b5kPpMgyFjq3q3i53+xhMUsFxuy/Trb+20ftP01228qtx2LiCclNb+l/TclHYiIF6K4MNEBtR95SQQZefTZHimvP3uf7Ysbj3xtD9h+sLy9yvY/l9cNvlsN10+w/Ye2/832Adv32v5Ief8ZO5ftKyTdIOlPbD9h+3XVLxuJ7ZG0IyI2SvqIpI+32P6cL6zU6YsLAQv1RknbIuIR259Ucd3ss9kl6eGIuMP2Vkm3SkW0Jb1X0ltUPLcfl3Sw/Jo9Kt6F9Yztt0n6eERcbXufpC9GxH3dWRYWI9srVVwT+fMNVwde3urLZrlvXu+8I8jI4kcR8Uh5+28k7Zxj2yslvUeSIuIfbU9ft/YdKi4y9JIk2f6H8s+F7FxY2pZJOhERvzKPrzmq4oqD0y5TcUmCeQ0KZNB8JBGSJvX/z9ELW2wvzX6EIjXsXA0f6xY+VZzvIuI/Jf3A9vuk07+aqtWvcfonSdfZvqR8Me+68r62EWRkcXl5DWCpuC7ww5LGJG0s73tvw7YPSfqAJNneImn6leyHJf2W7QvLo+KtUsud679UXIoUS5jteyV9Q9IbbR+1vU3Fc2yb7UOSnlL5G15s/2r5SwTeJ+lu209JUkS8oOI38Hyz/LijvK/9eXBxIfSai190+yUVob1C0jOSPqQixn+t4sp6j6m4ctgm26tUXK1rtaR/UXH6YmNEPG97t4qgP6vi0pUPRsQnbL9G0l9KWivpZyR9pjwH/RuSPiHplKSbIuJ7lSwamAVBxnnF9sqIOGn7YhWBvzUiHu/1vIB28KIezjd7yjd6XChphBhjMeEIGQCS4EU9AEiCIANAEgQZAJIgyACQBEEGgCQIMgAk8X/ighccCHUO/wAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Horizontal boxplot of budget; points beyond the whiskers are outlier candidates.\n",
"# Pass the series as x= explicitly: newer seaborn versions treat the first\n",
"# positional argument as `data`, not `x`.\n",
"sns.boxplot(x=movie['budget'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Waktu')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEICAYAAABF82P+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHGZJREFUeJzt3X+clXWd9/HXmxkddECQjJGfTpbtwk1lRa5t1j1At5Sp1JqbY2sS3LJuOms3VpC0d3Xf0Q12r615Vyw1mj9g+mG5mrreushUrmmhmzk6laT8GCHNUH5LMH72j+saOgwXzDBzrjnnwPv5eJzHua7vdZ3r+pzhcN7ne/1URGBmZtbdoFIXYGZm5ckBYWZmmRwQZmaWyQFhZmaZHBBmZpbJAWFmZpkcEFaxJH1L0hdKXUcxSXqnpF+Xuo6+kHSVpG+Wug4rHgeE9ZukNZJ2Stom6UVJd0kaV+q6CkkKSa87yPSZkjrT97BN0jOSbpD0+oGsKyJ+EhF/lsN66tN1Pdqt/QRJf5S0pr/riIgvRsR/7+9yrHw4IKxYzomIIcAo4DnguhLX0xc/Td/DMODdwE7gEUmT+rIwSdXFLK5Iaru9nwuBZ0pVjJU3B4QVVUS8DNwKTOxqkzRM0k2Sfi9praTPSBqUTvu6pFsL5l0saYUSDZI60k0XL6Q9lQ8faN2SLpG0WtImSXdIGp22/zid5bG0d/ChHt5DZ0T8NiI+BvwI+Fy6nAZJHd3WuUbSu9Phz0m6VdItkrYAMyWdJumnkl6StFHS/5N09IHq6r4OSRMktaavf0LSuQXTviXpq2mPbaukhyW99mDvDbgZuLhg/CPATd3eU+Y6JZ0u6XeSqgrm/YCkXxa8/1sKpp0u6cF0OY9JauihNiszDggrKknHAh8CHipovo7kV/nJwH8l+VL6aDrtSuCN6SaedwKzgYvjT9eAORE4ARhD8sW2VNJ+m2AkTQX+D/DXJL2YtcC3ASLiXelsb4qIIRHxnUN4Sz8A3nkI888gCcjhwDKgE/gf6Xt4OzAN+Fhv6pJ0FPBD4F5gJNAELOv2/huBzwPHA6uBhT3UdwtwgaQqSROAocDDvVlnRDwEbAemFizvQmB595VIGgPcBXwBGAF8Avi+pFf3UJ+VEQeEFcu/SHoJ2AL8N+BLAOmvzQ8Bn46IrRGxBvhH4CKAiNgB/A1wDcmXV1NEdHRb9j9ExK6I+BHJl85fZ6z/w8D1EfFoROwCPg28XVJ9P9/XBpIvuN76aUT8S0S8EhE7I+KRiHgoIvak7/2fSUKyN04HhgCLIuKPEXE/cCdJKHT5QUT8LCL2kATSqT0sswP4NckmtIvp1nvoxTpbuoYlDQXOStu6+xvg7oi4O/1b3AesSue3CuGAsGJ5f0QMB2qAy4EfSer69X80yS/6LmtJegQARMTPgKcBAd/tttwXI2J7t9eOzlj/6MJ1RMQ24A+F6+mjMcCmQ5h/feGIpNdLujPdNLMF+CLJ36Q3RgPrI+KVgrZ9/nbA7wqGd5B8uffkJmAmyRf9Ld2m9bTO5cBfSaoB/gp4NCLWsr+TgPPTzUsvpT8eziDp3VmFcEBYUaXb739AsmnlDOAFYDfJF0aX8cCzXSOSLiMJlg3Ap7ot8nhJtd1euyFj1RsK15G+5lWF6+mjDwA/SYe3A8cWrKMK6L7JpPvlkb8O/Ao4JSKOA64iCcLe2ACM69pfk9rnb9dH3wfeBzyd8eV+0HVGxJMkgfFeDrB5KbUeuDkihhc8aiNiUT9rtwHkgLCiSncuzyDZJt4eEZ0kvYKFkoZKOgmYS/rLNT2M9AskmyQuAj4lqftmks9LOjrdR3E28L2MVS8HPirp1PTX7ReBh9PNOpAcWXVyL99DlaTXSLoOaCDZxg/wG2CwpPel2+o/QxJsBzOUZLPbNkl/Dvxdt+kHq+thklD6lKSj0p2855DuW+mrtEc2Fcg6JLU361wO/D
3wLrL/LSD59z1H0vT07zk43QE/tj+128ByQFix/FDSNpIvw4UkO5qfSKc1kXzpPA08QPIFc72Sw0BvARZHxGMR8RTJL+yb0y95SDahvEjyy3YZcGlE/Kr7yiNiBfAPJL+ONwKvBS4omOVzwI3p5o6sfRiQ7LPoeg+twHHA2yLi8XQdm0l2MH+T5Bf1dpJt+gfzCZJf2luBbwDdd5AfsK6I+CNwLsmv9ReArwEfyXr/hyoiVkXEbzPae7POFpLgvD8iXjjA8teT7LC/Cvg9SY/ik/g7p6LINwyycpX+er0lIvyr06wEnOZmZpbJAWFmZpm8icnMzDK5B2FmZpnK8WJivXbCCSdEfX19qcsw28/27dupra3teUazEnjkkUdeiIgeL3tS0QFRX1/PqlWrSl2G2X5aW1tpaGgodRlmmSRlnf2+H29iMjOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCLMiamlpYdKkSUybNo1JkybR0pJ1Lx2zylDRh7malZOWlhYWLFhAc3MznZ2dVFVVMXv2bAAaGxt7eLVZ+XEPwqxIFi5cSHNzM1OmTKG6upopU6bQ3NzMwoU93SbarDw5IMyKpL29nTPOOGOftjPOOIP29vYSVWTWP7kGhKQ1kh6X9AtJq9K2EZLuk/RU+nx82i5JX5G0WtIvJb0lz9rMim3ChAk88MAD+7Q98MADTJgwoUQVmfXPQPQgpkTEqRExOR2fD6yIiFOAFek4JHewOiV9zCG5l69ZxViwYAGzZ89m5cqV7Nmzh5UrVzJ79mwWLFhQ6tLM+qQUO6lnkNyuEOBGkls7zkvbb4rk+uMPSRouaVREbCxBjWaHrGtHdFNTE+3t7UyYMIGFCxd6B7VVrLx7EAHcK+kRSXPStrquL/30eWTaPobkvrVdOtI2s4rR2NhIW1sbK1asoK2tzeFgFS3vHsQ7ImKDpJHAfZIOdrN1ZbTtdzejNGjmANTV1dHa2lqUQs2Kadu2bf5sWsXLNSAiYkP6/Lyk24DTgOe6Nh1JGgU8n87eAYwrePlYYEPGMpcCSwEmT54cvqSylSNf7tsOB7ltYpJUK2lo1zBwJtAG3AFcnM52MXB7OnwH8JH0aKbTgc3e/2BmVjp59iDqgNskda1neUTcI+nnwHclzQbWAeen898NnAWsBnYAH82xNjMz60FuARERTwNvymj/AzAtoz2Ay/Kqx8zMDo3PpDYzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0y5B4SkKkn/IenOdPw1kh6W9JSk70g6Om2vScdXp9Pr867NzMwObCB6EFcA7QXji4EvR8QpwIvA7LR9NvBiRLwO+HI6n5mZlUiuASFpLPA+4JvpuICpwK3pLDcC70+HZ6TjpNOnpfObmVkJVOe8/H8CPgUMTcdfBbwUEXvS8Q5gTDo8BlgPEBF7JG1O53+hcIGS5gBzAOrq6mhtbc2zfrM+2bZtmz+bVvFyCwhJZwPPR8Qjkhq6mjNmjV5M+1NDxFJgKcDkyZOjoaGh+yxmJdfa2oo/m1bp8uxBvAM4V9JZwGDgOJIexXBJ1WkvYiywIZ2/AxgHdEiqBoYBm3Ksz8zMDiK3fRAR8emIGBsR9cAFwP0R8WFgJfDBdLaLgdvT4TvScdLp90fEfj0IMzMbGKU4D2IeMFfSapJ9DM1pezPwqrR9LjC/BLWZmVkq753UAEREK9CaDj8NnJYxz8vA+QNRj5mZ9cxnUpuZWSYHhJmZZXJAmJlZJgeEmZllckCYmVkmB4SZmWVyQJiZWSYHhJmZZXJAmJlZJgeEmZllckCYmVkmB4
SZmWVyQJiZWSYHhJmZZXJAmJlZJgeEmZllckCYmVkmB4RZEbW0tDBp0iSmTZvGpEmTaGlpKXVJZn3mgDArkpaWFq644gq2b99ORLB9+3auuOIKh4RVLEVEqWvos8mTJ8eqVatKXYYZAOPGjWPPnj0sX76czs5OqqqquPDCC6murmb9+vWlLs9sL0mPRMTknuZzD8KsSDo6Opg5cyZNTU1Mnz6dpqYmZs6cSUdHR6lLM+uT6lIXYHY4ueGGG2hpadnbg2hsbCx1SWZ95h6EWZFUV1eze/fufdp2795NdbV/h1ll8ifXrEi6eg2zZs1i3bp1jB8/nqqqKjo7O0tdmlmfuAdhViQTJ05kzpw51NbWAlBbW8ucOXOYOHFiiSsz6xv3IMyKZMGCBSxYsIDm5ua9vYnZs2ezcOHCUpdm1icOCLMi6doh3dTURHt7OxMmTGDhwoXeUW0Vy+dBmOWgtbWVhoaGUpdhlsnnQZiZWb84IMzMLFNuASFpsKSfSXpM0hOSPp+2v0bSw5KekvQdSUen7TXp+Op0en1etZnlxRfrs8NJnjupdwFTI2KbpKOAByT9KzAX+HJEfFvSEmA28PX0+cWIeJ2kC4DFwIdyrM+sqFpaWjKPYgK8o9oqUm49iEhsS0ePSh8BTAVuTdtvBN6fDs9Ix0mnT5OkvOozK7aFCxfS3NzMlClTqK6uZsqUKTQ3N/swV6tYuR7mKqkKeAR4HfBV4LfASxGxJ52lAxiTDo8B1gNExB5Jm4FXAS90W+YcYA5AXV0dra2teb4Fs15rb2/nvvvu2+dM6sbGRtrb2/05tYo0IIe5ShoO3Ab8T+CGiHhd2j4OuDsi3iDpCWB6RHSk034LnBYRfzjQcn2Yq5WTcePGsXXrVo4//njWrl3LSSedxIsvvsjQoUN9uW8rK2V1mGtEvAS0AqcDwyV19VzGAhvS4Q5gHEA6fRiwaSDqMyuGHTt2sGXLFnbu3AnAzp072bJlCzt27ChxZWZ906uAkDQ+69HDa16d9hyQdAzwbqAdWAl8MJ3tYuD2dPiOdJx0+v1RyWfx2RFn06ZNHHfccRxzzDFI4phjjuG4445j0yb/zrHK1NsexF3AnenzCuBp4F97eM0oYKWkXwI/B+6LiDuBecBcSatJ9jE0p/M3A69K2+cC8w/ljZiVgwULFvDMM8+wYsUKnnnmGRYsWFDqksz6rE/7ICS9BfjbiPjb4pfUe94HYeVEEsOGDdtvH8TmzZtxZ9jKSa77ICLiUeBtfXmt2eFqxIgRbNmyhZdffhlJvPzyy2zZsoURI0aUujSzPunVYa6S5haMDgLeAvw+l4rMKtSxxx7LK6+8wuDBg4kIBg8ezLBhwzj22GNLXZpZn/S2BzG04FFDsi9iRl5FmVWiDRs20NjYyMaNG4kINm7cSGNjIxs2bOj5xWZlqLcnyj0ZEd8rbJB0PvC9A8xvdsQZPXo0LS0tjBo1inXr1jFq1ChaWloYPXp0qUsz65Pe9iA+3cs2syPWjh072Lp1K01NTdx11100NTWxdetWnwdhFeugPQhJ7wXOAsZI+krBpOOAPdmvMjsybdq0ifnz53P99dfvvaPcJz/5SRYtWlTq0sz6pKcexAZgFfAyyTWVuh53ANPzLc2s8kydOpW2tjZWrFhBW1sbU6dOLXVJZn3Wq/MgJL0tIn7ere2ciPhhbpX1gs+DsHIybtw49uzZw/Lly/de7vvCCy+kurra12KyslLs8yD+WdIbChbeCHymr8WZHY6uvvpqOjs7mTVrFmeeeSazZs2is7OTq6++utSlmfVJbwPig8CNkiZIugT4GHBmfmWZVZ7GxkauvfZaamtrkURtbS3XXnutbxZkFatXARERTwMXAN8nCYszI2JznoWZVaIHH3yQ1atX88orr7B69WoefPDBUpdk1mc9HcX0OMld4LqMAKqAhyUREW/MszizStLU1MSSJUtYvHgxEydO5Mknn2TevHkAXHfddSWuzuzQ9XSi3N
kDUoXZYeAb3/gGixcvZu7cubS2tjJ3bnKFmquuusoBYRXpoJuYImJt4QPYSdKj6HqYWWrXrl1ceuml+7Rdeuml7Nq1q0QVmfVPb28YdK6kp4BngB8Ba+j5fhBmR5SamhqWLFmyT9uSJUuoqakpUUVm/dPbazH9b5Lbhf5bRLxZ0hTAh2aYFbjkkku48sorufLKK/dpv/zyy0tUkVn/9PYw190R8QdgkKRBEbESODXHuswqzm9+85tDajcrd73tQbwkaQjwE2CZpOfxtZjM9nHvvfcydOhQbr/99r1nUs+YMYN777231KWZ9clBexCSPi7pbcD7gR3Ax4F7gN8C5+RfnlllWbZsGVOmTKG6upopU6awbNmyUpdk1mc9bWIaC1wLrAPuB/4X8AJwS7rJycwK3HnnnQcdN6skvb1Y39HAZOAvgbenj5ciYmK+5R2cL9Zn5WTIkCFs3759v/ba2lq2bdtWgorMshX7Yn3HkNwDYlj62AA83PfyzA4/J5988iG1m5W7ni61sRT4L8BWkkB4ELgmIl4cgNrMKkpbWxtjx47l2WefJSKQxJgxY2hrayt1aWZ90lMPYjxQA/wOeBboAF7KuyizShQRPPvss4wcORJJjBw5cm9YmFWig/YgIuI9kkTSi/hL4EpgkqRNwE8j4rMDUKNZxYgInnvuOYC9z2aVqsd9EJFoA+4mubzGvwOvBa7IuTazilRfX8/NN99MfX19qUsx65ee9kH8PUnP4R3AbpJw+ClwPfB47tWZVZiqqirWrFnDRRddtHe8s7OzxFWZ9U1PPYh64FbgtIg4OSIuioivRcRjEfFK/uWZVZbOzk7OPfdcbrvtNs4991yHg1W0Xp0HUa58HoSVk2R3XbZK/n9mh59inwdhZmZHmNwCQtI4SSsltUt6QtIVafsISfdJeip9Pj5tl6SvSFot6ZeS3pJXbWZm1rM8exB7gCsjYgLJvSQukzQRmA+siIhTgBXpOMB7gVPSxxzg6znWZmZmPcgtICJiY0Q8mg5vBdqBMcAM4MZ0thtJrhRL2n5TeljtQ8BwSaPyqs/MzA6ut/eD6BdJ9cCbSS7XURcRGyEJEUkj09nGAOsLXtaRtm3stqw5JD0M6urqaG1tzbN0s6Lw59QqUe4Bkd5o6PvAxyNiy0GO9MiasN+hHxGxFFgKyVFMDQ0NRarULD/+nFolyvUoJklHkYTDsoj4Qdr8XNemo/T5+bS9AxhX8PKxJFeNNTOzEsjzKCYBzUB7RFxTMOkO4OJ0+GLg9oL2j6RHM50ObO7aFGVmZgMvz01M7wAuAh6X9Iu07SpgEfBdSbNJ7lR3fjrtbuAsYDXJ7U0/mmNtZmbWg9wCIiIeIHu/AsC0jPkDuCyveszM7ND4TGozM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMA3JHObNKd5AbXRX19ck1K83KgwPCrBd688V9sBDwF79VIm9iMiuSA4WAw8EqlQPCrIgigojgpHl37h02q1QOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMvlifHXHe9Pl72bxzd+7rqZ9/V+7rGHbMUTz22TNzX48dmRwQdsTZvHM3axa9L9d1tLa20tDQkOs6YGBCyI5c3sRkZmaZHBBmZpYpt4CQdL2k5yW1FbSNkHSfpKfS5+PTdkn6iqTVkn4p6S151WVmZr2TZw/iW8B7urXNB1ZExCnAinQc4L3AKeljDvD1HOsyM7NeyG0ndUT8WFJ9t+YZQEM6fCPQCsxL22+K5O4qD0kaLmlURGzMqz47cg2dMJ833Di/5xn768b8VzF0AkC+O9ztyDXQRzHVdX3pR8RGSSPT9jHA+oL5OtK2/QJC0hySXgZ1dXW0trbmWrAdfra2L+Jb76nNdR3btm1jyJAhua4DYOY92/1/wH
JTLoe5Zt3tPfNejRGxFFgKMHny5BiIQwntMHPPXbkfgjpQh7kOxHuxI9dAH8X0nKRRAOnz82l7BzCuYL6xwIYBrs3MzAoMdA/iDuBiYFH6fHtB++WSvg38BbDZ+x8sTwNygtk9A3MmtVlecgsISS0kO6RPkNQBfJYkGL4raTawDjg/nf1u4CxgNbAD+GhedZnlfRY1JAE0EOsxy1OeRzE1HmDStIx5A7gsr1rMzOzQ+UxqMzPL5IAwM7NMDggzM8vkgDAzs0wOCDMzy+SAMDOzTA4IMzPL5IAwM7NMDggzM8vkgDArovHjxyOJtYvPRhLjx48vdUlmfeaAMCuS8ePHs379+n3a1q9f75CwilUu94MwK2tS1i1Lemf9+vW9fn1yWTKz8uAehFkvRESPD4Dq6mrq6+sZNGgQ9fX1VFdX9/r1DgcrN+5BmBXRnj17WLNmDcDeZ7NK5R6EmZllckCYmVkmB4SZmWVyQJiZWSYHhFkO+nNYrFm5cECY5cCHrNrhwAFhZmaZHBBmZpbJAWFmZpkcEGZmlskBYVZkVVVVXHPNNVRVVZW6FLN+8bWYzIqss7OTuXPnlroMs35zD8LMzDI5IMzMLJMDwszMMjkgzIrsxBNPZNCgQZx44omlLsWsX7yT2qyIhg4dyvLly+ns7KSqqooZM2awdevWUpdl1idlFRCS3gNcC1QB34yIRSUuyeyQ1NTUMGvWLNatW8f48eOpqalxQFjFKptNTJKqgK8C7wUmAo2SJpa2KrPeq6mpYfr06dTW1gJQW1vL9OnTqampKXFlZn1TTj2I04DVEfE0gKRvAzOAJ0talVkvXXLJJSxZsoTFixczceJEnnzySebNm8ell15a6tLM+qScAmIMsL5gvAP4i+4zSZoDzAGoq6ujtbV1QIoz68l5551HR0cH8+fPZ/fu3Rx11FGcffbZnHfeef6cWkUqp4DIusPKfhfVj4ilwFKAyZMnR0NDQ85lmfVe1+extbUVfzat0pXNPgiSHsO4gvGxwIYS1WJmdsQrp4D4OXCKpNdIOhq4ALijxDWZmR2xymYTU0TskXQ58P9JDnO9PiKeKHFZZmZHrLIJCICIuBu4u9R1mJlZeW1iMjOzMqKI/Q4UqhiSfg+sLXUdZhlOAF4odRFmB3BSRLy6p5kqOiDMypWkVRExudR1mPWHNzGZmVkmB4SZmWVyQJjlY2mpCzDrL++DMDOzTO5BmJlZJgeEmZllckCYZZD0OUmfKMJyhkv6WMH4aEm39ne5ZgPBAWHWT5IOdsma4cDegIiIDRHxwfyrMus/B4RZStICSb+W9G/An6VtrZImp8MnSFqTDs+U9D1JPwTulTRE0gpJj0p6XNKMdLGLgNdK+oWkL0mql9SWLmOwpBvS+f9D0pSCZf9A0j2SnpJ09QD/KcyAMrtYn1mpSHorySXm30zy/+JR4JEeXvZ24I0RsSntRXwgIrZIOgF4SNIdwHxgUkScmq6nvuD1lwFExBsk/TlJ0Lw+nXZqWssu4NeSrouIwjsumuXOAWGWeCdwW0TsAEi/3HtyX0RsSocFfFHSu4BXSG6hW9fD688ArgOIiF9JWgt0BcSKiNic1vIkcBL73pLXLHcOCLM/yTopaA9/2hQ7uNu07QXDHwZeDbw1Inanm6K6z99d1m12u+wqGO7E/1etBLwPwizxY+ADko6RNBQ4J21fA7w1HT7YzuVhwPNpOEwh+cUPsBUYepB1fhgg3bQ0Hvh1n9+BWZE5IMyAiHgU+A7wC+D7wE/SSf8X+DtJD5JcwvtAlgGTJa0i+dL/VbrcPwD/LqlN0pe6veZrQJWkx9N1z4yIXZiVCV9qw8zMMrkHYWZmmRwQZmaWyQFhZmaZHBBmZpbJAWFmZpkcEGZmlskBYWZmmf4Tlt/YqbAS2EwAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Boxplot of movie duration; points beyond the whiskers are outlier candidates\n",
"movie.boxplot(column='duration')\n",
"plt.title('Boxplot Duration Movie')\n",
"plt.ylabel('Waktu')\n",
"# Render explicitly so the cell does not echo the Text(...) repr of the last call\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2.81395543 2.4562438 1.62158335 ... 1.24010965 0.28621199 0.68366935]\n"
]
}
],
"source": [
"from scipy import stats\n",
"z = np.abs(stats.zscore(movie['duration']))\n",
"print(z)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(array([ 9, 22, 24, 25, 31, 110, 112, 146, 164, 274, 278,\n",
" 307, 325, 338, 478, 644, 697, 705, 711, 756, 815, 825,\n",
" 832, 839, 882, 907, 916, 1037, 1123, 1124, 1143, 1159, 1174,\n",
" 1216, 1264, 1373, 1428, 1500, 1522, 1570, 1663, 1709, 1713, 1812,\n",
" 1817, 1841, 1873, 1907, 1946, 1971, 1979, 2048, 2087, 2190, 2240,\n",
" 2256, 2265, 2341, 2344, 2354, 2355, 2465, 2560, 2628, 2643, 2726,\n",
" 2815, 2836, 2969, 3025, 3047, 3074, 3083, 3266, 3302, 3310, 3328,\n",
" 3509, 3649, 3660, 3870, 3950, 3969, 3981, 4076, 4078, 4086, 4280,\n",
" 4326, 4438, 4481, 4633, 4672, 4693, 4707, 4746, 4802, 4936],\n",
" dtype=int64),)\n"
]
}
],
"source": [
"# Positions of the rows whose absolute z-score is greater than 3\n",
"is_outlier = z > 3\n",
"print(np.where(is_outlier))"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#scatterplot\n",
"# Actor 1 Facebook likes (x) against actor 2 Facebook likes (y)\n",
"plt.scatter(movie[\"actor_1_facebook_likes\"], movie[\"actor_2_facebook_likes\"])\n",
"# Label the axes and add a title so the figure can be read on its own\n",
"plt.xlabel(\"actor_1_facebook_likes\")\n",
"plt.ylabel(\"actor_2_facebook_likes\")\n",
"plt.title(\"Actor 1 vs Actor 2 Facebook Likes\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" <th>cat_actor1_fb</th>\n",
" <th>scorebin_actor1_fb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class cat_actor1_fb scorebin_actor1_fb \n",
"0 7.9 Good Low (500, 2000] \n",
"1 7.1 Good Very high (10000, 700000] \n",
"2 6.8 Good Very high (10000, 700000] \n",
"3 8.5 Excellent Very high (10000, 700000] \n",
"4 6.6 Good Low (500, 2000] "
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Binning\n",
"# Bin edges; pd.cut intervals are right-closed, so starting at -1 puts a value of 0 in the first bin\n",
"bins = [-1, 500, 2000, 5000, 10000, 700000]\n",
"# One label per interval, from the lowest bin to the highest\n",
"group_names = ['Very low', 'Low', 'Medium', 'High', 'Very high']\n",
"\n",
"actor1_likes = movie['actor_1_facebook_likes']\n",
"# Labelled category first, then the raw interval, so the new columns keep this order\n",
"movie['cat_actor1_fb'] = pd.cut(actor1_likes, bins=bins, labels=group_names)\n",
"movie['scorebin_actor1_fb'] = pd.cut(actor1_likes, bins=bins)\n",
"\n",
"# Preview the first rows with the two new columns\n",
"movie.head()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" <th>cat_actor1_fb</th>\n",
" <th>scorebin_actor1_fb</th>\n",
" <th>score_code</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" <td>Very high</td>\n",
" <td>(10000, 700000]</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" <td>Low</td>\n",
" <td>(500, 2000]</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class cat_actor1_fb scorebin_actor1_fb score_code \n",
"0 7.9 Good Low (500, 2000] 3 \n",
"1 7.1 Good Very high (10000, 700000] 3 \n",
"2 6.8 Good Very high (10000, 700000] 3 \n",
"3 8.5 Excellent Very high (10000, 700000] 1 \n",
"4 6.6 Good Low (500, 2000] 3 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Turn the imdb_score_class labels into integer codes stored in a new score_code column\n",
"LE = LabelEncoder()\n",
"score_classes = movie['imdb_score_class']\n",
"movie['score_code'] = LE.fit_transform(score_classes)\n",
"\n",
"movie.head()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\asus\\Anaconda3\\lib\\site-packages\\numpy\\core\\fromnumeric.py:2389: FutureWarning: Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.\n",
" return ptp(axis=axis, out=out, **kwargs)\n"
]
},
{
"data": {
"text/plain": [
"const 0.997801\n",
"duration 0.997994\n",
"facenumber_in_poster 0.998582\n",
"budget 0.000000\n",
"gross 0.000000\n",
"num_voted_users 0.924973\n",
"num_user_for_reviews 0.978588\n",
"num_critic_for_reviews 0.997783\n",
"movie_facebook_likes 0.983995\n",
"director_facebook_likes 0.994986\n",
"actor_1_facebook_likes 0.997458\n",
"actor_2_facebook_likes 0.997804\n",
"actor_3_facebook_likes 0.996341\n",
"cast_total_facebook_likes 0.997823\n",
"imdb_score 0.999620\n",
"score_code 0.000000\n",
"dtype: float64"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Feature Selection with Backward Elimination\n",
"import statsmodels.api as sm\n",
"\n",
"# Target: the label-encoded score class\n",
"y = movie['score_code']\n",
"\n",
"# Predictors: drop the id, the text/categorical columns and the target itself.\n",
"# Keeping 'score_code' among the predictors would let OLS fit y trivially and\n",
"# make the remaining p-values meaningless.\n",
"# NOTE(review): 'imdb_score' is presumably what 'imdb_score_class' was derived from;\n",
"# consider dropping it as well - confirm.\n",
"x = movie.drop(['movie_code','country','imdb_score_class', 'cat_actor1_fb','scorebin_actor1_fb','score_code'],axis=1)\n",
"# Cast to float, not int: int truncates decimals (e.g. imdb_score 7.9 -> 7) and the\n",
"# platform default int may be 32-bit, which could overflow large budget/gross values.\n",
"x = x.astype(float)\n",
"\n",
"# Add the intercept column, fit the OLS model, then show the p-value of each term\n",
"x_1 = sm.add_constant(x)\n",
"model = sm.OLS(y,x_1).fit()\n",
"model.pvalues"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie_code</th>\n",
" <th>duration</th>\n",
" <th>facenumber_in_poster</th>\n",
" <th>country</th>\n",
" <th>budget</th>\n",
" <th>gross</th>\n",
" <th>num_voted_users</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>movie_facebook_likes</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>cast_total_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>imdb_score_class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>237000000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>886204</td>\n",
" <td>3054.0</td>\n",
" <td>723.0</td>\n",
" <td>33000</td>\n",
" <td>0.0</td>\n",
" <td>1000.0</td>\n",
" <td>936.0</td>\n",
" <td>855.0</td>\n",
" <td>4834</td>\n",
" <td>7.9</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>169.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>300000000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>471220</td>\n",
" <td>1238.0</td>\n",
" <td>302.0</td>\n",
" <td>0</td>\n",
" <td>563.0</td>\n",
" <td>40000.0</td>\n",
" <td>5000.0</td>\n",
" <td>1000.0</td>\n",
" <td>48350</td>\n",
" <td>7.1</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>148.0</td>\n",
" <td>1.0</td>\n",
" <td>UK</td>\n",
" <td>245000000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>275868</td>\n",
" <td>994.0</td>\n",
" <td>602.0</td>\n",
" <td>85000</td>\n",
" <td>0.0</td>\n",
" <td>11000.0</td>\n",
" <td>393.0</td>\n",
" <td>161.0</td>\n",
" <td>11700</td>\n",
" <td>6.8</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>164.0</td>\n",
" <td>0.0</td>\n",
" <td>USA</td>\n",
" <td>250000000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>1144337</td>\n",
" <td>2701.0</td>\n",
" <td>813.0</td>\n",
" <td>164000</td>\n",
" <td>22000.0</td>\n",
" <td>27000.0</td>\n",
" <td>23000.0</td>\n",
" <td>23000.0</td>\n",
" <td>106759</td>\n",
" <td>8.5</td>\n",
" <td>Excellent</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>132.0</td>\n",
" <td>1.0</td>\n",
" <td>USA</td>\n",
" <td>263700000.0</td>\n",
" <td>73058679.0</td>\n",
" <td>212204</td>\n",
" <td>738.0</td>\n",
" <td>462.0</td>\n",
" <td>24000</td>\n",
" <td>475.0</td>\n",
" <td>640.0</td>\n",
" <td>632.0</td>\n",
" <td>530.0</td>\n",
" <td>1873</td>\n",
" <td>6.6</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie_code duration facenumber_in_poster country budget \\\n",
"0 1 178.0 0.0 USA 237000000.0 \n",
"1 2 169.0 0.0 USA 300000000.0 \n",
"2 3 148.0 1.0 UK 245000000.0 \n",
"3 4 164.0 0.0 USA 250000000.0 \n",
"4 5 132.0 1.0 USA 263700000.0 \n",
"\n",
" gross num_voted_users num_user_for_reviews num_critic_for_reviews \\\n",
"0 760505847.0 886204 3054.0 723.0 \n",
"1 309404152.0 471220 1238.0 302.0 \n",
"2 200074175.0 275868 994.0 602.0 \n",
"3 448130642.0 1144337 2701.0 813.0 \n",
"4 73058679.0 212204 738.0 462.0 \n",
"\n",
" movie_facebook_likes director_facebook_likes actor_1_facebook_likes \\\n",
"0 33000 0.0 1000.0 \n",
"1 0 563.0 40000.0 \n",
"2 85000 0.0 11000.0 \n",
"3 164000 22000.0 27000.0 \n",
"4 24000 475.0 640.0 \n",
"\n",
" actor_2_facebook_likes actor_3_facebook_likes cast_total_facebook_likes \\\n",
"0 936.0 855.0 4834 \n",
"1 5000.0 1000.0 48350 \n",
"2 393.0 161.0 11700 \n",
"3 23000.0 23000.0 106759 \n",
"4 632.0 530.0 1873 \n",
"\n",
" imdb_score imdb_score_class \n",
"0 7.9 Good \n",
"1 7.1 Good \n",
"2 6.8 Good \n",
"3 8.5 Excellent \n",
"4 6.6 Good "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Merge the two datasets\n",
"# Load both source files\n",
"movie1, movie2 = pd.read_csv(\"movie1.csv\"), pd.read_csv(\"movie2.csv\")\n",
"\n",
"# Join on the shared movie_code column (inner join, the pd.merge default)\n",
"movie_complete = pd.merge(left=movie1, right=movie2, how='inner', on='movie_code')\n",
"\n",
"movie_complete.head()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"#export data to csv\n",
"# index=False: the merged frame only has the default integer index, which would\n",
"# otherwise be written as an extra unnamed first column\n",
"movie_complete.to_csv(\"movie_complete.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment