Skip to content

Instantly share code, notes, and snippets.

@astrojuanlu
Created October 2, 2023 12:28
Show Gist options
  • Save astrojuanlu/f3e2bc336568a18cc5ca507e05206dd7 to your computer and use it in GitHub Desktop.
Save astrojuanlu/f3e2bc336568a18cc5ca507e05206dd7 to your computer and use it in GitHub Desktop.
Notebook version of the Spaceflights Kedro tutorial https://docs.kedro.org/en/stable/tutorial/
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7cca8e81-e49c-4c75-b80b-e27b2c61682f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "34d2e416",
"metadata": {},
"outputs": [],
"source": [
"# Read the three raw input tables; the paths are relative to the kernel's working directory.\n",
"companies = pd.read_csv(\"../data/01_raw/companies.csv\")\n",
"reviews = pd.read_csv(\"../data/01_raw/reviews.csv\")\n",
"shuttles = pd.read_excel(\"../data/01_raw/shuttles.xlsx\", engine=\"openpyxl\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8f3f2ed2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>company_rating</th>\n",
" <th>company_location</th>\n",
" <th>total_fleet_count</th>\n",
" <th>iata_approved</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>35029</td>\n",
" <td>100%</td>\n",
" <td>Niue</td>\n",
" <td>4.0</td>\n",
" <td>f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30292</td>\n",
" <td>67%</td>\n",
" <td>Anguilla</td>\n",
" <td>6.0</td>\n",
" <td>f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19032</td>\n",
" <td>67%</td>\n",
" <td>Russian Federation</td>\n",
" <td>4.0</td>\n",
" <td>f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8238</td>\n",
" <td>91%</td>\n",
" <td>Barbados</td>\n",
" <td>15.0</td>\n",
" <td>t</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>30342</td>\n",
" <td>NaN</td>\n",
" <td>Sao Tome and Principe</td>\n",
" <td>2.0</td>\n",
" <td>t</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id company_rating company_location total_fleet_count \\\n",
"0 35029 100% Niue 4.0 \n",
"1 30292 67% Anguilla 6.0 \n",
"2 19032 67% Russian Federation 4.0 \n",
"3 8238 91% Barbados 15.0 \n",
"4 30342 NaN Sao Tome and Principe 2.0 \n",
"\n",
" iata_approved \n",
"0 f \n",
"1 f \n",
"2 f \n",
"3 t \n",
"4 t "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"companies.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fe609fb7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>shuttle_id</th>\n",
" <th>review_scores_rating</th>\n",
" <th>review_scores_comfort</th>\n",
" <th>review_scores_amenities</th>\n",
" <th>review_scores_trip</th>\n",
" <th>review_scores_crew</th>\n",
" <th>review_scores_location</th>\n",
" <th>review_scores_price</th>\n",
" <th>number_of_reviews</th>\n",
" <th>reviews_per_month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63561</td>\n",
" <td>97.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>133</td>\n",
" <td>1.65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>36260</td>\n",
" <td>90.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>3</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>57015</td>\n",
" <td>95.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>14</td>\n",
" <td>0.14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14035</td>\n",
" <td>93.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>39</td>\n",
" <td>0.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10036</td>\n",
" <td>98.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>92</td>\n",
" <td>0.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" shuttle_id review_scores_rating review_scores_comfort \\\n",
"0 63561 97.0 10.0 \n",
"1 36260 90.0 8.0 \n",
"2 57015 95.0 9.0 \n",
"3 14035 93.0 10.0 \n",
"4 10036 98.0 10.0 \n",
"\n",
" review_scores_amenities review_scores_trip review_scores_crew \\\n",
"0 9.0 10.0 10.0 \n",
"1 9.0 10.0 9.0 \n",
"2 10.0 9.0 10.0 \n",
"3 9.0 9.0 9.0 \n",
"4 10.0 10.0 10.0 \n",
"\n",
" review_scores_location review_scores_price number_of_reviews \\\n",
"0 9.0 10.0 133 \n",
"1 9.0 9.0 3 \n",
"2 9.0 9.0 14 \n",
"3 10.0 9.0 39 \n",
"4 9.0 9.0 92 \n",
"\n",
" reviews_per_month \n",
"0 1.65 \n",
"1 0.09 \n",
"2 0.14 \n",
"3 0.42 \n",
"4 0.94 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reviews.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0a1b7d66",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>shuttle_location</th>\n",
" <th>shuttle_type</th>\n",
" <th>engine_type</th>\n",
" <th>engine_vendor</th>\n",
" <th>engines</th>\n",
" <th>passenger_capacity</th>\n",
" <th>cancellation_policy</th>\n",
" <th>crew</th>\n",
" <th>d_check_complete</th>\n",
" <th>moon_clearance_complete</th>\n",
" <th>price</th>\n",
" <th>company_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63561</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>f</td>\n",
" <td>f</td>\n",
" <td>$1,325.0</td>\n",
" <td>35029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>36260</td>\n",
" <td>Anguilla</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>t</td>\n",
" <td>f</td>\n",
" <td>$1,780.0</td>\n",
" <td>30292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>57015</td>\n",
" <td>Russian Federation</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>moderate</td>\n",
" <td>0.0</td>\n",
" <td>f</td>\n",
" <td>f</td>\n",
" <td>$1,715.0</td>\n",
" <td>19032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14035</td>\n",
" <td>Barbados</td>\n",
" <td>Type V5</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>3.0</td>\n",
" <td>6</td>\n",
" <td>strict</td>\n",
" <td>3.0</td>\n",
" <td>f</td>\n",
" <td>f</td>\n",
" <td>$4,770.0</td>\n",
" <td>8238</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10036</td>\n",
" <td>Sao Tome and Principe</td>\n",
" <td>Type V2</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>2.0</td>\n",
" <td>4</td>\n",
" <td>strict</td>\n",
" <td>2.0</td>\n",
" <td>f</td>\n",
" <td>f</td>\n",
" <td>$2,820.0</td>\n",
" <td>30342</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id shuttle_location shuttle_type engine_type engine_vendor \\\n",
"0 63561 Niue Type V5 Quantum ThetaBase Services \n",
"1 36260 Anguilla Type V5 Quantum ThetaBase Services \n",
"2 57015 Russian Federation Type V5 Quantum ThetaBase Services \n",
"3 14035 Barbados Type V5 Plasma ThetaBase Services \n",
"4 10036 Sao Tome and Principe Type V2 Plasma ThetaBase Services \n",
"\n",
" engines passenger_capacity cancellation_policy crew d_check_complete \\\n",
"0 1.0 2 strict 1.0 f \n",
"1 1.0 2 strict 1.0 t \n",
"2 1.0 2 moderate 0.0 f \n",
"3 3.0 6 strict 3.0 f \n",
"4 2.0 4 strict 2.0 f \n",
"\n",
" moon_clearance_complete price company_id \n",
"0 f $1,325.0 35029 \n",
"1 f $1,780.0 30292 \n",
"2 f $1,715.0 19032 \n",
"3 f $4,770.0 8238 \n",
"4 f $2,820.0 30342 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"shuttles.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "baba1b9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>company_rating</th>\n",
" <th>company_location</th>\n",
" <th>total_fleet_count</th>\n",
" <th>iata_approved</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>35029</td>\n",
" <td>100.0</td>\n",
" <td>Niue</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30292</td>\n",
" <td>67.0</td>\n",
" <td>Anguilla</td>\n",
" <td>6.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19032</td>\n",
" <td>67.0</td>\n",
" <td>Russian Federation</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8238</td>\n",
" <td>91.0</td>\n",
" <td>Barbados</td>\n",
" <td>15.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>30342</td>\n",
" <td>NaN</td>\n",
" <td>Sao Tome and Principe</td>\n",
" <td>2.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id company_rating company_location total_fleet_count \\\n",
"0 35029 100.0 Niue 4.0 \n",
"1 30292 67.0 Anguilla 6.0 \n",
"2 19032 67.0 Russian Federation 4.0 \n",
"3 8238 91.0 Barbados 15.0 \n",
"4 30342 NaN Sao Tome and Principe 2.0 \n",
"\n",
" iata_approved \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 True \n",
"4 True "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each conversion is guarded by dtype so this cell can be re-run safely: once the column is\n",
"# boolean, comparing it to \"t\" again would silently turn every value into False.\n",
"if not pd.api.types.is_bool_dtype(companies[\"iata_approved\"]):\n",
"    companies[\"iata_approved\"] = companies[\"iata_approved\"] == \"t\"\n",
"\n",
"# Strip the literal percent sign (\"67%\" -> 67.0); missing ratings stay NaN.\n",
"if not pd.api.types.is_numeric_dtype(companies[\"company_rating\"]):\n",
"    companies[\"company_rating\"] = (\n",
"        companies[\"company_rating\"].str.replace(\"%\", \"\", regex=False).astype(float)\n",
"    )\n",
"\n",
"companies.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "10df3b2a-826e-4e58-97df-626715077c27",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>shuttle_location</th>\n",
" <th>shuttle_type</th>\n",
" <th>engine_type</th>\n",
" <th>engine_vendor</th>\n",
" <th>engines</th>\n",
" <th>passenger_capacity</th>\n",
" <th>cancellation_policy</th>\n",
" <th>crew</th>\n",
" <th>d_check_complete</th>\n",
" <th>moon_clearance_complete</th>\n",
" <th>price</th>\n",
" <th>company_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63561</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>1325.0</td>\n",
" <td>35029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>36260</td>\n",
" <td>Anguilla</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>1780.0</td>\n",
" <td>30292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>57015</td>\n",
" <td>Russian Federation</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>moderate</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>1715.0</td>\n",
" <td>19032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14035</td>\n",
" <td>Barbados</td>\n",
" <td>Type V5</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>3.0</td>\n",
" <td>6</td>\n",
" <td>strict</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>4770.0</td>\n",
" <td>8238</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10036</td>\n",
" <td>Sao Tome and Principe</td>\n",
" <td>Type V2</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>2.0</td>\n",
" <td>4</td>\n",
" <td>strict</td>\n",
" <td>2.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2820.0</td>\n",
" <td>30342</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id shuttle_location shuttle_type engine_type engine_vendor \\\n",
"0 63561 Niue Type V5 Quantum ThetaBase Services \n",
"1 36260 Anguilla Type V5 Quantum ThetaBase Services \n",
"2 57015 Russian Federation Type V5 Quantum ThetaBase Services \n",
"3 14035 Barbados Type V5 Plasma ThetaBase Services \n",
"4 10036 Sao Tome and Principe Type V2 Plasma ThetaBase Services \n",
"\n",
" engines passenger_capacity cancellation_policy crew d_check_complete \\\n",
"0 1.0 2 strict 1.0 False \n",
"1 1.0 2 strict 1.0 True \n",
"2 1.0 2 moderate 0.0 False \n",
"3 3.0 6 strict 3.0 False \n",
"4 2.0 4 strict 2.0 False \n",
"\n",
" moon_clearance_complete price company_id \n",
"0 False 1325.0 35029 \n",
"1 False 1780.0 30292 \n",
"2 False 1715.0 19032 \n",
"3 False 4770.0 8238 \n",
"4 False 2820.0 30342 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each conversion is guarded by dtype so this cell can be re-run safely; without the guard a\n",
"# second run would compare booleans to \"t\" and set both flags to False everywhere.\n",
"if not pd.api.types.is_bool_dtype(shuttles[\"d_check_complete\"]):\n",
"    shuttles[\"d_check_complete\"] = shuttles[\"d_check_complete\"] == \"t\"\n",
"if not pd.api.types.is_bool_dtype(shuttles[\"moon_clearance_complete\"]):\n",
"    shuttles[\"moon_clearance_complete\"] = shuttles[\"moon_clearance_complete\"] == \"t\"\n",
"\n",
"# regex=False: \"$\" is the end-of-string anchor in a regular expression, so it has to be\n",
"# matched literally to turn \"$1,325.0\" into 1325.0 regardless of the pandas default.\n",
"if not pd.api.types.is_numeric_dtype(shuttles[\"price\"]):\n",
"    shuttles[\"price\"] = (\n",
"        shuttles[\"price\"]\n",
"        .str.replace(\"$\", \"\", regex=False)\n",
"        .str.replace(\",\", \"\", regex=False)\n",
"        .astype(float)\n",
"    )\n",
"\n",
"shuttles.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f28328f8-e78a-4453-82fd-8e3b42d9ec70",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>shuttle_location</th>\n",
" <th>shuttle_type</th>\n",
" <th>engine_type</th>\n",
" <th>engine_vendor</th>\n",
" <th>engines</th>\n",
" <th>passenger_capacity</th>\n",
" <th>cancellation_policy</th>\n",
" <th>crew</th>\n",
" <th>d_check_complete</th>\n",
" <th>...</th>\n",
" <th>shuttle_id</th>\n",
" <th>review_scores_rating</th>\n",
" <th>review_scores_comfort</th>\n",
" <th>review_scores_amenities</th>\n",
" <th>review_scores_trip</th>\n",
" <th>review_scores_crew</th>\n",
" <th>review_scores_location</th>\n",
" <th>review_scores_price</th>\n",
" <th>number_of_reviews</th>\n",
" <th>reviews_per_month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63561</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>63561</td>\n",
" <td>97.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>133</td>\n",
" <td>1.65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>36260</td>\n",
" <td>Anguilla</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>36260</td>\n",
" <td>90.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>3</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>57015</td>\n",
" <td>Russian Federation</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>moderate</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>57015</td>\n",
" <td>95.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>14</td>\n",
" <td>0.14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14035</td>\n",
" <td>Barbados</td>\n",
" <td>Type V5</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>3.0</td>\n",
" <td>6</td>\n",
" <td>strict</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>14035</td>\n",
" <td>93.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>39</td>\n",
" <td>0.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10036</td>\n",
" <td>Sao Tome and Principe</td>\n",
" <td>Type V2</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>2.0</td>\n",
" <td>4</td>\n",
" <td>strict</td>\n",
" <td>2.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>10036</td>\n",
" <td>98.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>92</td>\n",
" <td>0.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
" id shuttle_location shuttle_type engine_type engine_vendor \\\n",
"0 63561 Niue Type V5 Quantum ThetaBase Services \n",
"1 36260 Anguilla Type V5 Quantum ThetaBase Services \n",
"2 57015 Russian Federation Type V5 Quantum ThetaBase Services \n",
"3 14035 Barbados Type V5 Plasma ThetaBase Services \n",
"4 10036 Sao Tome and Principe Type V2 Plasma ThetaBase Services \n",
"\n",
" engines passenger_capacity cancellation_policy crew d_check_complete \\\n",
"0 1.0 2 strict 1.0 False \n",
"1 1.0 2 strict 1.0 True \n",
"2 1.0 2 moderate 0.0 False \n",
"3 3.0 6 strict 3.0 False \n",
"4 2.0 4 strict 2.0 False \n",
"\n",
" ... shuttle_id review_scores_rating review_scores_comfort \\\n",
"0 ... 63561 97.0 10.0 \n",
"1 ... 36260 90.0 8.0 \n",
"2 ... 57015 95.0 9.0 \n",
"3 ... 14035 93.0 10.0 \n",
"4 ... 10036 98.0 10.0 \n",
"\n",
" review_scores_amenities review_scores_trip review_scores_crew \\\n",
"0 9.0 10.0 10.0 \n",
"1 9.0 10.0 9.0 \n",
"2 10.0 9.0 10.0 \n",
"3 9.0 9.0 9.0 \n",
"4 10.0 10.0 10.0 \n",
"\n",
" review_scores_location review_scores_price number_of_reviews \\\n",
"0 9.0 10.0 133 \n",
"1 9.0 9.0 3 \n",
"2 9.0 9.0 14 \n",
"3 10.0 9.0 39 \n",
"4 9.0 9.0 92 \n",
"\n",
" reviews_per_month \n",
"0 1.65 \n",
"1 0.09 \n",
"2 0.14 \n",
"3 0.42 \n",
"4 0.94 \n",
"\n",
"[5 rows x 23 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach the review scores to each shuttle: shuttles.id is matched against reviews.shuttle_id.\n",
"rated_shuttles = pd.merge(shuttles, reviews, left_on=\"id\", right_on=\"shuttle_id\")\n",
"rated_shuttles.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b23c9053-18df-4c2d-ac9a-ea1010e31e69",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_x</th>\n",
" <th>shuttle_location</th>\n",
" <th>shuttle_type</th>\n",
" <th>engine_type</th>\n",
" <th>engine_vendor</th>\n",
" <th>engines</th>\n",
" <th>passenger_capacity</th>\n",
" <th>cancellation_policy</th>\n",
" <th>crew</th>\n",
" <th>d_check_complete</th>\n",
" <th>...</th>\n",
" <th>review_scores_crew</th>\n",
" <th>review_scores_location</th>\n",
" <th>review_scores_price</th>\n",
" <th>number_of_reviews</th>\n",
" <th>reviews_per_month</th>\n",
" <th>id_y</th>\n",
" <th>company_rating</th>\n",
" <th>company_location</th>\n",
" <th>total_fleet_count</th>\n",
" <th>iata_approved</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63561</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>133</td>\n",
" <td>1.65</td>\n",
" <td>35029</td>\n",
" <td>100.0</td>\n",
" <td>Niue</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>53260</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>Banks, Wood and Phillips</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>37</td>\n",
" <td>0.48</td>\n",
" <td>35029</td>\n",
" <td>100.0</td>\n",
" <td>Niue</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>51019</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>flexible</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>10</td>\n",
" <td>0.15</td>\n",
" <td>35029</td>\n",
" <td>100.0</td>\n",
" <td>Niue</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>53898</td>\n",
" <td>Niue</td>\n",
" <td>Type V5</td>\n",
" <td>Plasma</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>3.0</td>\n",
" <td>5</td>\n",
" <td>strict</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>11</td>\n",
" <td>0.21</td>\n",
" <td>35029</td>\n",
" <td>100.0</td>\n",
" <td>Niue</td>\n",
" <td>4.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>36260</td>\n",
" <td>Anguilla</td>\n",
" <td>Type V5</td>\n",
" <td>Quantum</td>\n",
" <td>ThetaBase Services</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>strict</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>3</td>\n",
" <td>0.09</td>\n",
" <td>30292</td>\n",
" <td>67.0</td>\n",
" <td>Anguilla</td>\n",
" <td>6.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" id_x shuttle_location shuttle_type engine_type engine_vendor \\\n",
"0 63561 Niue Type V5 Quantum ThetaBase Services \n",
"1 53260 Niue Type V5 Quantum Banks, Wood and Phillips \n",
"2 51019 Niue Type V5 Quantum ThetaBase Services \n",
"3 53898 Niue Type V5 Plasma ThetaBase Services \n",
"4 36260 Anguilla Type V5 Quantum ThetaBase Services \n",
"\n",
" engines passenger_capacity cancellation_policy crew d_check_complete \\\n",
"0 1.0 2 strict 1.0 False \n",
"1 1.0 2 strict 1.0 False \n",
"2 1.0 2 flexible 1.0 False \n",
"3 3.0 5 strict 3.0 False \n",
"4 1.0 2 strict 1.0 True \n",
"\n",
" ... review_scores_crew review_scores_location review_scores_price \\\n",
"0 ... 10.0 9.0 10.0 \n",
"1 ... 10.0 9.0 10.0 \n",
"2 ... 10.0 9.0 9.0 \n",
"3 ... 10.0 9.0 10.0 \n",
"4 ... 9.0 9.0 9.0 \n",
"\n",
" number_of_reviews reviews_per_month id_y company_rating \\\n",
"0 133 1.65 35029 100.0 \n",
"1 37 0.48 35029 100.0 \n",
"2 10 0.15 35029 100.0 \n",
"3 11 0.21 35029 100.0 \n",
"4 3 0.09 30292 67.0 \n",
"\n",
" company_location total_fleet_count iata_approved \n",
"0 Niue 4.0 False \n",
"1 Niue 4.0 False \n",
"2 Niue 4.0 False \n",
"3 Niue 4.0 False \n",
"4 Anguilla 6.0 False \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Join the company attributes onto every rated shuttle, then discard rows with any missing value.\n",
"# Both frames carry an \"id\" column, which is why the result shows id_x / id_y.\n",
"model_input_table = (\n",
"    rated_shuttles\n",
"    .merge(companies, left_on=\"company_id\", right_on=\"id\")\n",
"    .dropna()\n",
")\n",
"model_input_table.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d7338948",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "be44b0e1",
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: the eight predictor columns, selected explicitly by label.\n",
"X = model_input_table.loc[\n",
"    :,\n",
"    [\n",
"        \"engines\",\n",
"        \"passenger_capacity\",\n",
"        \"crew\",\n",
"        \"d_check_complete\",\n",
"        \"moon_clearance_complete\",\n",
"        \"iata_approved\",\n",
"        \"company_rating\",\n",
"        \"review_scores_rating\",\n",
"    ],\n",
"]\n",
"# Regression target: the cleaned shuttle price.\n",
"y = model_input_table.loc[:, \"price\"]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c74ed715",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>engines</th>\n",
" <th>passenger_capacity</th>\n",
" <th>crew</th>\n",
" <th>d_check_complete</th>\n",
" <th>moon_clearance_complete</th>\n",
" <th>iata_approved</th>\n",
" <th>company_rating</th>\n",
" <th>review_scores_rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>100.0</td>\n",
" <td>97.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>100.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>100.0</td>\n",
" <td>92.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3.0</td>\n",
" <td>5</td>\n",
" <td>3.0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>100.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>67.0</td>\n",
" <td>90.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" engines passenger_capacity crew d_check_complete \\\n",
"0 1.0 2 1.0 False \n",
"1 1.0 2 1.0 False \n",
"2 1.0 2 1.0 False \n",
"3 3.0 5 3.0 False \n",
"4 1.0 2 1.0 True \n",
"\n",
" moon_clearance_complete iata_approved company_rating \\\n",
"0 False False 100.0 \n",
"1 False False 100.0 \n",
"2 False False 100.0 \n",
"3 False False 100.0 \n",
"4 False False 67.0 \n",
"\n",
" review_scores_rating \n",
"0 97.0 \n",
"1 98.0 \n",
"2 92.0 \n",
"3 98.0 \n",
"4 90.0 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a20d5ab0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1325.0\n",
"1 1325.0\n",
"2 1260.0\n",
"3 2196.0\n",
"4 1780.0\n",
"Name: price, dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "107b428f",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for evaluation. The fixed seed keeps the split, and therefore every\n",
"# downstream result, identical on each Restart & Run All.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "edf51576",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(23814, 5954)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(X_train), len(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "95e183ec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(23814, 5954)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(y_train), len(y_test)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7e982659",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "9bd33c88",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
],
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = LinearRegression()\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "914c9e8a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
],
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "0e5d17fb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1819.36118051, 1831.15053486, 1470.80678637, ..., 1725.91448542,\n",
" 3273.71348352, 5199.674609 ])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "a0499d1d",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import r2_score"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "cb57dc0e",
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "a770979d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.4244296831243134"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r2_score(y_test, y_pred)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment