Skip to content

Instantly share code, notes, and snippets.

@susanli2016
Created October 5, 2018 03:42
Show Gist options
  • Save susanli2016/56f695fd59064a6f68f4f6b66fd9cb75 to your computer and use it in GitHub Desktop.
Save susanli2016/56f695fd59064a6f68f4f6b66fd9cb75 to your computer and use it in GitHub Desktop.
from sklearn.model_selection import train_test_split
# Split `taxi` into train/test partitions with 30% held out for testing;
# the fixed random_state makes the split reproducible between runs.
train, test = train_test_split(
    taxi,
    random_state=42,
    test_size=0.3,
)
import numpy as np
import shutil
def distance_between(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    The central angle is computed with the spherical law of cosines and
    then converted from degrees of arc to kilometres.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometres. Only numpy ufuncs and arithmetic are
        applied, so scalars and element-wise array-likes are both accepted
        and the result follows the inputs' broadcast shape.
    """
    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    cos_angle = (
        np.sin(lat1_rad) * np.sin(lat2_rad)
        + np.cos(lat1_rad) * np.cos(lat2_rad) * np.cos(np.radians(lon2 - lon1))
    )
    # Floating-point rounding can push the cosine marginally outside
    # [-1, 1] (typically for coincident or antipodal points), where arccos
    # yields NaN. Clamp so those cases give 0 / half the circumference.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)
    # One degree of arc is 60 nautical miles; 1.1515 converts nautical
    # miles to statute miles and 1.609344 converts statute miles to km.
    return np.degrees(np.arccos(cos_angle)) * 60 * 1.1515 * 1.609344
def estimate_distance(df):
    """Return the pickup-to-dropoff distance for the trips in ``df``.

    Reads the ``pickup_latitude``, ``pickup_longitude``,
    ``dropoff_latitude`` and ``dropoff_longitude`` entries of ``df`` and
    forwards them to ``distance_between``.
    """
    start_lat = df['pickup_latitude']
    start_lon = df['pickup_longitude']
    end_lat = df['dropoff_latitude']
    end_lon = df['dropoff_longitude']
    return distance_between(start_lat, start_lon, end_lat, end_lon)
def compute_rmse(actual, predicted):
    """Return the root-mean-square error between ``actual`` and ``predicted``."""
    residuals = actual - predicted
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)
def print_rmse(df, rate, name):
    """Print the RMSE of a distance-proportional fare estimate for ``df``.

    The predicted fare is ``rate`` multiplied by the estimated trip
    distance; it is compared against ``df['fare_amount']`` and the result
    is printed prefixed with ``name``.
    """
    predicted_fare = rate * estimate_distance(df)
    rmse = compute_rmse(df['fare_amount'], predicted_fare)
    print("{1} RMSE = {0}".format(rmse, name))
# Baseline model: a single fare-per-distance rate, taken as the mean
# training fare divided by the mean estimated training distance.
rate = (
    train['fare_amount'].mean()
    / estimate_distance(train).mean()
)
print("Rate = ${0}/km".format(rate))

# Report the baseline's error on the training and held-out partitions.
print_rmse(train, rate, 'Train')
print_rmse(test, rate, 'Test')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment