# sourabhxyz/model6789.py

## model6789.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import math
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
import xgboost
# Tolerance for the 24-hour wrap in getEndTime: end times above 24 - eps are wrapped.
eps = 1e-6
def getMH(x):
    """Return the time of day of Unix timestamp ``x`` as fractional hours.

    The timestamp is interpreted by ``datetime.fromtimestamp`` (local time),
    so the result lies in ``[0, 24)``.
    """
    moment = datetime.fromtimestamp(x)
    hours = moment.hour
    hours += moment.minute / 60
    hours += moment.second / 3600
    return hours
def getDay(x):
    """Return the weekday of Unix timestamp ``x`` as an integer (Monday = 0 ... Sunday = 6)."""
    return datetime.fromtimestamp(x).weekday()
def getEndTime(row_):
    """Return the hour of day at which the row's polyline ends.

    ``row_['StartTime']`` is taken as an hour of day and ``row_['Len']`` as a
    count of 15-second steps. The sum is wrapped by repeatedly subtracting 24
    while it exceeds ``24 - eps``.
    """
    finish = row_['StartTime'] + (row_['Len'] * 15) / 3600
    limit = 24 - eps
    while finish > limit:
        finish = finish - 24
    return finish
def correlateTime(row_):
    """Return the row's ``StartTime`` divided by ``Len + 1``."""
    start = row_['StartTime']
    return start / (row_['Len'] + 1)
def haverSine(row_):
    """Return the haversine distance between the row's start and cut points.

    Reads ``StartLat``/``StartLon`` and ``CutLat``/``CutLon`` (degrees) and
    uses a sphere radius of 6371, so the result is in kilometres.
    """
    lat_a, lon_a = row_['StartLat'], row_['StartLon']
    lat_b, lon_b = row_['CutLat'], row_['CutLon']
    # pi / 360 converts degrees to radians and halves the angle in one step.
    half_dlon = np.abs(lon_b - lon_a) * np.pi / 360
    half_dlat = np.abs(lat_b - lat_a) * np.pi / 360
    cos_product = np.cos(lat_b * np.pi / 180) * np.cos(lat_a * np.pi / 180)
    a = np.sin(half_dlat)**2 + cos_product * np.sin(half_dlon)**2
    return 2 * 6371 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
def satisfy_(row_, snap_):  # Assumption, each trip is less than 24 hours
    """Return True when snapshot hour ``snap_`` falls inside the row's trip.

    When ``StartTime < EndTime`` the snapshot must lie strictly between them.
    Otherwise the trip is treated as wrapping past midnight and the snapshot
    must lie in ``[StartTime, 24]`` or ``[0, EndTime]``.
    """
    st = row_['StartTime']
    et = row_['EndTime']
    if st < et:
        return bool(st < snap_ < et)
    return bool((st <= snap_ <= 24) or (0 <= snap_ <= et))
def getCutLonLat(row_, snap_):
    """Return the ``(lon, lat)`` polyline point reached at snapshot hour ``snap_``.

    ``row_['StartTime']`` and ``snap_`` are hours of day, while polyline points
    are indexed in 15-second steps (the spacing getEndTime uses). The elapsed
    time is therefore converted from hours to seconds before dividing by 15;
    the previous code divided hours by 15 and almost always returned point 0
    or 1. The index is clamped to the polyline.

    Args:
        row_: mapping with ``StartTime`` (hour of day) and ``POLYLINE``
            (sequence of ``[lon, lat]`` pairs).
        snap_: snapshot hour of day.
    """
    # Modulo 24 handles trips that start before midnight and are cut after it.
    elapsed_hours = (snap_ - row_['StartTime']) % 24
    # Tiny slack keeps float noise from pushing an exact step count up by one.
    pos_ = math.ceil(elapsed_hours * 3600 / 15 - 1e-9)
    pos_ = max(0, min(pos_, len(row_['POLYLINE']) - 1))
    at = row_['POLYLINE'][pos_]
    return (at[0], at[1])
def getDistance(row_, snap_):
    """Return how many polyline points the trip has produced by snapshot hour ``snap_``.

    ``row_['StartTime']`` and ``snap_`` are hours of day; polyline points are
    indexed in 15-second steps, so the elapsed time is converted from hours to
    seconds before dividing by 15 (the previous code divided hours by 15).
    The index is clamped to the polyline and returned as a 1-based count.
    """
    # Modulo 24 handles trips that start before midnight and are cut after it.
    elapsed_hours = (snap_ - row_['StartTime']) % 24
    # Tiny slack keeps float noise from pushing an exact step count up by one.
    pos_ = math.ceil(elapsed_hours * 3600 / 15 - 1e-9)
    pos_ = max(0, min(pos_, len(row_['POLYLINE']) - 1))
    return pos_ + 1

def Drop_(df):
    """Remove the TRIP_ID, TAXI_ID, DAY_TYPE and MISSING_DATA columns from ``df`` in place."""
    for column in ("TRIP_ID", "TAXI_ID", "DAY_TYPE", "MISSING_DATA"):
        df.drop(column, axis=1, inplace=True)
# Snapshot hours of day; one group of models is trained per entry.
snaps = [18.0, 8.5, 17.75, 4.0, 14.5]


def getClosest(at_):
    """Return the index of the entry in ``snaps`` nearest to ``at_``.

    The first entry wins ties. Returns -1 when no entry is closer than 1000
    (which also covers a NaN ``at_``).
    """
    nearest = min(range(len(snaps)), key=lambda k: abs(snaps[k] - at_))
    if abs(snaps[nearest] - at_) < 1000:
        return nearest
    return -1
# Load the raw competition files.
test = pd.read_csv ('../input/test.csv')
train = pd.read_csv ('../input/train.csv')
# Four submission frames keyed by TRIP_ID, filled in later:
# sub1 -> xgBoostWithoutHeu.csv, sub2 -> RForestWithoutHeu.csv,
# sub3 -> xgBoostWithHeu.csv,    sub4 -> RForestWithHeu.csv.
sub1 = pd.DataFrame ()
sub2 = pd.DataFrame ()
sub1['TRIP_ID'] = test.TRIP_ID
sub2['TRIP_ID'] = test.TRIP_ID
sub3 = pd.DataFrame ()
sub4 = pd.DataFrame ()
sub3['TRIP_ID'] = test.TRIP_ID
sub4['TRIP_ID'] = test.TRIP_ID
Drop_ (test)
# POLYLINE is stored as JSON text; decode it into Python lists.
test['POLYLINE'] = test['POLYLINE'].apply(json.loads)
train['POLYLINE'] = train['POLYLINE'].apply(json.loads)
# Len = number of polyline points minus one, i.e. the number of steps between points.
train['Len'] = train.POLYLINE.apply (lambda x : len (x) - 1)
test['Len'] = test.POLYLINE.apply (lambda x : len (x) - 1)
train = train[train['MISSING_DATA'] == False]   # removing insignificant data (as its amount is very low)
train = train[train['Len'] > 7]  # removing short trips
train = train[train['Len'] < 480] # removing long trips
Drop_ (train)
# Test features: hour-of-day start, first and last polyline points and their differences.
test['StartTime'] = test.TIMESTAMP.apply (getMH)
test['StartLon'] = test.POLYLINE.apply (lambda x : x[0][0])
test['StartLat'] = test.POLYLINE.apply (lambda x : x[0][1])
# For test rows the "cut" point is the last point of the given polyline.
test['CutLon'] = test.POLYLINE.apply (lambda x : x[len (x) - 1][0])
test['CutLat'] = test.POLYLINE.apply (lambda x : x[len (x) - 1][1])
test['DiffLon'] = test['CutLon'] - test['StartLon']
test['DiffLat'] = test['CutLat'] - test['StartLat']
# EndTime must be computed while StartTime still holds the hour of day (it is overwritten below).
test['EndTime'] = test.apply (getEndTime, axis = 1)
# Fallback guess used by the "WithHeu" submissions: Len * 15 plus 500.
heuristic = pd.DataFrame ()
heuristic['Guess'] = test.Len.apply (lambda x : x * 15 + 500)
heuristic['Len'] = test['Len']
# StartTime is replaced by StartTime / (Len + 1).
test['StartTime'] = test.apply (correlateTime, axis = 1)
test['Distance'] = test.apply (haverSine, axis = 1)
test['WeekDay'] = test.TIMESTAMP.apply (getDay)
test.drop ("POLYLINE", axis = 1, inplace = True)
test.drop ("Len", axis = 1, inplace = True)
# Keep EndTime aside: it selects the snapshot model but is not a model feature.
testEndTime = test.loc[:, 'EndTime']
test.drop ("EndTime", axis = 1, inplace = True)
test.drop ("TIMESTAMP", axis = 1, inplace = True)
print (test.head ())
# Training features: hour-of-day start, first polyline point and wrapped end hour.
train['StartTime'] = train.TIMESTAMP.apply(getMH)
train['StartLon'] = train.POLYLINE.apply(lambda x: x[0][0])
train['StartLat'] = train.POLYLINE.apply(lambda x: x[0][1])
train['EndTime'] = train.apply(getEndTime, axis=1)

# One training set per snapshot hour, holding the trips in progress at that hour.
trainSets = []
for i in range(len(snaps)):
    train['temp'] = train.apply(satisfy_, axis=1, snap_=snaps[i])
    # Explicit copy: the columns added below must not be written to a slice of
    # `train` (avoids pandas' SettingWithCopy hazard).
    subset = train[train['temp'] == True].copy()
    subset.drop('temp', axis=1, inplace=True)
    subset['CutLonLat'] = subset.apply(getCutLonLat, axis=1, snap_=snaps[i])
    # Computed on the subset only; the original re-applied this to the whole
    # `train` frame on every pass and relied on index alignment.
    # NOTE(review): Len here is the full trip length (it becomes the target
    # below), whereas the test set divides by the observed partial length;
    # confirm this is intended.
    subset['StartTime'] = subset.apply(correlateTime, axis=1)
    subset.drop(['EndTime', 'POLYLINE'], axis=1, inplace=True)
    subset['CutLon'] = subset.CutLonLat.apply(lambda x: x[0])
    subset['CutLat'] = subset.CutLonLat.apply(lambda x: x[1])
    subset.drop('CutLonLat', axis=1, inplace=True)
    # Len becomes the regression target: steps * 15.
    subset['Len'] = subset.Len.apply(lambda x: x * 15)
    subset['DiffLon'] = subset['CutLon'] - subset['StartLon']
    subset['DiffLat'] = subset['CutLat'] - subset['StartLat']
    subset['Distance'] = subset.apply(haverSine, axis=1)
    subset['WeekDay'] = subset.TIMESTAMP.apply(getDay)
    subset.drop("TIMESTAMP", axis=1, inplace=True)
    trainSets.append(subset)
print(trainSets[0].head())
# For each call type, the columns that are removed before training.
unused_cols = (
    ('A', ('CALL_TYPE', 'ORIGIN_STAND')),
    ('B', ('CALL_TYPE', 'ORIGIN_CALL')),
    ('C', ('CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND')),
)
# Slot i * 3 + k holds snapshot i restricted to the k-th call type (A, B, C).
newTrainSets = list(range(15))
for i in range(len(snaps)):  # for the five training sets, further dividing them
    for k, (call_type, cols) in enumerate(unused_cols):
        part = trainSets[i][trainSets[i]['CALL_TYPE'] == call_type]
        for col in cols:
            part.drop(col, axis=1, inplace=True)
        newTrainSets[i * 3 + k] = part
# Fit one XGBoost regressor per (snapshot, call type) training set.
models = []
for i in range(15):
    frame = newTrainSets[i]
    if i % 3 != 2:
        # A and B sets keep one origin column at position 0; the target Len is at 1.
        feature_cols, target_col = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1
    else:
        # C sets have no origin column, so the target Len is at position 0.
        feature_cols, target_col = [1, 2, 3, 4, 5, 6, 7, 8, 9], 0
    X = frame.iloc[:, feature_cols].values
    y = frame.iloc[:, target_col].values
    regressor = xgboost.XGBRegressor()
    regressor.fit(X, y)
    models.append(regressor)
# Predict every test trip with the model matching its call type and the
# snapshot hour closest to its end time.
out_ = []
for i in range(test.shape[0]):
    call_type = test.iloc[i, 0]
    # Feature positions in `test`: one origin column for A (1) or B (2), none
    # for anything else, followed by columns 3..11.
    # NOTE(review): presumably 1 = ORIGIN_CALL and 2 = ORIGIN_STAND; this
    # depends on the CSV column order, so confirm.
    if call_type == 'A':
        cols, offset_ = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11], 0
    elif call_type == 'B':
        cols, offset_ = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 1
    else:
        cols, offset_ = [3, 4, 5, 6, 7, 8, 9, 10, 11], 2
    at = test.iloc[i, cols].values.reshape(1, -1)
    index_ = getClosest(testEndTime[i]) * 3 + offset_
    out_.append(models[index_].predict(at)[0])
sub1['TRAVEL_TIME'] = pd.Series(out_)
sub1.to_csv('xgBoostWithoutHeu.csv', index=False)

# Heuristic variant. The raw predictions are already in out_, so they are
# reused instead of running every model a second time. A prediction is replaced
# by the heuristic guess (column 0) when it exceeds 5000 or when the observed
# Len (column 1) is below 27.
for i in range(test.shape[0]):
    if out_[i] > 5000 or heuristic.iloc[i, 1] < 27:
        out_[i] = heuristic.iloc[i, 0]
sub3['TRAVEL_TIME'] = pd.Series(out_)
sub3.to_csv('xgBoostWithHeu.csv', index=False)

# Remove call-type-B rows with a missing ORIGIN_STAND before fitting the forests.
for i in range(len(snaps)):
    b_set = newTrainSets[i * 3 + 1]
    b_set.drop(index=b_set.index[b_set['ORIGIN_STAND'].isnull()], inplace=True)

# Fit one random forest per (snapshot, call type) training set.
models = []
for i in range(15):
    frame = newTrainSets[i]
    if i % 3 != 2:
        # A and B sets keep one origin column at position 0; the target Len is at 1.
        feature_cols, target_col = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1
    else:
        # C sets have no origin column, so the target Len is at position 0.
        feature_cols, target_col = [1, 2, 3, 4, 5, 6, 7, 8, 9], 0
    X = frame.iloc[:, feature_cols].values
    y = frame.iloc[:, target_col].values
    forest = RandomForestRegressor(n_estimators=10, max_depth=7, random_state=0)
    forest.fit(X, y)
    models.append(forest)
# Predict every test trip with the model matching its call type and the
# snapshot hour closest to its end time.
out_ = []
for i in range(test.shape[0]):
    call_type = test.iloc[i, 0]
    # Feature positions in `test`: one origin column for A (1) or B (2), none
    # for anything else, followed by columns 3..11.
    # NOTE(review): presumably 1 = ORIGIN_CALL and 2 = ORIGIN_STAND; this
    # depends on the CSV column order, so confirm.
    if call_type == 'A':
        cols, offset_ = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11], 0
    elif call_type == 'B':
        cols, offset_ = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 1
    else:
        cols, offset_ = [3, 4, 5, 6, 7, 8, 9, 10, 11], 2
    at = test.iloc[i, cols].values.reshape(1, -1)
    index_ = getClosest(testEndTime[i]) * 3 + offset_
    out_.append(models[index_].predict(at)[0])
sub2['TRAVEL_TIME'] = pd.Series(out_)
sub2.to_csv('RForestWithoutHeu.csv', index=False)

# Heuristic variant. The raw predictions are already in out_, so they are
# reused instead of running every model a second time. A prediction is replaced
# by the heuristic guess (column 0) when it exceeds 5000 or when the observed
# Len (column 1) is below 27.
for i in range(test.shape[0]):
    if out_[i] > 5000 or heuristic.iloc[i, 1] < 27:
        out_[i] = heuristic.iloc[i, 0]
sub4['TRAVEL_TIME'] = pd.Series(out_)
sub4.to_csv('RForestWithHeu.csv', index=False)
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd
	import json
	import math
	from datetime import datetime
	from sklearn.ensemble import RandomForestRegressor
	import xgboost
	# Tolerance for the 24-hour wrap in getEndTime: end times above 24 - eps are wrapped.
	eps = 1e-6
	def getMH(x):  # get normalised time from time
	    """Return the time of day of Unix timestamp ``x`` as fractional hours.

	    The timestamp is interpreted by ``datetime.fromtimestamp`` (local time),
	    so the result lies in ``[0, 24)``.
	    """
	    x = datetime.fromtimestamp(x)
	    y = x.hour + (x.minute) / 60 + x.second / 3600
	    return y
	def getDay(x):  # return weekday as an integer
	    """Return the weekday of Unix timestamp ``x`` as an integer (Monday = 0 ... Sunday = 6)."""
	    x = datetime.fromtimestamp(x)
	    return x.weekday()
	def getEndTime(row_):
	    """Return the hour of day at which the row's polyline ends.

	    ``row_['StartTime']`` is taken as an hour of day and ``row_['Len']`` as a
	    count of 15-second steps. The sum is wrapped by repeatedly subtracting 24
	    while it exceeds ``24 - eps``.
	    """
	    st = row_['StartTime']
	    len_ = row_['Len']
	    endTime = st + (len_ * 15) / 3600
	    while endTime > 24 - eps:
	        endTime = endTime - 24
	    return endTime
	def correlateTime(row_):
	    """Return the row's ``StartTime`` divided by ``Len + 1``."""
	    st = row_['StartTime']
	    len_ = row_['Len']
	    return st / (len_ + 1)
	def haverSine(row_):
	    """Return the haversine distance between the row's start and cut points.

	    Reads ``StartLat``/``StartLon`` and ``CutLat``/``CutLon`` (degrees) and
	    uses a sphere radius of 6371, so the result is in kilometres.
	    """
	    slat = row_['StartLat']
	    slon = row_['StartLon']
	    clat = row_['CutLat']
	    clon = row_['CutLon']
	    # pi / 360 converts degrees to radians and halves the angle in one step.
	    lonDiff = np.abs(clon - slon) * np.pi / 360
	    latDiff = np.abs(clat - slat) * np.pi / 360
	    # Operators restored: sin(latDiff) is squared (**2) and clat is multiplied by pi / 180.
	    a = np.sin(latDiff)**2 + np.cos(clat * np.pi / 180) * np.cos(slat * np.pi / 180) * np.sin(lonDiff)**2
	    d = 2 * 6371 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
	    return d
	def satisfy_(row_, snap_):  # Assumption, each trip is less than 24 hours
	    """Return True when snapshot hour ``snap_`` falls inside the row's trip.

	    When ``StartTime < EndTime`` the snapshot must lie strictly between them.
	    Otherwise the trip is treated as wrapping past midnight and the snapshot
	    must lie in ``[StartTime, 24]`` or ``[0, EndTime]``.
	    """
	    st = row_['StartTime']
	    et = row_['EndTime']
	    ret_ = False
	    if st < et:
	        if st < snap_ and snap_ < et:
	            ret_ = True
	    else:
	        if (snap_ >= st and snap_ <= 24) or (snap_ >= 0 and snap_ <= et):
	            ret_ = True
	    return ret_
	def getCutLonLat(row_, snap_):
	    """Return the ``(lon, lat)`` polyline point reached at snapshot hour ``snap_``.

	    ``row_['StartTime']`` and ``snap_`` are hours of day, while polyline points
	    are indexed in 15-second steps (the spacing getEndTime uses). The elapsed
	    time is therefore converted from hours to seconds before dividing by 15;
	    the previous code divided hours by 15. The index is clamped to the polyline.
	    """
	    # Modulo 24 handles trips that start before midnight and are cut after it.
	    elapsed_hours = (snap_ - row_['StartTime']) % 24
	    # Tiny slack keeps float noise from pushing an exact step count up by one.
	    pos_ = math.ceil(elapsed_hours * 3600 / 15 - 1e-9)
	    pos_ = max(0, min(pos_, len(row_['POLYLINE']) - 1))
	    at = row_['POLYLINE'][pos_]
	    return (at[0], at[1])
	def getDistance(row_, snap_):
	    """Return how many polyline points the trip has produced by snapshot hour ``snap_``.

	    ``row_['StartTime']`` and ``snap_`` are hours of day; polyline points are
	    indexed in 15-second steps, so the elapsed time is converted from hours to
	    seconds before dividing by 15 (the previous code divided hours by 15).
	    The index is clamped to the polyline and returned as a 1-based count.
	    """
	    # Modulo 24 handles trips that start before midnight and are cut after it.
	    elapsed_hours = (snap_ - row_['StartTime']) % 24
	    # Tiny slack keeps float noise from pushing an exact step count up by one.
	    pos_ = math.ceil(elapsed_hours * 3600 / 15 - 1e-9)
	    pos_ = max(0, min(pos_, len(row_['POLYLINE']) - 1))
	    return pos_ + 1

	def Drop_(df):
	    """Remove the TRIP_ID, TAXI_ID, DAY_TYPE and MISSING_DATA columns from ``df`` in place."""
	    df.drop("TRIP_ID", axis=1, inplace=True)
	    df.drop("TAXI_ID", axis=1, inplace=True)
	    df.drop("DAY_TYPE", axis=1, inplace=True)
	    df.drop("MISSING_DATA", axis=1, inplace=True)
	# Snapshot hours of day; one group of models is trained per entry.
	snaps = [18.0, 8.5, 17.75, 4.0, 14.5]
	def getClosest(at_):
	    """Return the index of the entry in ``snaps`` nearest to ``at_``.

	    The first entry wins ties. Returns -1 when no entry is closer than 1000.
	    """
	    dif_ = 1000
	    ans_ = -1
	    for i in range(len(snaps)):
	        if abs(snaps[i] - at_) < dif_:
	            dif_ = abs(snaps[i] - at_)
	            ans_ = i
	    return ans_
	# Load the raw competition files.
	test = pd.read_csv ('../input/test.csv')
	train = pd.read_csv ('../input/train.csv')
	# Four submission frames keyed by TRIP_ID, filled in later:
	# sub1 -> xgBoostWithoutHeu.csv, sub2 -> RForestWithoutHeu.csv,
	# sub3 -> xgBoostWithHeu.csv,    sub4 -> RForestWithHeu.csv.
	sub1 = pd.DataFrame ()
	sub2 = pd.DataFrame ()
	sub1['TRIP_ID'] = test.TRIP_ID
	sub2['TRIP_ID'] = test.TRIP_ID
	sub3 = pd.DataFrame ()
	sub4 = pd.DataFrame ()
	sub3['TRIP_ID'] = test.TRIP_ID
	sub4['TRIP_ID'] = test.TRIP_ID
	Drop_ (test)
	# POLYLINE is stored as JSON text; decode it into Python lists.
	test['POLYLINE'] = test['POLYLINE'].apply(json.loads)
	train['POLYLINE'] = train['POLYLINE'].apply(json.loads)
	# Len = number of polyline points minus one, i.e. the number of steps between points.
	train['Len'] = train.POLYLINE.apply (lambda x : len (x) - 1)
	test['Len'] = test.POLYLINE.apply (lambda x : len (x) - 1)
	train = train[train['MISSING_DATA'] == False] # removing insignificant data (as its amount is very low)
	train = train[train['Len'] > 7] # removing short trips
	train = train[train['Len'] < 480] # removing long trips
	Drop_ (train)
	# Test features: hour-of-day start, first and last polyline points and their differences.
	test['StartTime'] = test.TIMESTAMP.apply (getMH)
	test['StartLon'] = test.POLYLINE.apply (lambda x : x[0][0])
	test['StartLat'] = test.POLYLINE.apply (lambda x : x[0][1])
	# For test rows the "cut" point is the last point of the given polyline.
	test['CutLon'] = test.POLYLINE.apply (lambda x : x[len (x) - 1][0])
	test['CutLat'] = test.POLYLINE.apply (lambda x : x[len (x) - 1][1])
	test['DiffLon'] = test['CutLon'] - test['StartLon']
	test['DiffLat'] = test['CutLat'] - test['StartLat']
	# EndTime must be computed while StartTime still holds the hour of day (it is overwritten below).
	test['EndTime'] = test.apply (getEndTime, axis = 1)
	# Fallback guess used by the "WithHeu" submissions: Len * 15 plus 500.
	heuristic = pd.DataFrame ()
	heuristic['Guess'] = test.Len.apply (lambda x : x * 15 + 500)
	heuristic['Len'] = test['Len']
	# StartTime is replaced by StartTime / (Len + 1).
	test['StartTime'] = test.apply (correlateTime, axis = 1)
	test['Distance'] = test.apply (haverSine, axis = 1)
	test['WeekDay'] = test.TIMESTAMP.apply (getDay)
	test.drop ("POLYLINE", axis = 1, inplace = True)
	test.drop ("Len", axis = 1, inplace = True)
	# Keep EndTime aside: it selects the snapshot model but is not a model feature.
	testEndTime = test.loc[:, 'EndTime']
	test.drop ("EndTime", axis = 1, inplace = True)
	test.drop ("TIMESTAMP", axis = 1, inplace = True)
	print (test.head ())
	# Training features: hour-of-day start, first polyline point and wrapped end hour.
	train['StartTime'] = train.TIMESTAMP.apply(getMH)
	train['StartLon'] = train.POLYLINE.apply(lambda x: x[0][0])
	train['StartLat'] = train.POLYLINE.apply(lambda x: x[0][1])
	train['EndTime'] = train.apply(getEndTime, axis=1)

	# One training set per snapshot hour, holding the trips in progress at that hour.
	trainSets = []
	for i in range(len(snaps)):
	    train['temp'] = train.apply(satisfy_, axis=1, snap_=snaps[i])
	    # Explicit copy: the columns added below must not be written to a slice
	    # of `train` (avoids pandas' SettingWithCopy hazard).
	    subset = train[train['temp'] == True].copy()
	    subset.drop('temp', axis=1, inplace=True)
	    subset['CutLonLat'] = subset.apply(getCutLonLat, axis=1, snap_=snaps[i])
	    # Computed on the subset only rather than on the whole `train` frame.
	    # NOTE(review): Len here is the full trip length (it becomes the target
	    # below), whereas the test set divides by the observed partial length;
	    # confirm this is intended.
	    subset['StartTime'] = subset.apply(correlateTime, axis=1)
	    subset.drop(['EndTime', 'POLYLINE'], axis=1, inplace=True)
	    subset['CutLon'] = subset.CutLonLat.apply(lambda x: x[0])
	    subset['CutLat'] = subset.CutLonLat.apply(lambda x: x[1])
	    subset.drop('CutLonLat', axis=1, inplace=True)
	    # Len becomes the regression target: steps * 15.
	    subset['Len'] = subset.Len.apply(lambda x: x * 15)
	    subset['DiffLon'] = subset['CutLon'] - subset['StartLon']
	    subset['DiffLat'] = subset['CutLat'] - subset['StartLat']
	    subset['Distance'] = subset.apply(haverSine, axis=1)
	    subset['WeekDay'] = subset.TIMESTAMP.apply(getDay)
	    subset.drop("TIMESTAMP", axis=1, inplace=True)
	    trainSets.append(subset)
	print(trainSets[0].head())
	# Slot i * 3 + k holds snapshot i restricted to call type A (k=0), B (k=1) or C (k=2),
	# with the columns that call type does not use removed.
	newTrainSets = [i for i in range(15)]
	for i in range(len(snaps)):  # for the five training sets, further dividing them
	    newTrainSets[i * 3] = trainSets[i][trainSets[i]['CALL_TYPE'] == 'A']
	    newTrainSets[i * 3].drop('CALL_TYPE', axis=1, inplace=True)
	    newTrainSets[i * 3].drop('ORIGIN_STAND', axis=1, inplace=True)
	    newTrainSets[i * 3 + 1] = trainSets[i][trainSets[i]['CALL_TYPE'] == 'B']
	    newTrainSets[i * 3 + 1].drop('CALL_TYPE', axis=1, inplace=True)
	    newTrainSets[i * 3 + 1].drop('ORIGIN_CALL', axis=1, inplace=True)
	    newTrainSets[i * 3 + 2] = trainSets[i][trainSets[i]['CALL_TYPE'] == 'C']
	    newTrainSets[i * 3 + 2].drop('CALL_TYPE', axis=1, inplace=True)
	    newTrainSets[i * 3 + 2].drop('ORIGIN_CALL', axis=1, inplace=True)
	    newTrainSets[i * 3 + 2].drop('ORIGIN_STAND', axis=1, inplace=True)
	# Fit one XGBoost regressor per (snapshot, call type) training set.
	models = [i for i in range(15)]
	for i in range(15):
	    models[i] = xgboost.XGBRegressor()
	    if i % 3 != 2:
	        # A and B sets keep one origin column at position 0; the target Len is at 1.
	        X = newTrainSets[i].iloc[:, [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]].values
	        y = newTrainSets[i].iloc[:, 1].values
	    else:
	        # C sets have no origin column, so the target Len is at position 0.
	        X = newTrainSets[i].iloc[:, [1, 2, 3, 4, 5, 6, 7, 8, 9]].values
	        y = newTrainSets[i].iloc[:, 0].values
	    models[i].fit(X, y)

	# Predict every test trip with the model matching its call type and the
	# snapshot hour closest to its end time.
	out_ = []
	for i in range(test.shape[0]):
	    call_type = test.iloc[i, 0]
	    # Feature positions in `test`: one origin column for A (1) or B (2),
	    # none for anything else, followed by columns 3..11.
	    if call_type == 'A':
	        cols, offset_ = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11], 0
	    elif call_type == 'B':
	        cols, offset_ = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 1
	    else:
	        cols, offset_ = [3, 4, 5, 6, 7, 8, 9, 10, 11], 2
	    at = test.iloc[i, cols].values.reshape(1, -1)
	    index_ = getClosest(testEndTime[i]) * 3 + offset_
	    out_.append(models[index_].predict(at)[0])
	sub1['TRAVEL_TIME'] = pd.Series(out_)
	sub1.to_csv('xgBoostWithoutHeu.csv', index=False)

	# Heuristic variant. The raw predictions are already in out_, so they are
	# reused instead of running every model a second time. A prediction is
	# replaced by the heuristic guess (column 0) when it exceeds 5000 or when
	# the observed Len (column 1) is below 27.
	for i in range(test.shape[0]):
	    if out_[i] > 5000 or heuristic.iloc[i, 1] < 27:
	        out_[i] = heuristic.iloc[i, 0]
	sub3['TRAVEL_TIME'] = pd.Series(out_)
	sub3.to_csv('xgBoostWithHeu.csv', index=False)

	# Remove call-type-B rows with a missing ORIGIN_STAND before fitting the forests.
	for i in range(len(snaps)):
	    newTrainSets[i * 3 + 1].drop(index=newTrainSets[i * 3 + 1].loc[newTrainSets[i * 3 + 1]['ORIGIN_STAND'].isnull()].index, inplace=True)

	# Fit one random forest per (snapshot, call type) training set.
	models = [i for i in range(15)]
	for i in range(15):
	    models[i] = RandomForestRegressor(n_estimators=10, max_depth=7, random_state=0)
	    if i % 3 != 2:
	        # A and B sets keep one origin column at position 0; the target Len is at 1.
	        X = newTrainSets[i].iloc[:, [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]].values
	        y = newTrainSets[i].iloc[:, 1].values
	    else:
	        # C sets have no origin column, so the target Len is at position 0.
	        X = newTrainSets[i].iloc[:, [1, 2, 3, 4, 5, 6, 7, 8, 9]].values
	        y = newTrainSets[i].iloc[:, 0].values
	    models[i].fit(X, y)

	# Predict every test trip with the model matching its call type and the
	# snapshot hour closest to its end time.
	out_ = []
	for i in range(test.shape[0]):
	    call_type = test.iloc[i, 0]
	    # Feature positions in `test`: one origin column for A (1) or B (2),
	    # none for anything else, followed by columns 3..11.
	    if call_type == 'A':
	        cols, offset_ = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11], 0
	    elif call_type == 'B':
	        cols, offset_ = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 1
	    else:
	        cols, offset_ = [3, 4, 5, 6, 7, 8, 9, 10, 11], 2
	    at = test.iloc[i, cols].values.reshape(1, -1)
	    index_ = getClosest(testEndTime[i]) * 3 + offset_
	    out_.append(models[index_].predict(at)[0])
	sub2['TRAVEL_TIME'] = pd.Series(out_)
	sub2.to_csv('RForestWithoutHeu.csv', index=False)

	# Heuristic variant. The raw predictions are already in out_, so they are
	# reused instead of running every model a second time. A prediction is
	# replaced by the heuristic guess (column 0) when it exceeds 5000 or when
	# the observed Len (column 1) is below 27.
	for i in range(test.shape[0]):
	    if out_[i] > 5000 or heuristic.iloc[i, 1] < 27:
	        out_[i] = heuristic.iloc[i, 0]
	sub4['TRAVEL_TIME'] = pd.Series(out_)
	sub4.to_csv('RForestWithHeu.csv', index=False)