# analyticsindiamagazine/CreatingTabularList.py

## CreatingTabularList.py
# Defining the keyword arguments for fastai's TabularList and building the
# DataBunch used for training.
#
# NOTE(review): this script relies on names it does not define itself:
# `train_data`, `test_data`, `TabularList`, `FillMissing`, `Categorify` and
# `Normalize` must already be in scope (presumably pandas DataFrames plus
# `from fastai.tabular import *` -- confirm against the calling notebook).

# Path / default location for saving/loading models
path = ''

# The dependent variable/target
dep_var = 'Price'

# The list of categorical features in the dataset
cat_names = ['Brand', 'Model', 'Location', 'Fuel_Type', 'Transmission', 'Owner_Type']

# The list of continuous features in the dataset
# Exclude the dependent variable 'Price'
cont_names = ['Year', 'Kilometers_Driven', 'Mileage', 'Engine', 'Power', 'Seats', 'New_Price']

# List of processes/transforms to be applied to the dataset
procs = [FillMissing, Categorify, Normalize]

# Fraction of train_data (taken from the end) held out for validation
valid_pct = 0.2

# End index (exclusive) of the validation slice: the total number of rows
end_indx = len(train_data)

# Start index of the validation slice; int() truncates, so the hold-out size
# is rounded down
start_indx = end_indx - int(end_indx * valid_pct)

# TabularList over a copy of the validation rows (built without `procs`).
# NOTE(review): `val` is not referenced again in this script -- kept in case
# later code uses it; confirm before removing.
val = TabularList.from_df(
    train_data.iloc[start_indx:end_indx].copy(),
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)

# TabularList for the test set, attached to the training data below
test = TabularList.from_df(
    test_data,
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
    procs=procs,
)

# TabularList for training: rows [start_indx, end_indx) are passed to
# split_by_idx as the validation indices, labels come from `dep_var`, the
# test set is added, and everything is wrapped into a DataBunch.
data = (
    TabularList.from_df(train_data, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
    .split_by_idx(list(range(start_indx, end_indx)))
    .label_from_df(cols=dep_var)
    .add_test(test)
    .databunch()
)