Laura Lewis L-Lewis

## traffic-model3-build-model.py
from keras.models import Sequential
from keras.optimizers import Adam # Other optimisers are available

# Build the two branches: an MLP sized to the number of columns in trainAttrX
# and a CNN created for width 128, height 128 and depth 3
mlp = create_mlp(dim=trainAttrX.shape[1])
cnn = create_cnn(width=128, height=128, depth=3)

# Join the outputs of both branches; the result is the input to the final set of layers
combinedInput = concatenate([mlp.output, cnn.output])

## traffic-model3-create-cnn.py
from keras.layers import Flatten, Input, concatenate
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dropout, Dense
from keras.layers.normalization import BatchNormalization
from keras.models import Model

def create_cnn(width, height, depth, filters=(16, 32, 64), regularizer=None):
    """
    Creates a CNN with the given input dimension and filter numbers.

    Parameters:
        width, height, depth: input dimensions (presumably image width,
            height and channel count -- confirm against the full body).
        filters: sequence of filter numbers; defaults to (16, 32, 64).
        regularizer: optional regularizer, defaults to None. How it is
            applied is not visible in this excerpt.

    NOTE(review): only the signature and docstring are shown here; the
    layer-building code is not part of this excerpt.
    """

## traffic-model3-create-mlp.py
from keras.models import Sequential
from keras.layers.core import Dense

def create_mlp(dim, regularizer=None):
    """
    Builds a small fully connected network for inputs with ``dim`` features.

    The network is a Sequential stack of a Dense layer with 8 units followed
    by a Dense layer with 4 units. Both use ReLU activation and both receive
    ``regularizer`` as their kernel regularizer (None by default).
    """
    hidden = Dense(8, input_dim=dim, activation="relu", kernel_regularizer=regularizer)
    head = Dense(4, activation="relu", kernel_regularizer=regularizer)
    # Passing the layers as a list is equivalent to adding them one by one
    return Sequential([hidden, head])

## traffic-model3-process-structured-data.py
from sklearn.preprocessing import MinMaxScaler

def process_structured_data(df, train, test):
    """
    Pre-processes the given data by min-max scaling the continuous features.

    The scaler is fitted on the training data only and then applied unchanged
    to the test data, so the test data never influences the scaling.

    Parameters:
        df: the full dataframe (not used by the scaling itself; kept so the
            signature stays unchanged).
        train: training split; must contain the continuous feature columns.
        test: test split; must contain the same columns.

    Returns:
        (trainX, testX): the scaled continuous features of the training and
        test splits respectively.
    """
    continuous = ["population_per_hectare", "bicycle_aadf", "motor_vehicle_aadf"]
    cs = MinMaxScaler()
    # Fit on train only, then reuse the fitted scaler for test
    trainX = cs.fit_transform(train[continuous])
    testX = cs.transform(test[continuous])
    return (trainX, testX)

## traffic-model3-preparing-data.py
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd

# Read the images from the folder, resized to 128x128 and rescaled by 1/255.
# batch_size equals the 20000 images reported below, so a single next() call
# returns all of them (in directory order, since shuffle is off).
image_folder = 'model3_images/'
image_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    directory=image_folder,
    target_size=(128, 128),
    class_mode='binary',
    batch_size=20000,
    shuffle=False,
)
images, labels = next(image_generator)
# Output: Found 20000 images belonging to 2 classes.

## traffic-accidents-model2-v1.py
# Building the first model iteration: three convolution + max-pooling stages
# on 64x64x3 inputs, declared as a single list of layers
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (4, 4), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
])

## nn-evaluation.py
def nn_model_evaluation(model, skip_epochs=0, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test):
    """
    For a given neural network model that has already been fit, prints for the train and test sets the MSE and r squared
    values, a line graph of the loss in each epoch, and a scatterplot of predicted vs. actual values with a line
    representing where predicted = actual values.

    Parameters:
        model: the already-fitted neural network model to evaluate.
        skip_epochs: number of initial epochs to leave out of the line graph of losses (useful in cases where the
            loss in the first epoch is orders of magnitude larger than subsequent epochs). Defaults to 0.
        X_train, X_test, y_train, y_test: training and test sets to evaluate on. The defaults are the module-level
            objects of the same names, bound once when this function is defined; rebinding those names later does
            not change the defaults.

    NOTE(review): only the signature and docstring are visible in this excerpt; the evaluation code is not shown.
    """

    # MSE and r squared values

## three-layer-nn.py
from keras import models, layers, optimizers, regularizers
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG

# Building the model: three ReLU hidden layers (128, 256, 256 units) and a
# single linear output unit; the input width is the column count of X_train
nn2 = models.Sequential([
    layers.Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='linear'),
])

## airbnb-xgboost.py
import time

import xgboost as xgb

# Fitting the model. The start/end timestamps bracket the fit and both
# predictions so that the duration printed below is actually defined.
xgb_reg_start = time.time()
xgb_reg = xgb.XGBRegressor()
xgb_reg.fit(X_train, y_train)
training_preds_xgb_reg = xgb_reg.predict(X_train)
val_preds_xgb_reg = xgb_reg.predict(X_test)
xgb_reg_end = time.time()

# Printing the results (elapsed seconds converted to minutes, one decimal)
print(f"Time taken to run: {round((xgb_reg_end - xgb_reg_start)/60,1)} minutes")

## infrequent-amenities.py
# Collects the amenity columns (column position 41 onwards) whose values sum
# to less than 10% of the number of listings.
# NOTE: only the "true" side is checked; columns that are true for more than
# 90% of listings are not flagged here.
infrequent_amenities = [
    col for col in df.columns[41:]
    if df[col].sum() < len(df)/10
]
print(infrequent_amenities)

# Dropping infrequent amenity features
df.drop(infrequent_amenities, axis=1, inplace=True)
	from keras.models import Sequential
	from keras.optimizers import Adam # Other optimisers are available

	# Build the two branches: an MLP sized to the number of columns in trainAttrX
	# and a CNN created for width 128, height 128 and depth 3
	mlp = create_mlp(dim=trainAttrX.shape[1])
	cnn = create_cnn(width=128, height=128, depth=3)

	# Join the outputs of both branches; the result is the input to the final set of layers
	combinedInput = concatenate([mlp.output, cnn.output])
	from keras.layers import Flatten, Input, concatenate
	from keras.layers.convolutional import Conv2D, MaxPooling2D
	from keras.layers.core import Activation, Dropout, Dense
	from keras.layers.normalization import BatchNormalization
	from keras.models import Model

	def create_cnn(width, height, depth, filters=(16, 32, 64), regularizer=None):
		"""
		Creates a CNN with the given input dimension and filter numbers.

		Parameters:
			width, height, depth: input dimensions (presumably image width,
				height and channel count -- confirm against the full body).
			filters: sequence of filter numbers; defaults to (16, 32, 64).
			regularizer: optional regularizer, defaults to None. How it is
				applied is not visible in this excerpt.

		NOTE(review): only the signature and docstring are shown here; the
		layer-building code is not part of this excerpt.
		"""
	from keras.models import Sequential
	from keras.layers.core import Dense

	def create_mlp(dim, regularizer=None):
		"""
		Creates a simple two-layer MLP with inputs of the given dimension.

		Parameters:
			dim: number of input features of the first Dense layer.
			regularizer: kernel regularizer passed to both Dense layers
				(None by default).

		Returns:
			A Sequential model with a Dense(8) layer followed by a Dense(4)
			layer, both using ReLU activation.
		"""
		model = Sequential()
		model.add(Dense(8, input_dim=dim, activation="relu", kernel_regularizer=regularizer))
		model.add(Dense(4, activation="relu", kernel_regularizer=regularizer))
		return model
	from sklearn.preprocessing import MinMaxScaler

	def process_structured_data(df, train, test):
		"""
		Pre-processes the given data by min-max scaling the continuous features.

		The scaler is fitted on the training data only and then applied unchanged
		to the test data, so the test data never influences the scaling.

		Parameters:
			df: the full dataframe (not used by the scaling itself; kept so the
				signature stays unchanged).
			train: training split; must contain the continuous feature columns.
			test: test split; must contain the same columns.

		Returns:
			(trainX, testX): the scaled continuous features of the training and
			test splits respectively.
		"""
		continuous = ["population_per_hectare", "bicycle_aadf", "motor_vehicle_aadf"]
		cs = MinMaxScaler()
		# Fit on train only, then reuse the fitted scaler for test
		trainX = cs.fit_transform(train[continuous])
		testX = cs.transform(test[continuous])
		return (trainX, testX)
	from keras.preprocessing.image import ImageDataGenerator
	import pandas as pd

	# Read the images from the folder, resized to 128x128 and rescaled by 1/255.
	# batch_size equals the 20000 images reported below, so a single next() call
	# returns all of them (in directory order, since shuffle is off).
	image_folder = 'model3_images/'
	image_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
		directory=image_folder,
		target_size=(128, 128),
		class_mode='binary',
		batch_size=20000,
		shuffle=False,
	)
	images, labels = next(image_generator)
	# Output: Found 20000 images belonging to 2 classes.
	# Building the first model iteration: three convolution + max-pooling stages
	# on 64x64x3 inputs, declared as a single list of layers
	model = models.Sequential([
		layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
		layers.MaxPooling2D((2, 2)),
		layers.Conv2D(32, (4, 4), activation='relu'),
		layers.MaxPooling2D((2, 2)),
		layers.Conv2D(64, (3, 3), activation='relu'),
		layers.MaxPooling2D((2, 2)),
	])
	def nn_model_evaluation(model, skip_epochs=0, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test):
		"""
		For a given neural network model that has already been fit, prints for the train and test sets the MSE and r squared
		values, a line graph of the loss in each epoch, and a scatterplot of predicted vs. actual values with a line
		representing where predicted = actual values.

		Parameters:
			model: the already-fitted neural network model to evaluate.
			skip_epochs: number of initial epochs to leave out of the line graph of losses (useful in cases where the
				loss in the first epoch is orders of magnitude larger than subsequent epochs). Defaults to 0.
			X_train, X_test, y_train, y_test: training and test sets to evaluate on. The defaults are the module-level
				objects of the same names, bound once when this function is defined; rebinding those names later does
				not change the defaults.

		NOTE(review): only the signature and docstring are visible in this excerpt; the evaluation code is not shown.
		"""

		# MSE and r squared values
	from keras import models, layers, optimizers, regularizers
	from keras.utils.vis_utils import model_to_dot
	from IPython.display import SVG

	# Building the model: three ReLU hidden layers (128, 256, 256 units) and a
	# single linear output unit; the input width is the column count of X_train
	nn2 = models.Sequential([
		layers.Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
		layers.Dense(256, activation='relu'),
		layers.Dense(256, activation='relu'),
		layers.Dense(1, activation='linear'),
	])
	import time

	import xgboost as xgb

	# Fitting the model. The start/end timestamps bracket the fit and both
	# predictions so that the duration printed below is actually defined.
	xgb_reg_start = time.time()
	xgb_reg = xgb.XGBRegressor()
	xgb_reg.fit(X_train, y_train)
	training_preds_xgb_reg = xgb_reg.predict(X_train)
	val_preds_xgb_reg = xgb_reg.predict(X_test)
	xgb_reg_end = time.time()

	# Printing the results (elapsed seconds converted to minutes, one decimal)
	print(f"Time taken to run: {round((xgb_reg_end - xgb_reg_start)/60,1)} minutes")
	# Collects the amenity columns (column position 41 onwards) whose values sum
	# to less than 10% of the number of listings.
	# NOTE: only the "true" side is checked; columns that are true for more than
	# 90% of listings are not flagged here.
	infrequent_amenities = [
		col for col in df.iloc[:,41:].columns
		if df[col].sum() < len(df)/10
	]
	print(infrequent_amenities)

	# Dropping infrequent amenity features
	df.drop(infrequent_amenities, axis=1, inplace=True)