Skip to content

Instantly share code, notes, and snippets.

View L-Lewis's full-sized avatar

Laura Lewis L-Lewis

View GitHub Profile
from keras.models import Sequential
from keras.optimizers import Adam # Other optimisers are available
# Instantiate the two branches: the MLP is sized from the second dimension of
# trainAttrX, the CNN takes 128 x 128 inputs with 3 channels
mlp = create_mlp(dim=trainAttrX.shape[1])
cnn = create_cnn(width=128, height=128, depth=3)
# Join the output tensors of both branches (MLP first, then CNN); the result is
# the input to the final set of layers
combinedInput = concatenate([branch.output for branch in (mlp, cnn)])
from keras.layers import Flatten, Input, concatenate
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dropout, Dense
from keras.layers.normalization import BatchNormalization
from keras.models import Model
def create_cnn(width, height, depth, filters=(16, 32, 64), regularizer=None):
"""
Creates a CNN with the given input dimension and filter numbers.
"""
from keras.models import Sequential
from keras.layers.core import Dense
def create_mlp(dim, regularizer=None):
    """Build a small fully connected network for inputs of size ``dim``.

    The network is a ``Sequential`` stack of two ReLU ``Dense`` layers with
    8 and 4 units respectively. It is returned as-is; no compile step is
    performed here.

    Args:
        dim: Input dimension, passed as ``input_dim`` to the first layer.
        regularizer: Optional value passed as ``kernel_regularizer`` to both
            layers (``None`` leaves the kernels unregularised).

    Returns:
        The assembled ``Sequential`` model.
    """
    network = Sequential()
    # Only the first layer declares the input size; later layers infer theirs.
    hidden_layers = (
        Dense(8, input_dim=dim, activation="relu", kernel_regularizer=regularizer),
        Dense(4, activation="relu", kernel_regularizer=regularizer),
    )
    for hidden in hidden_layers:
        network.add(hidden)
    return network
from sklearn.preprocessing import MinMaxScaler
def process_structured_data(df, train, test):
"""
Pre-processes the given dataframe by minmaxscaling the continuous features
(fit-transforming the training data and transforming the test data)
"""
continuous = ["population_per_hectare", "bicycle_aadf", "motor_vehicle_aadf"]
cs = MinMaxScaler()
trainX = cs.fit_transform(train[continuous])
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
# Read the images from disk, multiplying every pixel value by 1/255
image_folder = 'model3_images/'
image_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    directory=image_folder,
    target_size=(128, 128),
    class_mode='binary',
    batch_size=20000,
    shuffle=False)
# Pull one batch from the generator; with batch_size=20000 and the 20000 images
# reported below, that single batch holds the whole directory
images, labels = next(image_generator)
# Output: Found 20000 images belonging to 2 classes.
# Building the first model iteration: three convolution + 2x2 max-pooling stages
# NOTE(review): this model expects 64 x 64 x 3 inputs, while the image-loading
# snippet in this file uses target_size=(128, 128) — confirm the images fed to
# this model are sized to match.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (4, 4), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
])
@L-Lewis
L-Lewis / nn-evaluation.py
Created May 16, 2019 20:15
Function for evaluating a neural network for regression
def nn_model_evaluation(model, skip_epochs=0, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test):
"""
For a given neural network model that has already been fit, prints the MSE and r squared values for the train and
test sets, and plots a line graph of the loss in each epoch and a scatterplot of predicted vs. actual values with a
line representing where predicted = actual. Optionally, a value for skip_epochs can be provided, which skips that
number of epochs in the line graph of losses (useful in cases where the loss in the first epoch is orders of magnitude
larger than subsequent epochs). Training and test sets can also optionally be specified.
"""
# MSE and r squared values
@L-Lewis
L-Lewis / three-layer-nn.py
Last active May 16, 2019 20:13
Building, compiling and visualising a three-layer neural network
from keras import models, layers, optimizers, regularizers
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
# Building the model: three ReLU hidden layers (128, 256 and 256 units) followed
# by a single linear output unit; the input width is X_train.shape[1]
nn2 = models.Sequential([
    layers.Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='linear'),
])
@L-Lewis
L-Lewis / airbnb-xgboost.py
Last active May 17, 2019 10:24
Fitting and evaluating an XGBoost regression model for the Airbnb data
import time

import xgboost as xgb

# Fitting the model. The timer brackets the fit and both prediction passes so
# that the elapsed time printed below is defined (xgb_reg_start / xgb_reg_end
# were previously read without ever being assigned).
xgb_reg_start = time.perf_counter()
xgb_reg = xgb.XGBRegressor()
xgb_reg.fit(X_train, y_train)
training_preds_xgb_reg = xgb_reg.predict(X_train)
val_preds_xgb_reg = xgb_reg.predict(X_test)
xgb_reg_end = time.perf_counter()

# Printing the results (elapsed seconds converted to minutes, one decimal place)
print(f"Time taken to run: {round((xgb_reg_end - xgb_reg_start)/60,1)} minutes")
@L-Lewis
L-Lewis / infrequent-amenities.py
Created May 16, 2019 14:41
Removing features for infrequent Airbnb amenities
# Collect the amenity columns (column position 41 onwards) whose column sum is
# below a tenth of the number of rows. Assuming these columns are 0/1 (or
# boolean) flags, that means amenities present in fewer than 10% of listings;
# columns where the "false" category is the rare one are not picked up.
infrequent_amenities = [
    amenity for amenity in df.columns[41:]
    if df[amenity].sum() < len(df)/10
]
print(infrequent_amenities)
# Remove the infrequent amenity columns from df in place
df.drop(columns=infrequent_amenities, inplace=True)