This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gather the raw amenities value of every listing and glue them into one string
amenities_list = list(df["amenities"])
amenities_list_string = " ".join(amenities_list).translate(
    # Drop '{' and '"' outright; turn each '}' into a ',' separator
    str.maketrans({'{': None, '}': ',', '"': None})
)
# Split on commas, trim surrounding whitespace and keep only the distinct entries
amenities_set = {entry.strip() for entry in amenities_list_string.split(',')}
amenities_set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas as gpd | |
# Load the London borough boundaries (GeoJSON) into a GeoDataFrame
map_df = gpd.read_file('data/neighbourhoods.geojson')

# Per-borough summary table: one row per borough, indexed by borough
listings_per_borough = df.groupby('borough').size()
borough_df = listings_per_borough.to_frame('number_of_listings')
# Positional assignment: relies on both groupby results sharing the same borough order
borough_df['median_price'] = df.groupby('borough').price.median().values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the list of amenity features whose column total is below 10% of the
# number of listings (columns from position 41 onwards are treated as amenities).
# NOTE(review): only the low-total side is tested here; columns whose total
# exceeds 90% of listings are kept - confirm this is the intended rule.
amenity_columns = df.iloc[:, 41:].columns
min_listings = len(df) / 10
infrequent_amenities = [name for name in amenity_columns if df[name].sum() < min_listings]
print(infrequent_amenities)

# Removing the infrequent amenity features from the dataframe in place
df.drop(columns=infrequent_amenities, inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xgboost as xgb | |
import time

# Fitting the model and predicting on both splits, timing the whole run.
# The start/end stamps are taken here because the summary print below reads
# them; without these assignments the print raises NameError.
xgb_reg_start = time.time()

xgb_reg = xgb.XGBRegressor()
xgb_reg.fit(X_train, y_train)
training_preds_xgb_reg = xgb_reg.predict(X_train)
val_preds_xgb_reg = xgb_reg.predict(X_test)

xgb_reg_end = time.time()

# Printing the results (elapsed wall-clock time, in minutes to one decimal place)
print(f"Time taken to run: {round((xgb_reg_end - xgb_reg_start)/60,1)} minutes")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras import models, layers, optimizers, regularizers | |
from keras.utils.vis_utils import model_to_dot | |
from IPython.display import SVG | |
# Building the model: three ReLU hidden layers (128, 256, 256 units) feeding a
# single linear output unit; the input width is the number of columns in X_train
nn2 = models.Sequential([
    layers.Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='linear'),
])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def nn_model_evaluation(model, skip_epochs=0, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test): | |
""" | |
For a given neural network model that has already been fit, prints for the train and test sets the MSE and r squared | |
values, a line graph of the loss in each epoch, and a scatterplot of predicted vs. actual values with a line | |
representing where predicted = actual values. Optionally, a value for skip_epochs can be provided, which skips that | |
number of epochs in the line graph of losses (useful in cases where the loss in the first epoch is orders of magnitude | |
larger than subsequent epochs). Training and test sets can also optionally be specified. | |
""" | |
# MSE and r squared values |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Building the first model iteration: three convolution + 2x2 max-pooling
# stages applied to 64x64 inputs with 3 channels
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (4, 4), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.preprocessing.image import ImageDataGenerator | |
import pandas as pd | |
# Getting the images and rescaling pixel values by 1/255
image_folder = 'model3_images/'
rescaler = ImageDataGenerator(rescale=1./255)
# Unshuffled, binary-labelled 128x128 images; the batch size is large enough
# for a single batch to hold all 20000 images reported below
image_generator = rescaler.flow_from_directory(
    image_folder,
    target_size=(128, 128),
    class_mode='binary',
    batch_size=20000,
    shuffle=False,
)
# Pull the first batch as (images, labels)
images, labels = next(image_generator)
# Output: Found 20000 images belonging to 2 classes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import MinMaxScaler | |
def process_structured_data(df, train, test): | |
""" | |
Pre-processes the given dataframe by minmaxscaling the continuous features | |
(fit-transforming the training data and transforming the test data) | |
""" | |
continuous = ["population_per_hectare", "bicycle_aadf", "motor_vehicle_aadf"] | |
cs = MinMaxScaler() | |
trainX = cs.fit_transform(train[continuous]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.models import Sequential | |
from keras.layers.core import Dense | |
def create_mlp(dim, regularizer=None):
    """Build a small two-layer MLP for inputs of width ``dim``.

    The network is an 8-unit ReLU layer followed by a 4-unit ReLU layer.
    ``regularizer`` is passed to both layers as their ``kernel_regularizer``
    (``None`` means no regularization). The model is returned uncompiled and
    without an output layer.
    """
    hidden_layers = [
        Dense(8, input_dim=dim, activation="relu", kernel_regularizer=regularizer),
        Dense(4, activation="relu", kernel_regularizer=regularizer),
    ]
    return Sequential(hidden_layers)
OlderNewer