Skip to content

Instantly share code, notes, and snippets.

@rdelassus
Last active July 23, 2023 12:21
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save rdelassus/5b908efd07ae030a2650584e199ff25b to your computer and use it in GitHub Desktop.
Save rdelassus/5b908efd07ae030a2650584e199ff25b to your computer and use it in GitHub Desktop.
A SegNet-like architecture for building detection on the SpaceNet dataset
#from __future__ import absolute_import
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Layer, Dense, Dropout, Activation, Flatten, Reshape, Merge, Permute
from keras.layers import ZeroPadding2D, UpSampling2D
from keras.layers.normalization import BatchNormalization
import sys
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
#from preprocessing.visualize_prepro import shiftedColorMap
import itertools
import tensorflow as tf
# Train a SegNet-like encoder/decoder that predicts a one-channel mask for
# every 3-channel input image.
#
# Usage: python <this script> <dataset_dir>
# <dataset_dir> must contain a '3band/' folder (input images) and a 'labels'
# folder (masks); both are read with ImageDataGenerator.flow_from_directory.
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('usage: %s <dataset_dir>' % sys.argv[0])
path = sys.argv[1]

# input image dimensions
img_rows, img_cols = 400, 400
# output image dimensions
label_rows, label_cols = 400, 400

# Shared by the image and the mask generator so that both yield batches of
# the same size, in the same order, with the same random transformations.
batch_size = 8
seed = 1

# Convolution / pooling hyper-parameters.
kernel = 3
filter_size = 64
pad = 1
pool_size = 2


def _add_conv_bn(net, n_filters):
    """Append zero padding, a kernel x kernel 'valid' convolution and batch norm.

    With pad=1 and kernel=3 the padding compensates for the 'valid'
    convolution, so the spatial size of the feature map is unchanged.
    """
    net.add(ZeroPadding2D(padding=(pad, pad)))
    net.add(Convolution2D(n_filters, kernel, kernel, border_mode='valid'))
    net.add(BatchNormalization())


# The whole pipeline is built and trained inside this device scope
# (TensorFlow device '/gpu:1').
with tf.device('/gpu:1'):
    # Geometric augmentation shared by images and masks. Keeping it in one
    # dict guarantees both generators are configured identically, which is
    # required for a transformed mask to still match its transformed image.
    augmentation_args = dict(
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        fill_mode="constant",
    )
    # Images: scaled by 1/255, pixels outside the boundaries filled with 0.
    img_data_gen_args = dict(augmentation_args, rescale=1. / 255, cval=0)
    # Masks: pixels outside the boundaries are filled with 1.
    # NOTE(review): masks are not rescaled, so this presumably relies on the
    # label images already holding 0/1 values - confirm, because
    # binary_crossentropy expects targets in [0, 1].
    label_data_gen_args = dict(augmentation_args, cval=1)

    # Feature-wise normalisation is not enabled, so neither generator needs
    # a .fit() call before use.
    image_datagen = ImageDataGenerator(**img_data_gen_args)
    mask_datagen = ImageDataGenerator(**label_data_gen_args)

    # Same seed, no shuffling and same batch size for both flows.
    image_generator = image_datagen.flow_from_directory(
        os.path.join(path, '3band/'),
        target_size=(img_rows, img_cols),
        class_mode=None,
        batch_size=batch_size,
        shuffle=False,
        seed=seed)
    mask_generator = mask_datagen.flow_from_directory(
        os.path.join(path, 'labels'),
        target_size=(label_rows, label_cols),
        class_mode=None,
        batch_size=batch_size,
        shuffle=False,
        color_mode='grayscale',
        seed=seed)

    # Combine the generators into one which yields (images, masks) tuples.
    # The pairing must be lazy because both generators are endless:
    # itertools.izip on Python 2, the built-in zip on Python 3.
    lazy_zip = getattr(itertools, 'izip', zip)
    train_generator = lazy_zip(image_generator, mask_generator)

    model = Sequential()
    # Channels-last input: (rows, cols, 3).
    model.add(Layer(input_shape=(img_rows, img_cols, 3)))

    # encoding layers: three 2x2 max-poolings, 64 -> 128 -> 256 -> 512 filters
    # NOTE(review): unlike the other encoder stages, the first stage has no
    # ReLU between batch norm and pooling. Kept as is so the architecture is
    # unchanged - confirm whether the omission is intentional.
    _add_conv_bn(model, filter_size)
    model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
    for n_filters in (128, 256):
        _add_conv_bn(model, n_filters)
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
    _add_conv_bn(model, 512)
    model.add(Activation('relu'))

    # decoding layers: three 2x2 up-samplings, 512 -> 256 -> 128 -> 64 filters
    for n_filters in (512, 256, 128):
        _add_conv_bn(model, n_filters)
        model.add(UpSampling2D(size=(pool_size, pool_size)))
    _add_conv_bn(model, filter_size)

    # 1x1 convolution down to a single output channel (the mask logits).
    model.add(Convolution2D(1, 1, 1, border_mode='valid',))
    print(model.output_shape)

    # Fail early with an actionable message instead of the Reshape layer's
    # "total size of new array must be unchanged" error.
    expected_shape = (label_rows, label_cols, 1)
    if tuple(model.output_shape[1:]) != expected_shape:
        raise ValueError(
            'model output shape %r does not match the label shape %r; check '
            'that Keras uses channels-last image dimension ordering and that '
            'the input size is divisible by pool_size ** 3'
            % (tuple(model.output_shape[1:]), expected_shape))

    # The sigmoid is applied on a flattened view and the map shape is
    # restored afterwards.
    model.add(Reshape((label_rows * label_cols,)))
    model.add(Activation('sigmoid'))
    model.add(Reshape((label_rows, label_cols, 1)))

    model.compile(loss="binary_crossentropy", optimizer='rmsprop',
                  metrics=['binary_accuracy'])
    model.summary()

    # save_best_only=False: "weights.hdf5" is overwritten after every epoch.
    checkpointer = ModelCheckpoint(filepath="weights.hdf5", verbose=1,
                                   save_best_only=False)
    model.fit_generator(
        train_generator,
        samples_per_epoch=1000,
        nb_epoch=20,
        callbacks=[checkpointer])
    model.save('spacenetmodel2.h5')
@mnboos
Copy link

mnboos commented Jan 13, 2018

Hi

I'm currently doing something similar and because I'm new to the subject of machine learning, I've got a question regarding the masks:

If we want to identify buildings, we typically have a single image containing many instances (the buildings). When the network requests the mask(s) for a specific image, do I have to return a single image that contains the masks of all buildings in that image, or an image in which only one building is masked at a time?

Thanks for your help!

@BBFelten
Copy link

Hello,
I would like to use your model for road detection. However, I get the following error from the Reshape function in line 128:
ValueError: total size of new array must be unchanged
The output of the print(model.output_shape) statement is (None, 0, 0, 1).
Do you know what the problem might be?
Thank you in advance!

@ankurshukla03
Copy link

Hi

I'm currently doing something similar and because I'm new to the subject of machine learning, I've got a question regarding the masks:

If we want to identify buildings, we typically have a single image containing many instances (the buildings). When the network requests the mask(s) for a specific image, do I have to return a single image that contains the masks of all buildings in that image, or an image in which only one building is masked at a time?

Thanks for your help!

  • I think you should return a single image which contains all the masks/buildings.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment