-
-
Save dansileshi/21b52113ce0ecb6c0f56d6f7534bbaca to your computer and use it in GitHub Desktop.
Example of 3D convolutional network with TensorFlow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
# Width (number of units) of each fully connected hidden layer in the network.
FC_SIZE = 1024

# Element type shared by every weight and bias variable created in this module.
DTYPE = tf.float32
def _weight_variable(name, shape):
    """Return a weight variable obtained through ``tf.get_variable``.

    Args:
        name: Variable name, resolved inside the current variable scope.
        shape: Shape of the variable.

    Returns:
        A variable of type ``DTYPE`` initialised from a truncated normal
        distribution with a standard deviation of 0.1.
    """
    initializer = tf.truncated_normal_initializer(stddev=0.1)
    return tf.get_variable(
        name,
        shape=shape,
        dtype=DTYPE,
        initializer=initializer,
    )
def _bias_variable(name, shape):
    """Return a bias variable obtained through ``tf.get_variable``.

    Args:
        name: Variable name, resolved inside the current variable scope.
        shape: Shape of the variable.

    Returns:
        A variable of type ``DTYPE`` whose entries all start at 0.1.
    """
    initializer = tf.constant_initializer(0.1, dtype=DTYPE)
    return tf.get_variable(
        name,
        shape=shape,
        dtype=DTYPE,
        initializer=initializer,
    )
def _conv3d_relu(scope_name, inputs, in_filters, out_filters):
    """Apply a 5x5x5, stride-1, SAME-padded 3D convolution, then bias and ReLU."""
    with tf.variable_scope(scope_name) as scope:
        kernel = _weight_variable('weights', [5, 5, 5, in_filters, out_filters])
        conv = tf.nn.conv3d(inputs, kernel, [1, 1, 1, 1, 1], padding='SAME')
        biases = _bias_variable('biases', [out_filters])
        pre_activation = tf.nn.bias_add(conv, biases)
        return tf.nn.relu(pre_activation, name=scope.name)


def _max_pool3d(inputs):
    """Apply the network's 3x3x3, stride-2, SAME-padded 3D max pooling."""
    return tf.nn.max_pool3d(
        inputs, ksize=[1, 3, 3, 3, 1], strides=[1, 2, 2, 2, 1], padding='SAME')


def _flattened_size(tensor):
    """Return the number of elements per example (product of all non-batch dims)."""
    dims = tensor.get_shape().as_list()[1:]
    if any(d is None for d in dims):
        # Variable shapes are fixed at graph-construction time, so every
        # non-batch dimension has to be statically known here.
        raise ValueError(
            'Cannot flatten %s: non-batch dimensions must be statically '
            'known, got %s' % (tensor.name, dims))
    # np.prod yields a NumPy scalar; hand plain ints to the shape arguments.
    return int(np.prod(dims))


def _dense_relu(scope_name, inputs, out_units):
    """Flatten ``inputs`` per example and apply a fully connected ReLU layer."""
    with tf.variable_scope(scope_name) as scope:
        dim = _flattened_size(inputs)
        flat = tf.reshape(inputs, [-1, dim])
        weights = _weight_variable('weights', [dim, out_units])
        biases = _bias_variable('biases', [out_units])
        return tf.nn.relu(tf.matmul(flat, weights) + biases, name=scope.name)


def inference(boxes, dataconfig):
    """Build the 3D convolutional network and return its unnormalised logits.

    Layer stack (variable scope names in parentheses):
    conv 16 (conv1) -> pool -> conv 32 (conv2) -> pool ->
    conv 64 (conv3_1) -> conv 64 (conv3_2) -> conv 32 (conv3_3) -> pool ->
    dense FC_SIZE (local3) -> dense FC_SIZE (local4) ->
    linear num_classes (softmax_linear).

    Args:
        boxes: Input tensor fed to ``tf.nn.conv3d``; its channel count must
            equal ``dataconfig.num_props``. Presumably laid out as
            [batch, depth, height, width, channels] -- confirm with caller.
        dataconfig: Object exposing ``num_props`` (input channels) and
            ``num_classes`` (number of output logits).

    Returns:
        The tensor produced by the final linear layer; no softmax is applied.

    Raises:
        ValueError: If a non-batch dimension is unknown where the network
            flattens its activations.
    """
    net = _conv3d_relu('conv1', boxes, dataconfig.num_props, 16)
    net = _max_pool3d(net)
    # NOTE: local response normalisation was disabled in the original code at
    # this point; the pooled activations are passed through unchanged.

    net = _conv3d_relu('conv2', net, 16, 32)
    # TODO: the original marks this spot as a placeholder for normalisation.
    net = _max_pool3d(net)

    net = _conv3d_relu('conv3_1', net, 32, 64)
    net = _conv3d_relu('conv3_2', net, 64, 64)
    net = _conv3d_relu('conv3_3', net, 64, 32)
    # TODO: the original marks this spot as a placeholder for normalisation.
    net = _max_pool3d(net)

    net = _dense_relu('local3', net, FC_SIZE)
    net = _dense_relu('local4', net, FC_SIZE)

    with tf.variable_scope('softmax_linear') as scope:
        dim = _flattened_size(net)
        weights = _weight_variable('weights', [dim, dataconfig.num_classes])
        biases = _bias_variable('biases', [dataconfig.num_classes])
        softmax_linear = tf.add(tf.matmul(net, weights), biases, name=scope.name)

    return softmax_linear
def loss(logits, labels):
    """Return the mean softmax cross-entropy between ``logits`` and ``labels``.

    Args:
        logits: Unnormalised class scores, e.g. the output of ``inference``.
        labels: Target distribution per example, passed straight to
            ``tf.nn.softmax_cross_entropy_with_logits``. Presumably one-hot
            with the same shape as ``logits`` -- confirm with caller.

    Returns:
        A scalar tensor named 'xentropy_mean': the per-example cross-entropy
        averaged over all examples.
    """
    # Pass labels/logits by keyword: TensorFlow >= 1.0 rejects positional
    # arguments for this op, and keywords also work with the older signature.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    return tf.reduce_mean(cross_entropy, name='xentropy_mean')
Hi dansileshi, I am also interested in what @rceballos98 is asking. I am also interested in running a similar kind of classification. @rceballos98, what are you using the demo for? It sounds cool! I'm just trying to see if I can do better than just 2D image classification by taking advantage of point cloud data.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi dansileshi. I was wondering if you had the rest of the code that you used to make this run. I'm trying to adapt this into a demo 3D CNN that will classify whether there is a sphere or a cube in a set of synthetic 3D images I made. Specifically, I'm wondering what trainer you used and how to connect the inference and loss to the trainer and run it on a 4D matrix containing the 3D images and an array of labels.
Thanks!