#!/usr/bin/env python
# ----------------------------------------------------------------------------
# Copyright 2015 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------
"""
Caffenet implementation:
An Alexnet like model adapted to neon
See:
http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel
To run the complete training for 60 epochs
alexnet_neon.py -e 60 -eval 1 -s <save-path> -w <path-to-saved-batches>
To load a pretrained model and run it on the validation set:
alexnet_neon.py -w <path-to-saved-batches> --test_only \
--model_file <saved weights file>
"""
from neon.util.argparser import NeonArgparser
from neon.initializers import Constant, Gaussian
from neon.layers import Conv, Dropout, Pooling, GeneralizedCost, Affine, LRN
from neon.optimizers import GradientDescentMomentum, MultiOptimizer, Schedule
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti, TopKMisclassification
from neon.models import Model
from neon.data import ImageLoader
from neon.callbacks.callbacks import Callbacks
# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
parser.add_argument('--subset_pct', type=float, default=100,
                    help='subset of training dataset to use (percentage)')
parser.add_argument('--test_only', action='store_true',
                    help='skip fitting - evaluate metrics on trained model weights')
args = parser.parse_args()
if args.test_only:
    if args.model_file is None:
        raise ValueError('To test the model, trained weights must be provided')
# setup data provider
img_set_options = dict(repo_dir=args.data_dir,
                       inner_size=224,
                       subset_pct=args.subset_pct)
train = ImageLoader(set_name='train', scale_range=(256, 384),
                    shuffle=True, **img_set_options)
test = ImageLoader(set_name='validation', scale_range=(256, 256),
                   do_transforms=False, **img_set_options)
init_g1 = Gaussian(scale=0.01)
init_g2 = Gaussian(scale=0.005)
relu = Rectlin()
layers = []
layers.append(Conv((11, 11, 96), padding=0, strides=4,
                   init=init_g1, bias=Constant(0), activation=relu, name='conv1'))
layers.append(Pooling(3, strides=2, name='pool1'))
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm1'))
layers.append(Conv((5, 5, 256), padding=2, init=init_g1,
                   bias=Constant(1.0), activation=relu, name='conv2'))
layers.append(Pooling(3, strides=2, name='pool2'))
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm2'))
layers.append(Conv((3, 3, 384), padding=1, init=init_g1, bias=Constant(0),
                   activation=relu, name='conv3'))
layers.append(Conv((3, 3, 384), padding=1, init=init_g1, bias=Constant(1.0),
                   activation=relu, name='conv4'))
layers.append(Conv((3, 3, 256), padding=1, init=init_g1, bias=Constant(1.0),
                   activation=relu, name='conv5'))
layers.append(Pooling(3, strides=2, name='pool5'))
layers.append(Affine(nout=4096, init=init_g2, bias=Constant(1.0),
                     activation=relu, name='fc6'))
layers.append(Dropout(keep=0.5, name='drop6'))
layers.append(Affine(nout=4096, init=init_g2, bias=Constant(1.0),
                     activation=relu, name='fc7'))
layers.append(Dropout(keep=0.5, name='drop7'))
# this layer is nout=10 now instead of 1000
layers.append(Affine(nout=10, init=init_g1, bias=Constant(0.0),
                     activation=Softmax(), name='fc8'))
# set up the AlexNet-like model with 10 output classes
model = Model(layers=layers)
# load the trained model parameters from the neon Model Zoo
from neon.util.persist import load_obj
model_dict = load_obj('alexnet_neon.p')
# wrap model_dict in the ModelDescription container class
# to get access to its helper functions
from neon.util.modeldesc import ModelDescription
model_desc = ModelDescription(model_dict)
# go layer by layer and load up the weights
for layer in model.layers_to_optimize:
    name = layer.name
    # skip the last layer and its associated bias layer;
    # find also matches the bias layer (named fc8_bias)
    if name.find('fc8') > -1:
        continue
    print(name)
    # find this layer in the serialized file by name
    trained_layer = model_desc.getlayer(name)
    layer.load_weights(trained_layer)
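
# ----------------------------------------------------------------------------
# What follows is a minimal sketch, not part of the original gist, of how the
# loaded model could then be fine-tuned or evaluated using the objects
# imported above.  The learning rates, momentum, weight decay, and schedule
# are illustrative assumptions loosely patterned on neon's stock AlexNet
# example, not tuned values.
# ----------------------------------------------------------------------------
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# assumed schedule: drop the learning rate 10x at epochs 20 and 40
sched = Schedule([20, 40], 0.1)
# assumption: train the freshly initialized fc8 layer (and its bias) at a
# higher rate than the pretrained layers, wired up by name via MultiOptimizer
opt_pretrained = GradientDescentMomentum(0.001, 0.9, wdecay=0.0005,
                                         schedule=sched)
opt_fc8 = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched)
opt = MultiOptimizer({'default': opt_pretrained,
                      'fc8': opt_fc8, 'fc8_bias': opt_fc8})

if args.test_only:
    # report logloss and top-1/top-5 accuracy on the validation set
    mets = model.eval(test, metric=TopKMisclassification(k=5))
    print('LogLoss: %.2f, Top-1 accuracy: %.1f%%, Top-5 accuracy: %.1f%%' % (
        mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
else:
    callbacks = Callbacks(model, eval_set=test,
                          metric=TopKMisclassification(k=5),
                          **args.callback_args)
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)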