#!/usr/bin/env python
# ----------------------------------------------------------------------------
# Copyright 2015 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------
"""
Caffenet implementation:
An Alexnet like model adapted to neon
See:
http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel
To run the complete training for 60 epochs
alexnet_neon.py -e 60 -eval 1 -s <save-path> -w <path-to-saved-batches>
To load a pretrained model and run it on the validation set:
alexnet_neon.py -w <path-to-saved-batches> --test_only \
--model_file <saved weights file>
"""
from neon.util.argparser import NeonArgparser
from neon.initializers import Constant, Gaussian
from neon.layers import Conv, Dropout, Pooling, GeneralizedCost, Affine, LRN
from neon.optimizers import GradientDescentMomentum, MultiOptimizer, Schedule
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti, TopKMisclassification
from neon.models import Model
from neon.data import ImageLoader
from neon.callbacks.callbacks import Callbacks
# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
parser.add_argument('--subset_pct', type=float, default=100,
                    help='subset of training dataset to use (percentage)')
parser.add_argument('--test_only', action='store_true',
                    help='skip fitting - evaluate metrics on trained model weights')
args = parser.parse_args()
if args.test_only:
    if args.model_file is None:
        raise ValueError('To test the model, trained weights must be provided')
# setup data provider
img_set_options = dict(repo_dir=args.data_dir,
                       inner_size=224,
                       subset_pct=args.subset_pct)
train = ImageLoader(set_name='train', scale_range=(256, 384),
                    shuffle=True, **img_set_options)
test = ImageLoader(set_name='validation', scale_range=(256, 256),
                   do_transforms=False, **img_set_options)
init_g1 = Gaussian(scale=0.01)
init_g2 = Gaussian(scale=0.005)
relu = Rectlin()
layers = []
layers.append(Conv((11, 11, 96), padding=0, strides=4,
                   init=init_g1, bias=Constant(0), activation=relu, name='conv1'))
layers.append(Pooling(3, strides=2, name='pool1'))
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm1'))
layers.append(Conv((5, 5, 256), padding=2, init=init_g1,
                   bias=Constant(1.0), activation=relu, name='conv2'))
layers.append(Pooling(3, strides=2, name='pool2'))
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm2'))
layers.append(Conv((3, 3, 384), padding=1, init=init_g1, bias=Constant(0),
                   activation=relu, name='conv3'))
layers.append(Conv((3, 3, 384), padding=1, init=init_g1, bias=Constant(1.0),
                   activation=relu, name='conv4'))
layers.append(Conv((3, 3, 256), padding=1, init=init_g1, bias=Constant(1.0),
                   activation=relu, name='conv5'))
layers.append(Pooling(3, strides=2, name='pool5'))
layers.append(Affine(nout=4096, init=init_g2, bias=Constant(1.0),
                     activation=relu, name='fc6'))
layers.append(Dropout(keep=0.5, name='drop6'))
layers.append(Affine(nout=4096, init=init_g2, bias=Constant(1.0),
                     activation=relu, name='fc7'))
layers.append(Dropout(keep=0.5, name='drop7'))
# this layer is nout=10 now instead of 1000
layers.append(Affine(nout=10, init=init_g1, bias=Constant(0.0),
                     activation=Softmax(), name='fc8'))
# set up the AlexNet-like model with 10 output classes
model = Model(layers=layers)
# load the trained model parameters from the neon Model Zoo
from neon.util.persist import load_obj
model_dict = load_obj('alexnet_neon.p')
# wrap model_dict in the ModelDescription container class
# to get access to its helper functions
from neon.util.modeldesc import ModelDescription
model_desc = ModelDescription(model_dict)
# go layer by layer and load up the weights
for layer in model.layers_to_optimize:
    name = layer.name
    # skip the last layer and its associated bias layer;
    # find also matches the bias layer (named fc8_bias)
    if name.find('fc8') > -1:
        continue
    print(name)
    # find this layer in the serialized file by name
    trained_layer = model_desc.getlayer(name)
    layer.load_weights(trained_layer)
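
# ----------------------------------------------------------------------------
# What follows is a minimal sketch, not part of the original gist, of how the
# loaded model could then be fine-tuned or evaluated using the objects
# imported above.  The learning rates, momentum, weight decay, and schedule
# are illustrative assumptions loosely patterned on neon's stock AlexNet
# example, not tuned values.
# ----------------------------------------------------------------------------
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# assumed schedule: drop the learning rate 10x at epochs 20 and 40
sched = Schedule([20, 40], 0.1)
# assumption: train the freshly initialized fc8 layer (and its bias) at a
# higher rate than the pretrained layers, wired up by name via MultiOptimizer
opt_pretrained = GradientDescentMomentum(0.001, 0.9, wdecay=0.0005,
                                         schedule=sched)
opt_fc8 = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched)
opt = MultiOptimizer({'default': opt_pretrained,
                      'fc8': opt_fc8, 'fc8_bias': opt_fc8})

if args.test_only:
    # report logloss and top-1/top-5 accuracy on the validation set
    mets = model.eval(test, metric=TopKMisclassification(k=5))
    print('LogLoss: %.2f, Top-1 accuracy: %.1f%%, Top-5 accuracy: %.1f%%' % (
        mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
else:
    callbacks = Callbacks(model, eval_set=test,
                          metric=TopKMisclassification(k=5),
                          **args.callback_args)
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)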