#!/usr/bin/env python
# ----------------------------------------------------------------------------
# Copyright 2015 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------
""" | |
Caffenet implementation: | |
An Alexnet like model adapted to neon | |
See: | |
http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel | |
To run the complete training for 60 epochs | |
alexnet_neon.py -e 60 -eval 1 -s <save-path> -w <path-to-saved-batches> | |
To load a pretrained model and run it on the validation set: | |
alexnet_neon.py -w <path-to-saved-batches> --test_only \ | |
--model_file <saved weights file> | |
""" | |
from neon.util.argparser import NeonArgparser
from neon.initializers import Constant, Gaussian
from neon.layers import Conv, Dropout, Pooling, GeneralizedCost, Affine, LRN
from neon.optimizers import GradientDescentMomentum, MultiOptimizer, Schedule
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti, TopKMisclassification
from neon.models import Model
from neon.data import ImageLoader
from neon.callbacks.callbacks import Callbacks

# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
parser.add_argument('--subset_pct', type=float, default=100,
                    help='subset of training dataset to use (percentage)')
parser.add_argument('--test_only', action='store_true',
                    help='skip fitting - evaluate metrics on trained model weights')
args = parser.parse_args()

if args.test_only:
    if args.model_file is None:
        raise ValueError('To test model, trained weights need to be provided')

# setup data provider
img_set_options = dict(repo_dir=args.data_dir,
                       inner_size=224,
                       subset_pct=args.subset_pct)
train = ImageLoader(set_name='train', scale_range=(256, 384),
                    shuffle=True, **img_set_options)
test = ImageLoader(set_name='validation', scale_range=(256, 256),
                   do_transforms=False, **img_set_options)

init_g1 = Gaussian(scale=0.01)
init_g2 = Gaussian(scale=0.005)
relu = Rectlin()

layers = []
layers.append(Conv((11, 11, 96), padding=0, strides=4,
                   init=init_g1, bias=Constant(0), activation=relu, name='conv1'))
layers.append(Pooling(3, strides=2, name='pool1'))
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm1'))
layers.append(Conv((5, 5, 256), padding=2, init=init_g1,
                   bias=Constant(1.0), activation=relu, name='conv2'))
layers.append(Pooling(3, strides=2, name='pool2'))
layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm2'))
layers.append(Conv((3, 3, 384), padding=1, init=init_g1, bias=Constant(0),
                   activation=relu, name='conv3'))
layers.append(Conv((3, 3, 384), padding=1, init=init_g1, bias=Constant(1.0),
                   activation=relu, name='conv4'))
layers.append(Conv((3, 3, 256), padding=1, init=init_g1, bias=Constant(1.0),
                   activation=relu, name='conv5'))
layers.append(Pooling(3, strides=2, name='pool5'))
layers.append(Affine(nout=4096, init=init_g2, bias=Constant(1.0),
                     activation=relu, name='fc6'))
layers.append(Dropout(keep=0.5, name='drop6'))
layers.append(Affine(nout=4096, init=init_g2, bias=Constant(1.0),
                     activation=relu, name='fc7'))
layers.append(Dropout(keep=0.5, name='drop7'))
# this layer is nout=10 now instead of 1000
layers.append(Affine(nout=10, init=init_g1, bias=Constant(0.0),
                     activation=Softmax(), name='fc8'))

# set up an AlexNet-like model with 10 output classes
model = Model(layers=layers)

# load the trained model params from the neon Model Zoo
from neon.util.persist import load_obj
model_dict = load_obj('alexnet_neon.p')

# load the model_dict into the container class ModelDescription
# to access some of its helper functions
from neon.util.modeldesc import ModelDescription
model_desc = ModelDescription(model_dict)

# go layer by layer and load up the pretrained weights
for layer in model.layers_to_optimize:
    name = layer.name
    # skip the last layer and its associated bias layer;
    # use find() so the bias layer (named fc8_bias) is skipped as well
    if name.find('fc8') > -1:
        continue
    print(name)
    # find this layer in the serialized file by name
    trained_layer = model_desc.getlayer(name)
    layer.load_weights(trained_layer)
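
# --- Not part of the original gist: a minimal sketch showing how the
# --- already-imported cost, optimizer, metric and callback classes could be
# --- wired up to fine-tune the reinitialized fc8 layer and evaluate the model.
# --- The learning rates, momentum, weight decay and schedule below are
# --- illustrative assumptions, not values taken from the original recipe.
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# assumed schedule: multiply the learning rate by 0.1 every 20 epochs
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9,
                                  wdecay=0.0005,
                                  schedule=Schedule(step_config=20, change=0.1))
# bias parameters get their own optimizer without weight decay (assumption)
opt_biases = GradientDescentMomentum(learning_rate=0.02, momentum_coef=0.9)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

if not args.test_only:
    # standard neon progress/serialization callbacks, evaluating on the validation set
    callbacks = Callbacks(model, eval_set=test, **args.callback_args)
    model.fit(train, optimizer=opt, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)

# report log-loss plus top-1 and top-5 accuracy on the validation set
mets = model.eval(test, metric=TopKMisclassification(k=5))
print('Validation set metrics:')
print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' %
      (mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))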