# GoogLeNet-style network in neon (gist by @nervetumer, created July 11, 2016)
from neon.backends import gen_backend
from neon.callbacks.callbacks import Callbacks
from neon.data import ImageLoader
from neon.initializers import Xavier
from neon.layers import (Affine, BranchNode, Conv, Dropout, GeneralizedCost,
                         LRN, MergeBroadcast, Multicost, Pooling,
                         SingleOutputTree)
from neon.models import Model
from neon.optimizers import GradientDescentMomentum, Schedule
from neon.transforms import (CrossEntropyMulti, Misclassification, Rectlin,
                             Softmax, TopKMisclassification)

# generate the backend; switch to backend='cpu' if no GPU is available
be = gen_backend(batch_size=32, backend='gpu')

# Xavier (Glorot) initialization: local=True for conv layers, local=False for affine
init1 = Xavier(local=False)
initx = Xavier(local=True)
relu = Rectlin()

# keyword arguments shared by most layers below
common = dict(activation=relu, init=initx)
pool_common = dict(fshape=3, padding=1, strides=2, op='max')

# branch points where the two auxiliary classifiers split off the main trunk
b1 = BranchNode(name="b1")
b2 = BranchNode(name="b2")

full_num = 1024
# neon's Dropout(keep=...) takes the *keep* probability, not the drop rate
final_keep_prob = 0.4  # main classifier
aux_keep_prob = 0.3    # auxiliary classifiers
nclass = 1000

def inception(kvals):
    """Build one inception module.

    kvals = (p1, p2, p3, p4) gives the feature-map counts for the 1x1
    branch, the 1x1->3x3 branch, the 1x1->5x5 branch, and the maxpool->1x1
    branch; the four branches are concatenated along the depth axis.
    """
    (p1, p2, p3, p4) = kvals
    commonp1 = dict(activation=relu, init=initx, padding=1)
    commonp2 = dict(activation=relu, init=initx, padding=2)
    poolmed = dict(fshape=3, padding=1, strides=1)

    branch1 = [Conv((1, 1, p1[0]), **common)]
    branch2 = [Conv((1, 1, p2[0]), **common), Conv((3, 3, p2[1]), **commonp1)]
    branch3 = [Conv((1, 1, p3[0]), **common), Conv((5, 5, p3[1]), **commonp2)]
    branch4 = [Pooling(op="max", **poolmed), Conv((1, 1, p4[0]), **common)]
    return MergeBroadcast(layers=[branch1, branch2, branch3, branch4], merge="depth")
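
# For example, inception(kvals=[[64], [96, 128], [16, 32], [32]]) produces
# 64 + 128 + 32 + 32 = 256 output feature maps after the depth-wise merge.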

# local response normalization parameters (the AlexNet-style defaults)
depth = 5
ascale = 0.0001
bpower = 0.75

# main trunk: GoogLeNet stem, nine inception modules, and the main classifier
branch1 = [Conv((7, 7, 64), padding=3, strides=2, **common),
           Pooling(**pool_common),
           LRN(depth=depth, ascale=ascale, bpower=bpower),
           Conv((1, 1, 64), strides=2, **common),  # note: the paper's 1x1 reduce layer uses stride 1
           Conv((3, 3, 192), padding=1, strides=1, **common),
           LRN(depth=depth, ascale=ascale, bpower=bpower),
           Pooling(**pool_common),
           inception(kvals=[[64], [96, 128], [16, 32], [32]]),
           inception(kvals=[[128], [128, 192], [32, 96], [64]]),
           Pooling(**pool_common),
           inception(kvals=[[192], [96, 208], [16, 48], [64]]),
           b1,  # first auxiliary classifier branches off here
           inception(kvals=[[160], [112, 224], [24, 64], [64]]),
           inception(kvals=[[128], [128, 256], [24, 64], [64]]),
           inception(kvals=[[112], [144, 288], [32, 64], [64]]),
           b2,  # second auxiliary classifier branches off here
           inception(kvals=[[256], [160, 320], [32, 128], [128]]),
           Pooling(**pool_common),
           inception(kvals=[[256], [160, 320], [32, 128], [128]]),
           inception(kvals=[[384], [192, 384], [48, 128], [128]]),
           Pooling(fshape=4, strides=1, op='avg', name='main_avg_pool'),
           Affine(nout=full_num, **common),
           Dropout(keep=final_keep_prob),
           Affine(nout=nclass, activation=Softmax(), init=init1)]

# first auxiliary classifier, fed from b1
branch2 = [b1,
           Pooling(5, strides=3, op='avg', name='aux1_avg_pool'),
           Conv((1, 1, 128), strides=1, **common),
           Affine(nout=full_num, **common),
           Affine(nout=full_num, **common),
           Dropout(keep=aux_keep_prob),
           Affine(nout=nclass, activation=Softmax(), init=init1)]

# second auxiliary classifier, fed from b2
branch3 = [b2,
           Pooling(5, strides=3, op='avg', name='aux2_avg_pool'),
           Conv((1, 1, 128), strides=1, **common),
           Affine(nout=full_num, **common),
           Affine(nout=full_num, **common),
           Dropout(keep=aux_keep_prob),
           Affine(nout=nclass, activation=Softmax(), init=init1)]

# SingleOutputTree trains all three branches (loss-weighted by alphas) but
# emits only the main branch's output at inference time
layers_list = [branch1, branch2, branch3]
layers = SingleOutputTree(layers=layers_list, alphas=[1., 0.3, 0.3])
model = Model(layers=layers)

train = ImageLoader(set_name='train',
                    scale_range=(256, 256),
                    shuffle=True,  # note: shuffle is ignored when do_transforms=False
                    repo_dir='/path/to/data',
                    inner_size=256,
                    subset_pct=100,
                    do_transforms=False)
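
# The eval calls at the end reference a `test` set the original gist never
# defines; a minimal sketch, assuming the data was ingested with the standard
# 'validation' split alongside 'train':
test = ImageLoader(set_name='validation',
                   scale_range=(256, 256),
                   repo_dir='/path/to/data',
                   inner_size=256,
                   subset_pct=100,
                   do_transforms=False)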

# plain SGD with momentum at a fixed learning rate
opt = GradientDescentMomentum(0.1, momentum_coef=0.9)
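
# A stepwise learning-rate decay is common for this network; a sketch of how
# neon's Schedule (imported above) would plug in. The epoch steps and decay
# factor here are assumptions, not part of the original gist:
# sched = Schedule(step_config=[30, 60], change=0.1)  # multiply LR by 0.1 at epochs 30 and 60
# opt = GradientDescentMomentum(0.1, momentum_coef=0.9, schedule=sched)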

# one cross-entropy cost per output branch; with weights=[1, 0., 0.] the two
# auxiliary losses are effectively disabled (use e.g. [1, 0.3, 0.3] to enable them)
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyMulti())],
                 weights=[1, 0., 0.])

callbacks = Callbacks(model)
# train model
model.fit(train, optimizer=opt, num_epochs=100, cost=cost, callbacks=callbacks)
print('Misclassification error on test = %.1f%%' % (model.eval(test, metric=Misclassification())*100))
print('Misclassification error on train = %.1f%%' % (model.eval(train, metric=Misclassification())*100))
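
# GoogLeNet results are usually reported as top-5 error; neon's
# TopKMisclassification metric returns log-loss, top-1 and top-k
# misclassification rates, in that order:
mets = model.eval(test, metric=TopKMisclassification(k=5))
print('Test: logloss %.2f, top-1 error %.1f%%, top-5 error %.1f%%' %
      (mets[0], mets[1] * 100, mets[2] * 100))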