Last active
April 10, 2017 09:07
-
-
Save nudles/0e84a3f6e191a863c9926ae1823797d7 to your computer and use it in GitHub Desktop.
run alexnet over cifar10 in parallel with a GPU device and a CPU device. replace the original alexnet-parallel.cc in the example folder; compile singa with "cmake -DENABLE_TEST=ON -DUSE_CUDA=ON -DUSE_PYTHON=OFF .."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
#include "cifar10.h" | |
#include "singa/model/feed_forward_net.h" | |
#include "singa/model/optimizer.h" | |
#include "singa/model/updater.h" | |
#include "singa/model/initializer.h" | |
#include "singa/model/metric.h" | |
#include "singa/utils/channel.h" | |
#include "singa/utils/string.h" | |
#include "singa/core/memory.h" | |
#include <thread> | |
#include <memory> | |
namespace singa { | |
LayerConf GenConvConf(string engine, string name, int nb_filter, int kernel, int stride,
                      int pad, float std) {
  // Build a convolution layer config; the engine prefix ("cudnn"/"singacpp")
  // selects which registered implementation is instantiated.
  LayerConf conf;
  conf.set_name(name);
  conf.set_type(engine + "_convolution");

  ConvolutionConf *conv_conf = conf.mutable_convolution_conf();
  conv_conf->set_num_output(nb_filter);
  conv_conf->add_kernel_size(kernel);
  conv_conf->add_stride(stride);
  conv_conf->add_pad(pad);
  conv_conf->set_bias_term(true);

  // Weight parameter: Gaussian-initialized with the requested std deviation.
  ParamSpec *weight_spec = conf.add_param();
  weight_spec->set_name(name + "_weight");
  auto weight_filler = weight_spec->mutable_filler();
  weight_filler->set_type("Gaussian");
  weight_filler->set_std(std);

  // Bias parameter: learns at twice the base learning rate.
  ParamSpec *bias_spec = conf.add_param();
  bias_spec->set_name(name + "_bias");
  bias_spec->set_lr_mult(2);
  // bspec->set_decay_mult(0);
  return conf;
}
LayerConf GenPoolingConf(string engine, string name, bool max_pool, int kernel, int stride,
                         int pad) {
  // Build a pooling layer config for the given engine.
  LayerConf conf;
  conf.set_name(name);
  conf.set_type(engine + "_pooling");
  PoolingConf *pool_conf = conf.mutable_pooling_conf();
  pool_conf->set_kernel_size(kernel);
  pool_conf->set_stride(stride);
  pool_conf->set_pad(pad);
  // The proto's default method is MAX; only override for average pooling.
  if (!max_pool) pool_conf->set_pool(PoolingConf_PoolMethod_AVE);
  return conf;
}
LayerConf GenReLUConf(string engine, string name) {
  // ReLU activation layer; needs no extra configuration beyond its type.
  LayerConf relu_conf;
  relu_conf.set_name(name);
  relu_conf.set_type(engine + "_relu");
  return relu_conf;
}
LayerConf GenDenseConf(string name, int num_output, float std, float wd) {
  // Fully-connected layer config (always the "singa_dense" implementation;
  // no engine parameter, unlike the conv/pool/relu/lrn builders).
  LayerConf conf;
  conf.set_name(name);
  conf.set_type("singa_dense");
  DenseConf *dense_conf = conf.mutable_dense_conf();
  dense_conf->set_num_output(num_output);

  // Weight: Gaussian init with std deviation, weight-decay multiplier wd.
  ParamSpec *weight_spec = conf.add_param();
  weight_spec->set_name(name + "_weight");
  weight_spec->set_decay_mult(wd);
  auto weight_filler = weight_spec->mutable_filler();
  weight_filler->set_type("Gaussian");
  weight_filler->set_std(std);

  // Bias: double learning rate, no weight decay.
  ParamSpec *bias_spec = conf.add_param();
  bias_spec->set_name(name + "_bias");
  bias_spec->set_lr_mult(2);
  bias_spec->set_decay_mult(0);
  return conf;
}
LayerConf GenLRNConf(string engine, string name) {
  // Local-response-normalization layer with fixed hyper-parameters:
  // window = 3, alpha = 5e-5, beta = 0.75.
  LayerConf lrn_layer;
  lrn_layer.set_name(name);
  lrn_layer.set_type(engine + "_lrn");
  LRNConf *lrn_conf = lrn_layer.mutable_lrn_conf();
  lrn_conf->set_local_size(3);
  lrn_conf->set_alpha(5e-05);
  lrn_conf->set_beta(0.75);
  return lrn_layer;
}
LayerConf GenFlattenConf(string name) {
  // Flatten layer: collapses feature maps into a vector ahead of the dense layer.
  LayerConf flat_conf;
  flat_conf.set_name(name);
  flat_conf.set_type("singa_flatten");
  return flat_conf;
}
FeedForwardNet CreateNet(string engine) {
  // Assemble the cifar10 AlexNet-style network for the given engine:
  // three conv blocks (conv/pool/relu/lrn in varying order) followed by a
  // flatten and a 10-way dense classifier.  Layers must be added in forward
  // order; only the first layer needs the input sample shape.
  FeedForwardNet model;
  Shape input_shape{3, 32, 32};  // CHW for a 32x32 RGB cifar10 image
  model.Add(GenConvConf(engine, "conv1", 32, 5, 1, 2, 0.0001), &input_shape);
  model.Add(GenPoolingConf(engine, "pool1", true, 3, 2, 1));
  model.Add(GenReLUConf(engine, "relu1"));
  model.Add(GenLRNConf(engine, "lrn1"));
  model.Add(GenConvConf(engine, "conv2", 32, 5, 1, 2, 0.01));
  model.Add(GenReLUConf(engine, "relu2"));
  model.Add(GenPoolingConf(engine, "pool2", false, 3, 2, 1));
  model.Add(GenLRNConf(engine, "lrn2"));
  model.Add(GenConvConf(engine, "conv3", 64, 5, 1, 2, 0.01));
  model.Add(GenReLUConf(engine, "relu3"));
  model.Add(GenPoolingConf(engine, "pool3", false, 3, 2, 1));
  model.Add(GenFlattenConf("flat"));
  model.Add(GenDenseConf("ip", 10, 0.01, 250));
  return model;
}
void Train(float lr, int num_epoch, string data_dir) { | |
Cifar10 data(data_dir); | |
Tensor train_x, train_y, test_x, test_y; | |
Tensor train_x_1, train_x_2, train_y_1, train_y_2; | |
{ | |
auto train = data.ReadTrainData(); | |
size_t nsamples = train.first.shape(0); | |
auto mtrain = | |
Reshape(train.first, Shape{nsamples, train.first.Size() / nsamples}); | |
const Tensor &mean = Average(mtrain, 0); | |
SubRow(mean, &mtrain); | |
train_x = Reshape(mtrain, train.first.shape()); | |
train_y = train.second; | |
LOG(INFO) << "Slicing training data..."; | |
train_x_1.Reshape(Shape{nsamples / 2, train.first.shape(1), | |
train.first.shape(2), train.first.shape(3)}); | |
LOG(INFO) << "Copying first data slice..."; | |
CopyDataToFrom(&train_x_1, train_x, train_x.Size() / 2); | |
train_x_2.Reshape(Shape{nsamples / 2, train.first.shape(1), | |
train.first.shape(2), train.first.shape(3)}); | |
LOG(INFO) << "Copying second data slice..."; | |
CopyDataToFrom(&train_x_2, train_x, train_x.Size() / 2, 0, | |
train_x.Size() / 2); | |
train_y_1.Reshape(Shape{nsamples / 2}); | |
train_y_1.AsType(kInt); | |
LOG(INFO) << "Copying first label slice..."; | |
CopyDataToFrom(&train_y_1, train_y, train_y.Size() / 2); | |
train_y_2.Reshape(Shape{nsamples / 2}); | |
train_y_2.AsType(kInt); | |
LOG(INFO) << "Copying second label slice..."; | |
CopyDataToFrom(&train_y_2, train_y, train_y.Size() / 2, 0, | |
train_y.Size() / 2); | |
auto test = data.ReadTestData(); | |
nsamples = test.first.shape(0); | |
auto mtest = | |
Reshape(test.first, Shape{nsamples, test.first.Size() / nsamples}); | |
SubRow(mean, &mtest); | |
test_x = Reshape(mtest, test.first.shape()); | |
test_y = test.second; | |
} | |
CHECK_EQ(train_x.shape(0), train_y.shape(0)); | |
CHECK_EQ(test_x.shape(0), test_y.shape(0)); | |
LOG(INFO) << "Total Training samples = " << train_y.shape(0) | |
<< ", Total Test samples = " << test_y.shape(0); | |
CHECK_EQ(train_x_1.shape(0), train_y_1.shape(0)); | |
LOG(INFO) << "On net 1, Training samples = " << train_y_1.shape(0) | |
<< ", Test samples = " << test_y.shape(0); | |
CHECK_EQ(train_x_2.shape(0), train_y_2.shape(0)); | |
LOG(INFO) << "On net 2, Training samples = " << train_y_2.shape(0); | |
auto net_1 = CreateNet("cudnn"); | |
auto net_2 = CreateNet("singacpp"); | |
SGD sgd; | |
OptimizerConf opt_conf; | |
opt_conf.set_momentum(0.9); | |
auto reg = opt_conf.mutable_regularizer(); | |
reg->set_coefficient(0.004); | |
sgd.Setup(opt_conf); | |
sgd.SetLearningRateGenerator([lr](int step) { | |
if (step <= 120) | |
return 0.001; | |
else if (step <= 130) | |
return 0.0001; | |
else | |
return 0.00001; | |
}); | |
SoftmaxCrossEntropy loss_1, loss_2; | |
Accuracy acc_1, acc_2; | |
/// Create updater aggregating gradient on CPU | |
std::shared_ptr<Updater> updater = std::make_shared<LocalUpdater>(2, &sgd); | |
/// Only need to register parameter once. | |
net_1.Compile(true, true, updater, &loss_1, &acc_1); | |
net_2.Compile(true, false, updater, &loss_2, &acc_1); | |
auto dev = std::make_shared<CudaGPU>(); | |
net_1.ToDevice(dev); | |
train_x_1.ToDevice(dev); | |
train_y_1.ToDevice(dev); | |
test_x.ToDevice(dev); | |
test_y.ToDevice(dev); | |
// net.Train(100, num_epoch, train_x, train_y, test_x, test_y); | |
LOG(INFO) << "Launching thread..."; | |
std::thread t1 = | |
net_1.TrainThread(50, num_epoch, train_x_1, train_y_1, test_x, test_y); | |
std::thread t2 = net_2.TrainThread(50, num_epoch, train_x_2, train_y_2); | |
t1.join(); | |
t2.join(); | |
} | |
} | |
int main(int argc, char **argv) {
  // Command line: -epoch N (default 1), -lr F (default 0.001),
  // -data DIR (default "cifar-10-batches-bin").
  singa::InitChannel(nullptr);
  int pos = singa::ArgPos(argc, argv, "-epoch");
  int nEpoch = 1;
  // Guard pos + 1 < argc on every flag: a flag given as the last argument
  // would otherwise dereference argv[argc] (a null pointer) -- UB in
  // atoi/atof and a crash when constructing the data path.
  if (pos != -1 && pos + 1 < argc) nEpoch = atoi(argv[pos + 1]);
  pos = singa::ArgPos(argc, argv, "-lr");
  float lr = 0.001;
  if (pos != -1 && pos + 1 < argc) lr = atof(argv[pos + 1]);
  pos = singa::ArgPos(argc, argv, "-data");
  string data = "cifar-10-batches-bin";
  if (pos != -1 && pos + 1 < argc) data = argv[pos + 1];
  LOG(INFO) << "Start training";
  singa::Train(lr, nEpoch, data);
  LOG(INFO) << "End training";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment