Created
November 26, 2020 23:55
-
-
Save darsnack/28289e94e267b62ca04bb990567ae37a to your computer and use it in GitHub Desktop.
GPU Allocation Snippets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DiffEqFlux, OrdinaryDiffEq, Flux, Printf | |
using Flux.Losses: logitcrossentropy | |
using Flux.Data: DataLoader | |
using MLDatasets | |
using MLDataUtils: LabelEnc, convertlabel, stratifiedobs | |
using CUDA | |
CUDA.allowscalar(false) | |
"""
    loadmnist(batchsize = bs, train_split = 0.9)

Build the training and test `DataLoader`s for MNIST.

The images returned by `MNIST.traindata()` are reshaped so that a singleton
channel dimension sits in third position, the labels are one-hot encoded over
the classes `0:9`, and `stratifiedobs` splits the observations with
`p = train_split`.

Returns the tuple `(train_loader, test_loader)`; only the training loader
shuffles its observations.
"""
function loadmnist(batchsize = bs, train_split = 0.9)
    raw_images, raw_labels = MNIST.traindata()

    # Process images into (H,W,C,BS) batches, stored as Float32
    height, width = size(raw_images, 1), size(raw_images, 2)
    nimages = size(raw_images, 3)
    images = Float32.(reshape(raw_images, height, width, 1, nimages))

    # Use MLDataUtils LabelEnc for natural onehot conversion
    native_labels = LabelEnc.NativeLabels(collect(0:9))
    targets = convertlabel(LabelEnc.OneOfK, raw_labels, native_labels)

    train_part, test_part = stratifiedobs((images, targets), p = train_split)
    train_images, train_targets = train_part
    test_images, test_targets = test_part

    # Use Flux's DataLoader to automatically minibatch and shuffle the data
    train_loader = DataLoader((train_images, Float32.(train_targets));
                              batchsize = batchsize, shuffle = true)
    # Don't shuffle the test data
    test_loader = DataLoader((test_images, Float32.(test_targets));
                             batchsize = batchsize, shuffle = false)
    return (train_loader, test_loader)
end
# Batch size and training fraction used by the rest of the script
const bs = 128
const train_split = 0.9
train_dataloader, test_dataloader = loadmnist(bs, train_split);

# Downsampling front end, moved to the GPU
down = gpu(Chain(
    Conv((3, 3), 1 => 64, relu),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64, relu; stride = (2, 2), pad = (1, 1)),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64; stride = (2, 2), pad = (1, 1)),
))

# Network handed to NeuralODE below as the ODE right-hand side
dudt = gpu(Chain(
    Conv((3, 3), 64 => 64, relu; pad = (1, 1)),
    Conv((3, 3), 64 => 64, relu; pad = (1, 1)),
))

# Classification head: normalise, pool, flatten to (64, BS), project to 10 classes
fc = gpu(Chain(
    GroupNorm(64, 64),
    x -> relu.(x),
    MeanPool((6, 6)),
    x -> reshape(x, 64, :),
    Dense(64, 10),
))

# Neural ODE over t in (0, 1); neither the start point nor intermediate steps
# are saved
nn_ode = gpu(NeuralODE(dudt, (0.0f0, 1.0f0), Tsit5();
                       save_everystep = false,
                       reltol = 1e-3, abstol = 1e-3,
                       save_start = false))

# Move the solver output to the GPU and keep index 1 of its fifth dimension
DiffEqArray_to_Array(x) = gpu(x)[:, :, :, :, 1]

model = Chain(
    down,                            # (28,28,1,BS) -> (6,6,64,BS)
    nn_ode,                          # (6,6,64,BS) -> (6,6,64,BS)
    DiffEqArray_to_Array,
    x -> reshape(x, 6, 6, 64, :),
    fc,                              # (6,6,64,BS) -> (10, BS)
)

# A single observation (kept as a batch of one) used to exercise the layers
img, lab = (gpu(train_dataloader.data[1][:, :, :, 1:1]),
            gpu(train_dataloader.data[2][:, 1:1]))
x_d = down(img)
x_m = model(img)
# Index of the largest entry in every column of `x`, one result per column
function classify(x)
    return [argmax(column) for column in eachcol(x)]
end
"""
    accuracy(model, data; n_batches = 100)

Return the fraction of observations in `data` that `model` classifies correctly.

`data` is iterated as `(x, y)` batches and at most `n_batches` of them are
scored. Targets and model outputs are brought to the CPU with `cpu` and reduced
to class indices with `classify` before they are compared.
"""
function accuracy(model, data; n_batches = 100)
    hits = 0
    seen = 0
    for (batch_index, (inputs, targets)) in enumerate(data)
        # Only evaluate accuracy for n_batches
        if batch_index > n_batches
            break
        end
        expected = classify(cpu(targets))
        predicted = classify(cpu(model(inputs)))
        hits += count(expected .== predicted)
        seen += length(expected)
    end
    return hits / seen
end
# Training objective: logit cross-entropy between the model output and the labels
function loss(x, y)
    return logitcrossentropy(model(x), y)
end
# loss(x, y) = Flux.mse(model(x), y)

# burn in loss
loss(img, lab)

# burn in accuracy
accuracy(model, CuIterator(train_dataloader))

opt = ADAM(0.001)
iter = 0

# Training callback: counts iterations and periodically reports test accuracy
function cb()
    global iter += 1
    # Monitor that the weights do in fact update
    # Every 10 training iterations show accuracy
    if iter % 10 == 1
        # train_accuracy = accuracy(model, CuIterator(train_dataloader)) * 100
        n_test_batches = length(test_dataloader)
        test_fraction = accuracy(model, CuIterator(test_dataloader);
                                 n_batches = n_test_batches)
        test_accuracy = test_fraction * 100
        @printf("Iter: %3d || Test Accuracy: %2.3f\n", iter, test_accuracy)
        # This variant runs the garbage collector and asks CUDA to reclaim
        # memory after every report
        GC.gc()
        CUDA.reclaim()
        # CUDA.memory_status()
    end
end

@time Flux.train!(
    loss,
    Flux.params(down, nn_ode.p, fc),
    CuIterator(train_dataloader),
    opt;
    cb = cb,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DiffEqFlux, OrdinaryDiffEq, Flux, Printf | |
using Flux.Losses: logitcrossentropy | |
using Flux.Data: DataLoader | |
using MLDatasets | |
using MLDataUtils: LabelEnc, convertlabel, stratifiedobs | |
using CUDA | |
CUDA.allowscalar(false) | |
"""
    loadmnist(batchsize = bs, train_split = 0.9)

Build the training and test `DataLoader`s for MNIST.

The images returned by `MNIST.traindata()` are reshaped so that a singleton
channel dimension sits in third position, the labels are one-hot encoded over
the classes `0:9`, and `stratifiedobs` splits the observations with
`p = train_split`.

Returns the tuple `(train_loader, test_loader)`; only the training loader
shuffles its observations.
"""
function loadmnist(batchsize = bs, train_split = 0.9)
    raw_images, raw_labels = MNIST.traindata()

    # Process images into (H,W,C,BS) batches, stored as Float32
    height, width = size(raw_images, 1), size(raw_images, 2)
    nimages = size(raw_images, 3)
    images = Float32.(reshape(raw_images, height, width, 1, nimages))

    # Use MLDataUtils LabelEnc for natural onehot conversion
    native_labels = LabelEnc.NativeLabels(collect(0:9))
    targets = convertlabel(LabelEnc.OneOfK, raw_labels, native_labels)

    train_part, test_part = stratifiedobs((images, targets), p = train_split)
    train_images, train_targets = train_part
    test_images, test_targets = test_part

    # Use Flux's DataLoader to automatically minibatch and shuffle the data
    train_loader = DataLoader((train_images, Float32.(train_targets));
                              batchsize = batchsize, shuffle = true)
    # Don't shuffle the test data
    test_loader = DataLoader((test_images, Float32.(test_targets));
                             batchsize = batchsize, shuffle = false)
    return (train_loader, test_loader)
end
# Batch size and training fraction used by the rest of the script
const bs = 128
const train_split = 0.9
train_dataloader, test_dataloader = loadmnist(bs, train_split);

# Downsampling front end, moved to the GPU
down = gpu(Chain(
    Conv((3, 3), 1 => 64, relu),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64, relu; stride = (2, 2), pad = (1, 1)),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64; stride = (2, 2), pad = (1, 1)),
))

# Network handed to NeuralODE below as the ODE right-hand side
dudt = gpu(Chain(
    Conv((3, 3), 64 => 64, relu; pad = (1, 1)),
    Conv((3, 3), 64 => 64, relu; pad = (1, 1)),
))

# Classification head: normalise, pool, flatten to (64, BS), project to 10 classes
fc = gpu(Chain(
    GroupNorm(64, 64),
    x -> relu.(x),
    MeanPool((6, 6)),
    x -> reshape(x, 64, :),
    Dense(64, 10),
))

# Neural ODE over t in (0, 1); neither the start point nor intermediate steps
# are saved
nn_ode = gpu(NeuralODE(dudt, (0.0f0, 1.0f0), Tsit5();
                       save_everystep = false,
                       reltol = 1e-3, abstol = 1e-3,
                       save_start = false))

# Move the solver output to the GPU and keep index 1 of its fifth dimension
DiffEqArray_to_Array(x) = gpu(x)[:, :, :, :, 1]

model = Chain(
    down,                            # (28,28,1,BS) -> (6,6,64,BS)
    nn_ode,                          # (6,6,64,BS) -> (6,6,64,BS)
    DiffEqArray_to_Array,
    x -> reshape(x, 6, 6, 64, :),
    fc,                              # (6,6,64,BS) -> (10, BS)
)

# A single observation (kept as a batch of one) used to exercise the layers
img, lab = (gpu(train_dataloader.data[1][:, :, :, 1:1]),
            gpu(train_dataloader.data[2][:, 1:1]))
x_d = down(img)
x_m = model(img)
# Index of the largest entry in every column of `x`, one result per column
function classify(x)
    return [argmax(column) for column in eachcol(x)]
end
"""
    accuracy(model, data; n_batches = 100)

Return the fraction of observations in `data` that `model` classifies correctly.

`data` is iterated as `(x, y)` batches and at most `n_batches` of them are
scored. Targets and model outputs are brought to the CPU with `cpu` and reduced
to class indices with `classify` before they are compared.
"""
function accuracy(model, data; n_batches = 100)
    hits = 0
    seen = 0
    for (batch_index, (inputs, targets)) in enumerate(data)
        # Only evaluate accuracy for n_batches
        if batch_index > n_batches
            break
        end
        expected = classify(cpu(targets))
        predicted = classify(cpu(model(inputs)))
        hits += count(expected .== predicted)
        seen += length(expected)
    end
    return hits / seen
end
# Training objective: logit cross-entropy between the model output and the labels
function loss(x, y)
    return logitcrossentropy(model(x), y)
end
# loss(x, y) = Flux.mse(model(x), y)

# burn in loss
loss(img, lab)

# burn in accuracy
accuracy(model, CuIterator(train_dataloader))

opt = ADAM(0.001)
iter = 0

# Training callback: counts iterations and periodically reports test accuracy
function cb()
    global iter += 1
    # Monitor that the weights do in fact update
    # Every 10 training iterations show accuracy
    if iter % 10 == 1
        # train_accuracy = accuracy(model, CuIterator(train_dataloader)) * 100
        n_test_batches = length(test_dataloader)
        test_fraction = accuracy(model, CuIterator(test_dataloader);
                                 n_batches = n_test_batches)
        test_accuracy = test_fraction * 100
        @printf("Iter: %3d || Test Accuracy: %2.3f\n", iter, test_accuracy)
        # Manual memory reclamation is left disabled in this variant:
        # GC.gc()
        # CUDA.reclaim()
        # CUDA.memory_status()
    end
end

@time Flux.train!(
    loss,
    Flux.params(down, nn_ode.p, fc),
    CuIterator(train_dataloader),
    opt;
    cb = cb,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DiffEqFlux, OrdinaryDiffEq, Flux, NNlib, Printf | |
using Flux: logitcrossentropy | |
using DataLoaders | |
using MLDatasets | |
using MLDataPattern | |
using LearnBase | |
using MLDataUtils: LabelEnc, convertlabel | |
using CUDA | |
CUDA.allowscalar(false) | |
# Use MLDataUtils LabelEnc for natural onehot conversion | |
# Arrays pass through untouched ...
function array_wrap(x::AbstractArray)
    return x
end

# ... while a lone number is wrapped into a one-element vector
function array_wrap(x::Number)
    return [x]
end

# One-hot encode raw labels over the classes 0:9 using MLDataUtils' LabelEnc
function onehot(labels_raw)
    classes = collect(0:9)
    return convertlabel(LabelEnc.OneOfK, labels_raw, classes)
end
# implement a MLDataPattern compatible interface | |
# long-term we will update MLDatasets to do this automatically | |
"""
    MNISTDataset(imgs, labels)
    MNISTDataset()

Pairs an image array with its labels so the data can be accessed through
`LearnBase.nobs` / `LearnBase.getobs`. The zero-argument constructor fills both
fields from `MNIST.traindata(Float32)`.
"""
struct MNISTDataset{T, S}
    imgs::T
    labels::S
end

function MNISTDataset()
    return MNISTDataset(MNIST.traindata(Float32)...)
end

# Number of observations = extent of the third dimension of the image array
function LearnBase.nobs(d::MNISTDataset)
    return size(d.imgs, 3)
end

# Fetch observation(s) `idx` as an `(images, onehot_labels)` pair.
function LearnBase.getobs(d::MNISTDataset, idx)
    # Add a singleton dimension in third position to the selected images
    images = Flux.unsqueeze(d.imgs[:, :, idx], 3)
    # `array_wrap` lets a scalar label go through `onehot` like a vector would.
    # NOTE(review): `Flux.squeezebatch` presumably drops the trailing batch
    # dimension again — confirm this is only called with a scalar `idx`.
    targets = Flux.squeezebatch(onehot(array_wrap(d.labels[idx])))
    return images, targets
end
# loadmnist is now returning lazy loaders | |
# the data is only read when the getobs call occurs | |
# this could be extended to only loading data from disk | |
"""
    loadmnist(batchsize = bs, train_split = 0.9)

Create the `(train_loader, test_loader)` pair on top of `MNISTDataset`.

The dataset is split with `MLDataPattern.splitobs` at `train_split`; the
training part is passed through `shuffleobs`, the validation part is used in
its original order. Both are wrapped in a `DataLoader` with `batchsize`.
"""
function loadmnist(batchsize = bs, train_split = 0.9)
    # reference our nobs and getobs
    mnist = MNISTDataset()
    # split training data
    train_part, val_part = MLDataPattern.splitobs(mnist; at = train_split)

    # Use DataLoaders.DataLoader instead
    train_loader = DataLoader(shuffleobs(train_part), batchsize)
    # Don't shuffle the test data
    test_loader = DataLoader(val_part, batchsize)
    return (train_loader, test_loader)
end
# Main | |
# Batch size and training fraction used by the rest of the script
const bs = 128
const train_split = 0.9
train_dataloader, test_dataloader = loadmnist(bs, train_split);

# Downsampling front end, moved to the GPU
down = gpu(Chain(
    Conv((3, 3), 1 => 64, relu; stride = 1),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64, relu; stride = 2, pad = 1),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64; stride = 2, pad = 1),
));

# Network handed to NeuralODE below as the ODE right-hand side
dudt = gpu(Chain(
    Conv((3, 3), 64 => 64, relu; stride = 1, pad = 1),
    Conv((3, 3), 64 => 64, relu; stride = 1, pad = 1),
));

# Classification head: normalise, pool, flatten, project to 10 classes
fc = gpu(Chain(
    GroupNorm(64, 64),
    x -> relu.(x),
    MeanPool((6, 6)),
    flatten,
    Dense(64, 10),
));

# Neural ODE over t in (0, 1); neither the start point nor intermediate steps
# are saved
nn_ode = gpu(NeuralODE(dudt, (0.0f0, 1.0f0), Tsit5();
                       save_everystep = false,
                       reltol = 1e-3, abstol = 1e-3,
                       save_start = false));

# Move the solver output to the GPU and pass it through `Flux.squeezebatch`
function diffeqsol2arr(x)
    return Flux.squeezebatch(gpu(x))
end

# Build our over-all model topology
model = Chain(
    down,           # (28,28,1,BS) -> (6,6,64,BS)
    nn_ode,         # (6,6,64,BS) -> (6,6,64,BS)
    diffeqsol2arr,
    fc,             # (6,6,64,BS) -> (10, BS)
)

# To understand the intermediate NN-ODE layer, we can examine its dimensionality
img, lab = gpu.(first(train_dataloader));
x_d = down(img)

# We can see that we can compute the forward pass through the NN topology
# featuring an NNODE layer.
x_m = model(img)
# Index of the largest entry in every column of `x`, one result per column
function classify(x)
    return [argmax(column) for column in eachcol(x)]
end
"""
    accuracy(model, data; n_batches = 100)

Return the fraction of observations in `data` that `model` classifies correctly.

`data` is iterated as `(x, y)` batches and at most `n_batches` of them are
scored. Targets and model outputs are brought to the CPU with `cpu` and reduced
to class indices with `classify` before they are compared.
"""
function accuracy(model, data; n_batches = 100)
    hits = 0
    seen = 0
    for (batch_index, (inputs, targets)) in enumerate(data)
        # Only evaluate accuracy for n_batches
        if batch_index > n_batches
            break
        end
        expected = classify(cpu(targets))
        predicted = classify(cpu(model(inputs)))
        hits += count(expected .== predicted)
        seen += length(expected)
    end
    return hits / seen
end
# Training objective: logit cross-entropy between the model output and the labels
function loss(x, y)
    return logitcrossentropy(model(x), y)
end

# burn in loss
# do this before accuracy
# iterating train_dataloader will clear the buffers of img, lab
loss(img, lab)

# burn in accuracy
accuracy(model, CuIterator(train_dataloader))

opt = ADAM(1e-3)
iter = 0

# Training callback: counts iterations and periodically reports train and test
# accuracy
function cb()
    global iter += 1
    # Monitor that the weights do in fact update
    # Every 10 training iterations show accuracy
    if iter % 10 == 1
        train_fraction = accuracy(model, CuIterator(train_dataloader))
        train_accuracy = train_fraction * 100
        # The batch cap is the number of observations in the test data, so the
        # loop in `accuracy` ends when the iterator is exhausted
        test_cap = nobs(test_dataloader.data)
        test_fraction = accuracy(model, CuIterator(test_dataloader);
                                 n_batches = test_cap)
        test_accuracy = test_fraction * 100
        @printf("Iter: %3d || Train Accuracy: %2.3f || Test Accuracy: %2.3f\n",
                iter, train_accuracy, test_accuracy)
        # Manual memory reclamation is left disabled in this variant:
        # GC.gc()
        # CUDA.reclaim()
        # CUDA.memory_status()
    end
end

# Train the NN-ODE and monitor the loss and weights.
@time Flux.train!(
    loss,
    Flux.params(down, nn_ode.p, fc),
    CuIterator(train_dataloader),
    opt;
    cb = cb,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DiffEqFlux, OrdinaryDiffEq, Flux, Printf | |
using Flux.Losses: logitcrossentropy | |
using Flux.Data: DataLoader | |
using MLDatasets | |
using MLDataUtils: LabelEnc, convertlabel, stratifiedobs | |
using CUDA | |
CUDA.allowscalar(false) | |
"""
    loadmnist(batchsize = bs, train_split = 0.9)

Build the training and test `DataLoader`s for MNIST with the data already
resident on the GPU.

The images returned by `MNIST.traindata()` are reshaped so that a singleton
channel dimension sits in third position, the labels are one-hot encoded over
the classes `0:9`, and `stratifiedobs` splits the observations with
`p = train_split`. Each part is then materialised and moved to the device once,
so the loaders hand out slices of device arrays.

Returns the tuple `(train_loader, test_loader)`; only the training loader
shuffles its observations.
"""
function loadmnist(batchsize = bs, train_split = 0.9)
    # Use MLDataUtils LabelEnc for natural onehot conversion
    onehot(labels_raw) = convertlabel(LabelEnc.OneOfK, labels_raw,
                                      LabelEnc.NativeLabels(collect(0:9)))
    # Load MNIST
    imgs, labels_raw = MNIST.traindata()
    # Process images into (H,W,C,BS) batches
    x_data = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3)))
    y_data = onehot(labels_raw)
    (x_train, y_train), (x_test, y_test) = stratifiedobs((x_data, y_data),
                                                         p = train_split)

    # The split parts are turned into plain arrays before the transfer. Labels
    # are converted to Float32 explicitly so the loss sees the same label
    # eltype as the Float32 images, instead of whatever eltype `convertlabel`
    # produced.
    to_device(x, y) = (gpu(collect(x)), gpu(Float32.(y)))

    return (
        # Use Flux's DataLoader to automatically minibatch and shuffle the data
        DataLoader(to_device(x_train, y_train); batchsize = batchsize,
                   shuffle = true),
        # Don't shuffle the test data
        DataLoader(to_device(x_test, y_test); batchsize = batchsize,
                   shuffle = false),
    )
end
# Batch size and training fraction used by the rest of the script
const bs = 128
const train_split = 0.9
train_dataloader, test_dataloader = loadmnist(bs, train_split);

# Downsampling front end, moved to the GPU
down = gpu(Chain(
    Conv((3, 3), 1 => 64, relu),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64, relu; stride = (2, 2), pad = (1, 1)),
    GroupNorm(64, 64),
    Conv((4, 4), 64 => 64; stride = (2, 2), pad = (1, 1)),
))

# Network handed to NeuralODE below as the ODE right-hand side
dudt = gpu(Chain(
    Conv((3, 3), 64 => 64, relu; pad = (1, 1)),
    Conv((3, 3), 64 => 64, relu; pad = (1, 1)),
))

# Classification head: normalise, pool, flatten to (64, BS), project to 10 classes
fc = gpu(Chain(
    GroupNorm(64, 64),
    x -> relu.(x),
    MeanPool((6, 6)),
    x -> reshape(x, 64, :),
    Dense(64, 10),
))

# Neural ODE over t in (0, 1); neither the start point nor intermediate steps
# are saved
nn_ode = gpu(NeuralODE(dudt, (0.0f0, 1.0f0), Tsit5();
                       save_everystep = false,
                       reltol = 1e-3, abstol = 1e-3,
                       save_start = false))

# Move the solver output to the GPU and keep index 1 of its fifth dimension
DiffEqArray_to_Array(x) = gpu(x)[:, :, :, :, 1]

model = Chain(
    down,                            # (28,28,1,BS) -> (6,6,64,BS)
    nn_ode,                          # (6,6,64,BS) -> (6,6,64,BS)
    DiffEqArray_to_Array,
    x -> reshape(x, 6, 6, 64, :),
    fc,                              # (6,6,64,BS) -> (10, BS)
)

# A single observation (kept as a batch of one) sliced straight from the
# loader's data, which `loadmnist` already passed through `gpu`
img, lab = (train_dataloader.data[1][:, :, :, 1:1],
            train_dataloader.data[2][:, 1:1])
x_d = down(img)
x_m = model(img)
# Index of the largest entry in every column of `x`, one result per column
function classify(x)
    return [argmax(column) for column in eachcol(x)]
end
"""
    accuracy(model, data; n_batches = 100)

Return the fraction of observations in `data` that `model` classifies correctly.

`data` is iterated as `(x, y)` batches and at most `n_batches` of them are
scored. Targets and model outputs are brought to the CPU with `cpu` and reduced
to class indices with `classify` before they are compared.
"""
function accuracy(model, data; n_batches = 100)
    hits = 0
    seen = 0
    for (batch_index, (inputs, targets)) in enumerate(data)
        # Only evaluate accuracy for n_batches
        if batch_index > n_batches
            break
        end
        expected = classify(cpu(targets))
        predicted = classify(cpu(model(inputs)))
        hits += count(expected .== predicted)
        seen += length(expected)
    end
    return hits / seen
end
# Training objective: logit cross-entropy between the model output and the labels
function loss(x, y)
    return logitcrossentropy(model(x), y)
end
# loss(x, y) = Flux.mse(model(x), y)

# burn in loss
loss(img, lab)

# burn in accuracy (the loaders are iterated directly, without CuIterator)
accuracy(model, train_dataloader)

opt = ADAM(0.001)
iter = 0

# Training callback: counts iterations and periodically reports test accuracy
function cb()
    global iter += 1
    # Monitor that the weights do in fact update
    # Every 10 training iterations show accuracy
    if iter % 10 == 1
        # train_accuracy = accuracy(model, CuIterator(train_dataloader)) * 100
        n_test_batches = length(test_dataloader)
        test_fraction = accuracy(model, test_dataloader;
                                 n_batches = n_test_batches)
        test_accuracy = test_fraction * 100
        @printf("Iter: %3d || Test Accuracy: %2.3f\n", iter, test_accuracy)
        # Manual memory reclamation is left disabled in this variant:
        # GC.gc()
        # CUDA.reclaim()
        # CUDA.memory_status()
    end
end

@time Flux.train!(
    loss,
    Flux.params(down, nn_ode.p, fc),
    train_dataloader,
    opt;
    cb = cb,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment