Created
March 10, 2022 09:35
-
-
Save skypenguins/65e0b950fe694c040a23e24041b73c11 to your computer and use it in GitHub Desktop.
Flux.jlのチュートリアルをA100で動かす (Running the Flux.jl tutorial on an NVIDIA A100)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "cc520fd5", | |
"metadata": {}, | |
"source": [ | |
"# Running Julia + Flux.jl on A100" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "0da985a9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Julia Version 1.6.0\n", | |
"Commit f9720dc2eb (2021-03-24 12:55 UTC)\n", | |
"Platform Info:\n", | |
" OS: Linux (x86_64-pc-linux-gnu)\n", | |
" CPU: Intel(R) Xeon(R) Gold 5320 CPU @ 2.20GHz\n", | |
" WORD_SIZE: 64\n", | |
" LIBM: libopenlibm\n", | |
" LLVM: libLLVM-11.0.1 (ORCJIT, icelake-server)\n" | |
] | |
} | |
], | |
"source": [ | |
"versioninfo()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "2a6d68a2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Thu Mar 10 18:30:31 2022 \n", | |
"+-----------------------------------------------------------------------------+\n", | |
"| NVIDIA-SMI 510.39.01 Driver Version: 510.39.01 CUDA Version: 11.6 |\n", | |
"|-------------------------------+----------------------+----------------------+\n", | |
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", | |
"| | | MIG M. |\n", | |
"|===============================+======================+======================|\n", | |
"| 0 NVIDIA A100-PCI... On | 00000000:17:00.0 Off | 0 |\n", | |
"| N/A 36C P0 39W / 250W | 0MiB / 40960MiB | 0% Default |\n", | |
"| | | Disabled |\n", | |
"+-------------------------------+----------------------+----------------------+\n", | |
"| 1 NVIDIA A100-PCI... On | 00000000:CA:00.0 Off | 0 |\n", | |
"| N/A 35C P0 37W / 250W | 0MiB / 40960MiB | 0% Default |\n", | |
"| | | Disabled |\n", | |
"+-------------------------------+----------------------+----------------------+\n", | |
" \n", | |
"+-----------------------------------------------------------------------------+\n", | |
"| Processes: |\n", | |
"| GPU GI CI PID Type Process name GPU Memory |\n", | |
"| ID ID Usage |\n", | |
"|=============================================================================|\n", | |
"| No running processes found |\n", | |
"+-----------------------------------------------------------------------------+\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"Process(`\u001b[4mnvidia-smi\u001b[24m`, ProcessExited(0))" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"run(`nvidia-smi`)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "25e3dfc0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" total used free shared buff/cache available\n", | |
"Mem: 503Gi 3.0Gi 493Gi 2.0Mi 7.2Gi 497Gi\n", | |
"Swap: 7.4Gi 0B 7.4Gi\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"Process(`\u001b[4mfree\u001b[24m \u001b[4m-h\u001b[24m`, ProcessExited(0))" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"run(`free -h`)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "bb930569", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"using Flux, Statistics\n", | |
"using Flux.Data: DataLoader\n", | |
"using Flux: onehotbatch, onecold, @epochs\n", | |
"using Flux.Losses: logitcrossentropy\n", | |
"using Base: @kwdef\n", | |
"using CUDA\n", | |
"using MLDatasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "acf7e2c1", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CUDA toolkit 11.6, artifact installation\n", | |
"NVIDIA driver 510.39.1, for CUDA 11.6\n", | |
"CUDA driver 11.6\n", | |
"\n", | |
"Libraries: \n", | |
"- CUBLAS: 11.8.1\n", | |
"- CURAND: 10.2.9\n", | |
"- CUFFT: 10.7.0\n", | |
"- CUSOLVER: 11.3.2\n", | |
"- CUSPARSE: 11.7.1\n", | |
"- CUPTI: 16.0.0\n", | |
"- NVML: 11.0.0+510.39.1\n", | |
"- CUDNN: 8.30.2 (for CUDA 11.5.0)\n", | |
"- CUTENSOR: 1.4.0 (for CUDA 11.5.0)\n", | |
"\n", | |
"Toolchain:\n", | |
"- Julia: 1.6.0\n", | |
"- LLVM: 11.0.1\n", | |
"- PTX ISA support: 3.2, 4.0, 4.1, 4.2, 4.3, 5.0, 6.0, 6.1, 6.3, 6.4, 6.5, 7.0\n", | |
"- Device capability support: sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80\n", | |
"\n", | |
"2 devices:\n", | |
" 0: NVIDIA A100-PCIE-40GB (sm_80, 39.406 GiB / 40.000 GiB available)\n", | |
" 1: NVIDIA A100-PCIE-40GB (sm_80, 39.406 GiB / 40.000 GiB available)\n" | |
] | |
} | |
], | |
"source": [ | |
"CUDA.versioninfo()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "10ff0e98", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"getdata (generic function with 1 method)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Build mini-batch `DataLoader`s over MNIST for training and evaluation.\n",
"function getdata(args, device)\n",
"    # Auto-accept the MLDatasets download prompt (needed for non-interactive runs).\n",
"    ENV[\"DATADEPS_ALWAYS_ACCEPT\"] = \"true\"\n",
"\n",
"    # Fetch MNIST images and labels as Float32 arrays.\n",
"    x_train, y_train = MLDatasets.MNIST.traindata(Float32)\n",
"    x_test, y_test = MLDatasets.MNIST.testdata(Float32)\n",
"\n",
"    # Flatten each 28x28 image into a 784-element column vector.\n",
"    x_train, x_test = Flux.flatten(x_train), Flux.flatten(x_test)\n",
"\n",
"    # One-hot encode the digit labels 0:9.\n",
"    y_train, y_test = onehotbatch(y_train, 0:9), onehotbatch(y_test, 0:9)\n",
"\n",
"    # Mini-batch iterators; only the training set is shuffled.\n",
"    # NOTE(review): `device` is accepted but unused here — batches are moved\n",
"    # to the device inside the training loop in `train` instead.\n",
"    train_loader = DataLoader((x_train, y_train), batchsize=args.batchsize, shuffle=true)\n",
"    test_loader = DataLoader((x_test, y_test), batchsize=args.batchsize)\n",
"\n",
"    return train_loader, test_loader\n",
"end"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "eacb500f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"build_model (generic function with 1 method)" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# A small MLP classifier: flattened image -> 32 hidden units -> class logits.\n",
"# The output has no softmax — it is paired with `logitcrossentropy` downstream.\n",
"function build_model(; imgsize=(28,28,1), nclasses=10)\n",
"    in_features = prod(imgsize)  # 28*28*1 = 784 inputs by default\n",
"    return Chain(Dense(in_features, 32, relu),\n",
"                 Dense(32, nclasses))\n",
"end"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "c5cd06a4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"loss_and_accuracy (generic function with 1 method)" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Average loss and classification accuracy of `model` over all of `data_loader`.\n",
"function loss_and_accuracy(data_loader, model, device)\n",
"    # Accumulate summed loss, correct predictions, and sample count per batch,\n",
"    # then normalise once at the end.\n",
"    total_loss = 0.0f0\n",
"    correct = 0\n",
"    seen = 0\n",
"    for (x, y) in data_loader\n",
"        x, y = device(x), device(y)\n",
"        ŷ = model(x)\n",
"        total_loss += logitcrossentropy(ŷ, y, agg=sum)  # sum, not mean: divided by `seen` below\n",
"        correct += sum(onecold(ŷ) .== onecold(y))\n",
"        seen += size(x)[end]  # batch dimension is last\n",
"    end\n",
"    return total_loss / seen, correct / seen\n",
"end"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "d2ebc80d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Args" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Hyperparameters; each can be overridden as a keyword argument of `train`,\n",
"# e.g. `train(η=0.01)`.\n",
"@kwdef mutable struct Args\n",
"    η::Float64 = 3e-4      # ADAM step size\n",
"    batchsize::Int = 256   # samples per mini-batch\n",
"    epochs::Int = 10       # full passes over the training set\n",
"    use_cuda::Bool = true  # use the GPU when CUDA is functional\n",
"end"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "062f1bcc", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"train (generic function with 1 method)" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Full training run: build the data, model, and optimiser, then fit for\n",
"# `args.epochs` epochs, printing train/test metrics after each one.\n",
"function train(; kws...)\n",
"    args = Args(; kws...)  # gather hyperparameters into a struct\n",
"\n",
"    # Pick the compute device: GPU when requested and available, otherwise CPU.\n",
"    if CUDA.functional() && args.use_cuda\n",
"        @info \"Training on CUDA GPU\"\n",
"        CUDA.allowscalar(false)  # fail fast on accidental scalar GPU indexing\n",
"        device = gpu\n",
"    else\n",
"        @info \"Training on CPU\"\n",
"        device = cpu\n",
"    end\n",
"\n",
"    # Data loaders, model, and optimiser.\n",
"    train_loader, test_loader = getdata(args, device)\n",
"    model = build_model() |> device\n",
"    ps = Flux.params(model)  # the model's trainable parameters\n",
"    opt = ADAM(args.η)\n",
"\n",
"    for epoch in 1:args.epochs\n",
"        # One pass over the training set: move each batch to the device,\n",
"        # then take a single ADAM step on the cross-entropy gradient.\n",
"        for (x, y) in train_loader\n",
"            x, y = device(x), device(y)\n",
"            gs = gradient(() -> logitcrossentropy(model(x), y), ps)\n",
"            Flux.Optimise.update!(opt, ps, gs)\n",
"        end\n",
"\n",
"        # End-of-epoch metrics on both splits.\n",
"        train_loss, train_acc = loss_and_accuracy(train_loader, model, device)\n",
"        test_loss, test_acc = loss_and_accuracy(test_loader, model, device)\n",
"        println(\"Epoch=$epoch\")\n",
"        println(\"  train_loss = $train_loss, train_accuracy = $train_acc\")\n",
"        println(\"  test_loss = $test_loss, test_accuracy = $test_acc\")\n",
"    end\n",
"end"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "43f6e32f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"┌ Info: Training on CUDA GPU\n", | |
"└ @ Main In[10]:5\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch=1\n", | |
" train_loss = 0.606988, train_accuracy = 0.85675\n", | |
" test_loss = 0.58632696, test_accuracy = 0.864\n", | |
"Epoch=2\n", | |
" train_loss = 0.40056038, train_accuracy = 0.8945166666666666\n", | |
" test_loss = 0.38613266, test_accuracy = 0.8997\n", | |
"Epoch=3\n", | |
" train_loss = 0.3332611, train_accuracy = 0.9094166666666667\n", | |
" test_loss = 0.3234049, test_accuracy = 0.9131\n", | |
"Epoch=4\n", | |
" train_loss = 0.29904968, train_accuracy = 0.9164666666666667\n", | |
" test_loss = 0.29198298, test_accuracy = 0.9198\n", | |
"Epoch=5\n", | |
" train_loss = 0.27560335, train_accuracy = 0.9225\n", | |
" test_loss = 0.27200863, test_accuracy = 0.9247\n", | |
"Epoch=6\n", | |
" train_loss = 0.25826827, train_accuracy = 0.9267833333333333\n", | |
" test_loss = 0.25732666, test_accuracy = 0.9283\n", | |
"Epoch=7\n", | |
" train_loss = 0.24577664, train_accuracy = 0.9297666666666666\n", | |
" test_loss = 0.24662031, test_accuracy = 0.9302\n", | |
"Epoch=8\n", | |
" train_loss = 0.23341878, train_accuracy = 0.9339\n", | |
" test_loss = 0.23410138, test_accuracy = 0.9335\n", | |
"Epoch=9\n", | |
" train_loss = 0.22294606, train_accuracy = 0.9370666666666667\n", | |
" test_loss = 0.22563022, test_accuracy = 0.9349\n", | |
"Epoch=10\n", | |
" train_loss = 0.21395192, train_accuracy = 0.9395166666666667\n", | |
" test_loss = 0.21730356, test_accuracy = 0.9362\n", | |
" 86.944780 seconds (195.17 M allocations: 15.019 GiB, 3.41% gc time, 30.42% compilation time)\n" | |
] | |
} | |
], | |
"source": [ | |
"### Run training \n", | |
"@time train()\n", | |
"# train(η=0.01) # can change hyperparameter" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Julia 1.6.0", | |
"language": "julia", | |
"name": "julia-1.6" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"mimetype": "application/julia", | |
"name": "julia", | |
"version": "1.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment