python3 neural_style.py -backend cudnn -cudnn_autotune -optimizer lbfgs -num_iterations 500 -gpu 0,1,2,3,4,5 -multidevice_strategy 0,4,10,16,28 -image_size 1024 -seed 876
Element type Size Used MEM
-------------------------------------------------------------------------------
Storage on cuda:0
Tensor0 (2359296,) 9.00M
Tensor1 (2359296,) 9.00M
Tensor2 (1,) 512.00B
Tensor3 (2359296,) 9.00M
Tensor4 (2359296,) 9.00M
Tensor5 (1,) 512.00B
Tensor6 (2359296,) 9.00M
Tensor7 (2359296,) 9.00M
Tensor8 (1,) 512.00B
Tensor9 (2359296,) 9.00M
Tensor10 (2359296,) 9.00M
Tensor11 (1,) 512.00B
Tensor12 (2359296,) 9.00M
Tensor13 (2359296,) 9.00M
Tensor14 (1,) 512.00B
Tensor15 (2359296,) 9.00M
Tensor16 (2359296,) 9.00M
Tensor17 (1,) 512.00B
Tensor18 (2359296,) 9.00M
Tensor19 (2359296,) 9.00M
Tensor20 (1,) 512.00B
Tensor21 (2359296,) 9.00M
Tensor22 (2359296,) 9.00M
Tensor23 (1,) 512.00B
Tensor24 (2359296,) 9.00M
Tensor25 (2359296,) 9.00M
Tensor26 (1,) 512.00B
Tensor27 (2359296,) 9.00M
Tensor28 (2359296,) 9.00M
Tensor29 (1,) 512.00B
Tensor30 (2359296,) 9.00M
Tensor31 (2359296,) 9.00M
Tensor32 (1,) 512.00B
Tensor33 (2359296,) 9.00M
Tensor34 (2359296,) 9.00M
Tensor35 (1,) 512.00B
Tensor36 (2359296,) 9.00M
Tensor37 (2359296,) 9.00M
Tensor38 (1,) 512.00B
Tensor39 (2359296,) 9.00M
Tensor40 (2359296,) 9.00M
Tensor41 (1,) 512.00B
Tensor42 (2359296,) 9.00M
Tensor43 (2359296,) 9.00M
Tensor44 (1,) 512.00B
Tensor45 (2359296,) 9.00M
Tensor46 (2359296,) 9.00M
Tensor47 (1,) 512.00B
Tensor48 (2359296,) 9.00M
Tensor49 (2359296,) 9.00M
Tensor50 (1,) 512.00B
Tensor51 (2359296,) 9.00M
Tensor52 (2359296,) 9.00M
Tensor53 (1,) 512.00B
Tensor54 (2359296,) 9.00M
Tensor55 (2359296,) 9.00M
Tensor56 (1,) 512.00B
Tensor57 (2359296,) 9.00M
Tensor58 (2359296,) 9.00M
Tensor59 (1,) 512.00B
Tensor60 (2359296,) 9.00M
Tensor61 (2359296,) 9.00M
Tensor62 (1,) 512.00B
Tensor63 (2359296,) 9.00M
Tensor64 (2359296,) 9.00M
Tensor65 (1,) 512.00B
Tensor66 (2359296,) 9.00M
Tensor67 (2359296,) 9.00M
Tensor68 (1,) 512.00B
Tensor69 (2359296,) 9.00M
Tensor70 (2359296,) 9.00M
Tensor71 (1,) 512.00B
Tensor72 (2359296,) 9.00M
Tensor73 (2359296,) 9.00M
Tensor74 (1,) 512.00B
Tensor75 (2359296,) 9.00M
Tensor76 (2359296,) 9.00M
Tensor77 (1,) 512.00B
Tensor78 (2359296,) 9.00M
Tensor79 (2359296,) 9.00M
Tensor80 (1,) 512.00B
Tensor81 (2359296,) 9.00M
Tensor82 (2359296,) 9.00M
Tensor83 (1,) 512.00B
Tensor84 (2359296,) 9.00M
Tensor85 (2359296,) 9.00M
Tensor86 (1,) 512.00B
Tensor87 (2359296,) 9.00M
Tensor88 (2359296,) 9.00M
Tensor89 (1,) 512.00B
Tensor90 (2359296,) 9.00M
Tensor91 (2359296,) 9.00M
Tensor92 (1,) 512.00B
Tensor93 (2359296,) 9.00M
Tensor94 (2359296,) 9.00M
Tensor95 (1,) 512.00B
Tensor96 (2359296,) 9.00M
Tensor97 (2359296,) 9.00M
Tensor98 (1,) 512.00B
Tensor99 (2359296,) 9.00M
Tensor100 (2359296,) 9.00M
Tensor101 (1,) 512.00B
Tensor102 (2359296,) 9.00M
Tensor103 (2359296,) 9.00M
Tensor104 (1,) 512.00B
Tensor105 (2359296,) 9.00M
Tensor106 (2359296,) 9.00M
Tensor107 (1,) 512.00B
Tensor108 (2359296,) 9.00M
Tensor109 (2359296,) 9.00M
Tensor110 (1,) 512.00B
Tensor111 (2359296,) 9.00M
Tensor112 (2359296,) 9.00M
Tensor113 (1,) 512.00B
Tensor114 (2359296,) 9.00M
Tensor115 (2359296,) 9.00M
Tensor116 (1,) 512.00B
Tensor117 (2359296,) 9.00M
Tensor118 (2359296,) 9.00M
Tensor119 (1,) 512.00B
Tensor120 (2359296,) 9.00M
Tensor121 (2359296,) 9.00M
Tensor122 (1,) 512.00B
Tensor123 (2359296,) 9.00M
Tensor124 (2359296,) 9.00M
Tensor125 (1,) 512.00B
Tensor126 (2359296,) 9.00M
Tensor127 (2359296,) 9.00M
Tensor128 (1,) 512.00B
Tensor129 (2359296,) 9.00M
Tensor130 (2359296,) 9.00M
Tensor131 (1,) 512.00B
Tensor132 (2359296,) 9.00M
Tensor133 (2359296,) 9.00M
Tensor134 (1,) 512.00B
Tensor135 (2359296,) 9.00M
Tensor136 (2359296,) 9.00M
Tensor137 (1,) 512.00B
Tensor138 (2359296,) 9.00M
Tensor139 (2359296,) 9.00M
Tensor140 (1,) 512.00B
Tensor141 (2359296,) 9.00M
Tensor142 (2359296,) 9.00M
Tensor143 (1,) 512.00B
Tensor144 (2359296,) 9.00M
Tensor145 (2359296,) 9.00M
Tensor146 (1,) 512.00B
Tensor147 (2359296,) 9.00M
Tensor148 (2359296,) 9.00M
Tensor149 (1,) 512.00B
Tensor150 (2359296,) 9.00M
Tensor151 (2359296,) 9.00M
Tensor152 (1,) 512.00B
Tensor153 (2359296,) 9.00M
Tensor154 (2359296,) 9.00M
Tensor155 (1,) 512.00B
Tensor156 (2359296,) 9.00M
Tensor157 (2359296,) 9.00M
Tensor158 (1,) 512.00B
Tensor159 (2359296,) 9.00M
Tensor160 (2359296,) 9.00M
Tensor161 (1,) 512.00B
Tensor162 (2359296,) 9.00M
Tensor163 (2359296,) 9.00M
Tensor164 (1,) 512.00B
Tensor165 (2359296,) 9.00M
Tensor166 (2359296,) 9.00M
Tensor167 (1,) 512.00B
Tensor168 (2359296,) 9.00M
Tensor169 (2359296,) 9.00M
Tensor170 (1,) 512.00B
Tensor171 (2359296,) 9.00M
Tensor172 (2359296,) 9.00M
Tensor173 (1,) 512.00B
Tensor174 (2359296,) 9.00M
Tensor175 (2359296,) 9.00M
Tensor176 (1,) 512.00B
Tensor177 (2359296,) 9.00M
Tensor178 (2359296,) 9.00M
Tensor179 (1,) 512.00B
Tensor180 (2359296,) 9.00M
Tensor181 (2359296,) 9.00M
Tensor182 (1,) 512.00B
Tensor183 (2359296,) 9.00M
Tensor184 (2359296,) 9.00M
Tensor185 (1,) 512.00B
Tensor186 (2359296,) 9.00M
Tensor187 (2359296,) 9.00M
Tensor188 (1,) 512.00B
Tensor189 (2359296,) 9.00M
Tensor190 (2359296,) 9.00M
Tensor191 (1,) 512.00B
Tensor192 (2359296,) 9.00M
Tensor193 (2359296,) 9.00M
Tensor194 (1,) 512.00B
Tensor195 (2359296,) 9.00M
Tensor196 (2359296,) 9.00M
Tensor197 (1,) 512.00B
Tensor198 (2359296,) 9.00M
Tensor199 (2359296,) 9.00M
Tensor200 (1,) 512.00B
Tensor201 (2359296,) 9.00M
Tensor202 (2359296,) 9.00M
Tensor203 (1,) 512.00B
Tensor204 (2359296,) 9.00M
Tensor205 (2359296,) 9.00M
Tensor206 (1,) 512.00B
Tensor207 (2359296,) 9.00M
Tensor208 (2359296,) 9.00M
Tensor209 (1,) 512.00B
Tensor210 (2359296,) 9.00M
Tensor211 (2359296,) 9.00M
Tensor212 (1,) 512.00B
Tensor213 (2359296,) 9.00M
Tensor214 (2359296,) 9.00M
Tensor215 (1,) 512.00B
Tensor216 (2359296,) 9.00M
Tensor217 (2359296,) 9.00M
Tensor218 (1,) 512.00B
Tensor219 (2359296,) 9.00M
Tensor220 (2359296,) 9.00M
Tensor221 (1,) 512.00B
Tensor222 (2359296,) 9.00M
Tensor223 (2359296,) 9.00M
Tensor224 (1,) 512.00B
Tensor225 (2359296,) 9.00M
Tensor226 (2359296,) 9.00M
Tensor227 (1,) 512.00B
Tensor228 (2359296,) 9.00M
Tensor229 (2359296,) 9.00M
Tensor230 (1,) 512.00B
Tensor231 (2359296,) 9.00M
Tensor232 (2359296,) 9.00M
Tensor233 (1,) 512.00B
Tensor234 (2359296,) 9.00M
Tensor235 (2359296,) 9.00M
Tensor236 (1,) 512.00B
Tensor237 (2359296,) 9.00M
Tensor238 (2359296,) 9.00M
Tensor239 (1,) 512.00B
Tensor240 (2359296,) 9.00M
Tensor241 (2359296,) 9.00M
Tensor242 (1,) 512.00B
Tensor243 (2359296,) 9.00M
Tensor244 (2359296,) 9.00M
Tensor245 (1,) 512.00B
Tensor246 (2359296,) 9.00M
Tensor247 (2359296,) 9.00M
Tensor248 (1,) 512.00B
Tensor249 (2359296,) 9.00M
Tensor250 (2359296,) 9.00M
Tensor251 (1,) 512.00B
Tensor252 (2359296,) 9.00M
Tensor253 (2359296,) 9.00M
Tensor254 (1,) 512.00B
Tensor255 (2359296,) 9.00M
Tensor256 (2359296,) 9.00M
Tensor257 (1,) 512.00B
Tensor258 (2359296,) 9.00M
Tensor259 (2359296,) 9.00M
Tensor260 (1,) 512.00B
Tensor261 (2359296,) 9.00M
Tensor262 (2359296,) 9.00M
Tensor263 (1,) 512.00B
Tensor264 (2359296,) 9.00M
Tensor265 (2359296,) 9.00M
Tensor266 (1,) 512.00B
Tensor267 (2359296,) 9.00M
Tensor268 (2359296,) 9.00M
Tensor269 (1,) 512.00B
Tensor270 (2359296,) 9.00M
Tensor271 (2359296,) 9.00M
Tensor272 (1,) 512.00B
Tensor273 (2359296,) 9.00M
Tensor274 (2359296,) 9.00M
Tensor275 (1,) 512.00B
Tensor276 (2359296,) 9.00M
Tensor277 (2359296,) 9.00M
Tensor278 (1,) 512.00B
Tensor279 (2359296,) 9.00M
Tensor280 (2359296,) 9.00M
Tensor281 (1,) 512.00B
Tensor282 (2359296,) 9.00M
Tensor283 (2359296,) 9.00M
Tensor284 (1,) 512.00B
Tensor285 (2359296,) 9.00M
Tensor286 (2359296,) 9.00M
Tensor287 (1,) 512.00B
Tensor288 (2359296,) 9.00M
Tensor289 (2359296,) 9.00M
Tensor290 (1,) 512.00B
Tensor291 (2359296,) 9.00M
Tensor292 (2359296,) 9.00M
Tensor293 (1,) 512.00B
Tensor294 (2359296,) 9.00M
Tensor295 (2359296,) 9.00M
Tensor296 (1,) 512.00B
Tensor297 (1, 3, 767, 1024) 8.99M
Tensor298 (1, 3, 768, 1023) 8.99M
Tensor299 (1,) 512.00B
Tensor300 (2359296,) 9.00M
Tensor301 (2359296,) 9.00M
Tensor302 (1,) 512.00B
Tensor303 (1,) 512.00B
Tensor304 (1,) 512.00B
Tensor305 (1,) 512.00B
Tensor306 (1,) 512.00B
Tensor307 (1,) 512.00B
Tensor308 (1,) 512.00B
Tensor309 (1,) 512.00B
Tensor310 (1,) 512.00B
Tensor311 (1,) 512.00B
Tensor312 (1,) 512.00B
Tensor313 (1,) 512.00B
Tensor314 (1,) 512.00B
Tensor315 (1,) 512.00B
Tensor316 (1,) 512.00B
Tensor317 (1,) 512.00B
Tensor318 (1,) 512.00B
Tensor319 (1,) 512.00B
Tensor320 (1,) 512.00B
Tensor321 (1,) 512.00B
Tensor322 (1,) 512.00B
Tensor323 (1,) 512.00B
Tensor324 (1,) 512.00B
Tensor325 (1,) 512.00B
Tensor326 (1,) 512.00B
Tensor327 (1,) 512.00B
Tensor328 (1,) 512.00B
Tensor329 (1,) 512.00B
Tensor330 (1,) 512.00B
Tensor331 (1,) 512.00B
Tensor332 (1,) 512.00B
Tensor333 (1,) 512.00B
Tensor334 (1,) 512.00B
Tensor335 (1,) 512.00B
Tensor336 (1,) 512.00B
Tensor337 (1,) 512.00B
Tensor338 (1,) 512.00B
Tensor339 (1,) 512.00B
Tensor340 (1,) 512.00B
Tensor341 (1,) 512.00B
Tensor342 (1,) 512.00B
Tensor343 (1,) 512.00B
Tensor344 (1,) 512.00B
Tensor345 (1,) 512.00B
Tensor346 (1,) 512.00B
Tensor347 (1,) 512.00B
Tensor348 (1,) 512.00B
Tensor349 (1,) 512.00B
Tensor350 (1,) 512.00B
Tensor351 (1,) 512.00B
Tensor352 (1,) 512.00B
Tensor353 (1,) 512.00B
Tensor354 (1,) 512.00B
Tensor355 (1,) 512.00B
Tensor356 (1,) 512.00B
Tensor357 (1,) 512.00B
Tensor358 (1,) 512.00B
Tensor359 (1,) 512.00B
Tensor360 (1,) 512.00B
Tensor361 (1,) 512.00B
Tensor362 (1,) 512.00B
Tensor363 (1,) 512.00B
Tensor364 (1,) 512.00B
Tensor365 (1,) 512.00B
Tensor366 (1,) 512.00B
Tensor367 (1,) 512.00B
Tensor368 (1,) 512.00B
Tensor369 (1,) 512.00B
Tensor370 (1,) 512.00B
Tensor371 (1,) 512.00B
Tensor372 (1,) 512.00B
Tensor373 (1,) 512.00B
Tensor374 (1,) 512.00B
Tensor375 (1,) 512.00B
Tensor376 (1,) 512.00B
Tensor377 (1,) 512.00B
Tensor378 (1,) 512.00B
Tensor379 (1,) 512.00B
Tensor380 (1,) 512.00B
Tensor381 (1,) 512.00B
Tensor382 (1,) 512.00B
Tensor383 (1,) 512.00B
Tensor384 (1,) 512.00B
Tensor385 (1,) 512.00B
Tensor386 (1,) 512.00B
Tensor387 (1,) 512.00B
Tensor388 (1,) 512.00B
Tensor389 (1,) 512.00B
Tensor390 (1,) 512.00B
Tensor391 (1,) 512.00B
Tensor392 (1,) 512.00B
Tensor393 (1,) 512.00B
Tensor394 (1,) 512.00B
Tensor395 (1,) 512.00B
Tensor396 (1,) 512.00B
Tensor397 (1,) 512.00B
Tensor398 (1,) 512.00B
Tensor399 (1,) 512.00B
Tensor400 (1,) 512.00B
Tensor401 (1,) 512.00B
Tensor402 (1,) 512.00B
Tensor403 (1,) 512.00B
Tensor404 (2359296,) 9.00M
Tensor405 (1, 3, 768, 1024) 9.00M
Tensor406 (1, 3, 1024, 787) 9.22M
Parameter407 (1, 3, 768, 1024) 9.00M
Parameter407.grad (1, 3, 768, 1024) 9.00M
Tensor408 (2359296,) 9.00M
Parameter409 (512, 512, 3, 3) 9.00M
Parameter410 (512,) 2.00K
Parameter411 (512, 512, 3, 3) 9.00M
Parameter412 (512,) 2.00K
Parameter413 (512, 512, 3, 3) 9.00M
Parameter414 (512,) 2.00K
-------------------------------------------------------------------------------
Total Tensors: 497866186 Used Memory: 1.85G
The allocated memory on cuda:0: 1.86G
Memory differs due to the matrix alignment or invisible gradient buffer tensors
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
Storage on cuda:1
Tensor415 (64, 64) 16.00K
Tensor416 (1,) 512.00B
Parameter417 (64, 3, 3, 3) 7.00K
Parameter417.grad (64, 3, 3, 3) 7.00K
Parameter418 (64,) 512.00B
Parameter418.grad (64,) 512.00B
Parameter419 (64, 64, 3, 3) 144.00K
Parameter419.grad (64, 64, 3, 3) 144.00K
Parameter420 (64,) 512.00B
Parameter420.grad (64,) 512.00B
Tensor421 (64, 64) 16.00K
-------------------------------------------------------------------------------
Total Tensors: 85633 Used Memory: 336.50K
The allocated memory on cuda:1: 336.50K
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
Storage on cuda:2
Tensor422 (128, 128) 64.00K
Tensor423 (1,) 512.00B
Parameter424 (128, 64, 3, 3) 288.00K
Parameter424.grad (128, 64, 3, 3) 288.00K
Parameter425 (128,) 512.00B
Parameter425.grad (128,) 512.00B
Parameter426 (128, 128, 3, 3) 576.00K
Parameter426.grad (128, 128, 3, 3) 576.00K
Parameter427 (128,) 512.00B
Parameter427.grad (128,) 512.00B
Tensor428 (128, 128) 64.00K
-------------------------------------------------------------------------------
Total Tensors: 475649 Used Memory: 1.81M
The allocated memory on cuda:2: 1.81M
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
Storage on cuda:3
Tensor429 (256, 256) 256.00K
Tensor430 (1,) 512.00B
Parameter431 (256, 128, 3, 3) 1.12M
Parameter431.grad (256, 128, 3, 3) 1.12M
Parameter432 (256,) 1.00K
Parameter432.grad (256,) 1.00K
Parameter433 (256, 256, 3, 3) 2.25M
Parameter433.grad (256, 256, 3, 3) 2.25M
Parameter434 (256,) 1.00K
Parameter434.grad (256,) 1.00K
Tensor435 (256, 256) 256.00K
-------------------------------------------------------------------------------
Total Tensors: 1901569 Used Memory: 7.25M
The allocated memory on cuda:3: 7.88M
Memory differs due to the matrix alignment or invisible gradient buffer tensors
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
Storage on cuda:4
Tensor436 (512, 512) 1.00M
Tensor437 (1,) 512.00B
Tensor438 (1,) 512.00B
Parameter439 (256, 256, 3, 3) 2.25M
Parameter439.grad (256, 256, 3, 3) 2.25M
Parameter440 (256,) 1.00K
Parameter440.grad (256,) 1.00K
Parameter441 (256, 256, 3, 3) 2.25M
Parameter441.grad (256, 256, 3, 3) 2.25M
Parameter442 (256,) 1.00K
Parameter442.grad (256,) 1.00K
Parameter443 (512, 256, 3, 3) 4.50M
Parameter443.grad (512, 256, 3, 3) 4.50M
Parameter444 (512,) 2.00K
Parameter444.grad (512,) 2.00K
Parameter445 (512, 512, 3, 3) 9.00M
Parameter445.grad (512, 512, 3, 3) 9.00M
Parameter446 (512,) 2.00K
Parameter446.grad (512,) 2.00K
Tensor447 (1, 512, 96, 128) 24.00M
Tensor448 (512, 512) 1.00M
-------------------------------------------------------------------------------
Total Tensors: 16256002 Used Memory: 62.01M
The allocated memory on cuda:4: 62.01M
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
Storage on cuda:5
Tensor449 (512, 512) 1.00M
Tensor450 (1,) 512.00B
Parameter451 (512, 512, 3, 3) 9.00M
Parameter451.grad (512, 512, 3, 3) 9.00M
Parameter452 (512,) 2.00K
Parameter452.grad (512,) 2.00K
Parameter453 (512, 512, 3, 3) 9.00M
Parameter453.grad (512, 512, 3, 3) 9.00M
Parameter454 (512,) 2.00K
Parameter454.grad (512,) 2.00K
Tensor455 (512, 512) 1.00M
Parameter456 (512, 512, 3, 3) 9.00M
Parameter456.grad (512, 512, 3, 3) 9.00M
Parameter457 (512,) 2.00K
Parameter457.grad (512,) 2.00K
-------------------------------------------------------------------------------
Total Tensors: 14683137 Used Memory: 56.01M
The allocated memory on cuda:5: 56.01M
-------------------------------------------------------------------------------
Memory usage line by line:
File: neural_style.py
Function: main at line 56
Line # Max usage Peak usage diff max diff peak Line Contents
===============================================================
56 @profile
57 #reporter = MemReporter()
58 def main():
59 0.00B 0.00B -1.85G -2.13G dtype, multidevice, backward_device = setup_gpu()
60
61 76.39M 574.00M 76.39M 574.00M cnn, layerList = loadCaffemodel(params.model_file, params.pooling, params.gpu, False)
62
63 85.39M 102.00M 9.00M -472.00M content_image = preprocess(params.content_image, params.image_size).type(dtype)
64 85.39M 102.00M 0.00B 0.00B style_image_input = params.style_image.split(',')
65 85.39M 102.00M 0.00B 0.00B style_image_list, ext = [], [".jpg",".png"]
66 85.39M 102.00M 0.00B 0.00B for image in style_image_input:
67 85.39M 102.00M 0.00B 0.00B if os.path.isdir(image):
68 images = (image + "/" + file for file in os.listdir(image)
69 if os.path.splitext(file)[1].lower() in ext)
70 style_image_list.extend(images)
71 else:
72 85.39M 102.00M 0.00B 0.00B style_image_list.append(image)
73 85.39M 102.00M 0.00B 0.00B style_images_caffe = []
74 94.61M 122.00M 9.22M 20.00M for image in style_image_list:
75 85.39M 102.00M -9.22M -20.00M style_size = int(params.image_size * params.style_scale)
76 94.61M 122.00M 9.22M 20.00M img_caffe = preprocess(image, style_size).type(dtype)
77 94.61M 122.00M 0.00B 0.00B style_images_caffe.append(img_caffe)
78
79 94.61M 122.00M 0.00B 0.00B if params.init_image != None:
80 image_size = (content_image.size(2), content_image.size(3))
81 init_image = preprocess(params.init_image, image_size).type(dtype)
82
83 # Handle style blending weights for multiple style inputs
84 94.61M 122.00M 0.00B 0.00B style_blend_weights = []
85 94.61M 122.00M 0.00B 0.00B if params.style_blend_weights == None:
86 # Style blending not specified, so use equal weighting
87 94.61M 122.00M 0.00B 0.00B for i in style_image_list:
88 94.61M 122.00M 0.00B 0.00B style_blend_weights.append(1.0)
89 94.61M 122.00M 0.00B 0.00B for i, blend_weights in enumerate(style_blend_weights):
90 94.61M 122.00M 0.00B 0.00B style_blend_weights[i] = int(style_blend_weights[i])
91 else:
92 style_blend_weights = params.style_blend_weights.split(',')
93 assert len(style_blend_weights) == len(style_image_list), \
94 "-style_blend_weights and -style_images must have the same number of elements!"
95
96 # Normalize the style blending weights so they sum to 1
97 94.61M 122.00M 0.00B 0.00B style_blend_sum = 0
98 94.61M 122.00M 0.00B 0.00B for i, blend_weights in enumerate(style_blend_weights):
99 94.61M 122.00M 0.00B 0.00B style_blend_weights[i] = float(style_blend_weights[i])
100 94.61M 122.00M 0.00B 0.00B style_blend_sum = float(style_blend_sum) + style_blend_weights[i]
101 94.61M 122.00M 0.00B 0.00B for i, blend_weights in enumerate(style_blend_weights):
102 94.61M 122.00M 0.00B 0.00B style_blend_weights[i] = float(style_blend_weights[i]) / float(style_blend_sum)
103
104 94.61M 122.00M 0.00B 0.00B content_layers = params.content_layers.split(',')
105 94.61M 122.00M 0.00B 0.00B style_layers = params.style_layers.split(',')
106
107 # Set up the network, inserting style and content loss modules
108 96.36M 204.00M 1.75M 82.00M cnn = copy.deepcopy(cnn)
109 96.36M 164.00M 0.00B -40.00M content_losses, style_losses, tv_losses = [], [], []
110 96.36M 164.00M 0.00B 0.00B next_content_idx, next_style_idx = 1, 1
111 96.36M 164.00M 0.00B 0.00B net = nn.Sequential()
112 96.36M 164.00M 0.00B 0.00B c, r = 0, 0
113 96.36M 164.00M 0.00B 0.00B if params.tv_weight > 0:
114 96.36M 164.00M 0.00B 0.00B tv_mod = TVLoss(params.tv_weight).type(dtype)
115 96.36M 164.00M 0.00B 0.00B net.add_module(str(len(net)), tv_mod)
116 96.36M 164.00M 0.00B 0.00B tv_losses.append(tv_mod)
117
118 96.36M 164.00M 0.00B 0.00B for i, layer in enumerate(list(cnn), 1):
119 96.36M 164.00M 0.00B 0.00B if next_content_idx <= len(content_layers) or next_style_idx <= len(style_layers):
120 96.36M 164.00M 0.00B 0.00B if isinstance(layer, nn.Conv2d):
121 96.36M 164.00M 0.00B 0.00B net.add_module(str(len(net)), layer)
122
123 96.36M 164.00M 0.00B 0.00B if layerList['C'][c] in content_layers:
124 print("Setting up content layer " + str(i) + ": " + str(layerList['C'][c]))
125 loss_module = ContentLoss(params.content_weight)
126 net.add_module(str(len(net)), loss_module)
127 content_losses.append(loss_module)
128
129 96.36M 164.00M 0.00B 0.00B if layerList['C'][c] in style_layers:
130 print("Setting up style layer " + str(i) + ": " + str(layerList['C'][c]))
131 loss_module = StyleLoss(params.style_weight)
132 net.add_module(str(len(net)), loss_module)
133 style_losses.append(loss_module)
134 96.36M 164.00M 0.00B 0.00B c+=1
135
136 96.36M 164.00M 0.00B 0.00B if isinstance(layer, nn.ReLU):
137 96.36M 164.00M 0.00B 0.00B net.add_module(str(len(net)), layer)
138
139 96.36M 164.00M 0.00B 0.00B if layerList['R'][r] in content_layers:
140 96.36M 164.00M 0.00B 0.00B print("Setting up content layer " + str(i) + ": " + str(layerList['R'][r]))
141 96.36M 164.00M 0.00B 0.00B loss_module = ContentLoss(params.content_weight)
142 96.36M 164.00M 0.00B 0.00B net.add_module(str(len(net)), loss_module)
143 96.36M 164.00M 0.00B 0.00B content_losses.append(loss_module)
144 96.36M 164.00M 0.00B 0.00B next_content_idx += 1
145
146 96.36M 164.00M 0.00B 0.00B if layerList['R'][r] in style_layers:
147 96.36M 164.00M 0.00B 0.00B print("Setting up style layer " + str(i) + ": " + str(layerList['R'][r]))
148 96.36M 164.00M 0.00B 0.00B loss_module = StyleLoss(params.style_weight)
149 96.36M 164.00M 0.00B 0.00B net.add_module(str(len(net)), loss_module)
150 96.36M 164.00M 0.00B 0.00B style_losses.append(loss_module)
151 96.36M 164.00M 0.00B 0.00B next_style_idx += 1
152 96.36M 164.00M 0.00B 0.00B r+=1
153
154 96.36M 164.00M 0.00B 0.00B if isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
155 96.36M 164.00M 0.00B 0.00B net.add_module(str(len(net)), layer)
156
157 96.36M 164.00M 0.00B 0.00B if multidevice:
158 45.23M 164.00M -51.13M 0.00B net = setup_multi_device(net)
159
160 # Capture content targets
161 45.23M 82.00M 0.00B -82.00M for i in content_losses:
162 45.23M 82.00M 0.00B 0.00B i.mode = 'capture'
163 45.23M 82.00M 0.00B 0.00B print("Capturing content targets")
164 45.23M 82.00M 0.00B 0.00B print_torch(net, multidevice)
165 63.22M 82.00M 17.99M 0.00B net(content_image)
166
167 # Capture style targets
168 63.22M 82.00M 0.00B 0.00B for i in content_losses:
169 63.22M 82.00M 0.00B 0.00B i.mode = 'None'
170
171 63.65M 102.00M 444.00K 20.00M for i, image in enumerate(style_images_caffe):
172 63.22M 82.00M -444.00K -20.00M print("Capturing style target " + str(i+1))
173 63.22M 82.00M 0.00B 0.00B for j in style_losses:
174 63.22M 82.00M 0.00B 0.00B j.mode = 'capture'
175 63.22M 82.00M 0.00B 0.00B j.blend_weight = style_blend_weights[i]
176 63.65M 102.00M 444.00K 20.00M net(style_images_caffe[i])
177
178 # Set all loss modules to loss mode
179 63.65M 102.00M 0.00B 0.00B for i in content_losses:
180 63.65M 102.00M 0.00B 0.00B i.mode = 'loss'
181 63.65M 102.00M 0.00B 0.00B for i in style_losses:
182 63.65M 102.00M 0.00B 0.00B i.mode = 'loss'
183
184 # Freeze the network in order to prevent
185 # unnecessary gradient calculations
186 63.65M 102.00M 0.00B 0.00B for param in net.parameters():
187 param.requires_grad = False
188
189 # Initialize the image
190 63.65M 102.00M 0.00B 0.00B if params.seed >= 0:
191 63.65M 102.00M 0.00B 0.00B torch.manual_seed(params.seed)
192 63.65M 102.00M 0.00B 0.00B torch.cuda.manual_seed(params.seed)
193 63.65M 102.00M 0.00B 0.00B torch.cuda.manual_seed_all(params.seed)
194 63.65M 102.00M 0.00B 0.00B torch.backends.cudnn.deterministic=True
195 63.65M 102.00M 0.00B 0.00B if params.init == 'random':
196 63.65M 102.00M 0.00B 0.00B B, C, H, W = content_image.size()
197 72.65M 102.00M 9.00M 0.00B img = torch.randn(C, H, W).mul(0.001).unsqueeze(0).type(dtype)
198 elif params.init == 'image':
199 if params.init_image != None:
200 img = init_image.clone()
201 else:
202 img = content_image.clone()
203 72.65M 102.00M 0.00B 0.00B img = nn.Parameter(img.type(dtype))
204
205 72.65M 102.00M 0.00B 0.00B def maybe_print(t, loss):
206 if params.print_iter > 0 and t % params.print_iter == 0:
207 print("Iteration " + str(t) + " / "+ str(params.num_iterations))
208 for i, loss_module in enumerate(content_losses):
209 print(" Content " + str(i+1) + " loss: " + str(loss_module.loss.item()))
210 for i, loss_module in enumerate(style_losses):
211 print(" Style " + str(i+1) + " loss: " + str(loss_module.loss.item()))
212 print(" Total loss: " + str(loss.item()))
213
214 72.65M 102.00M 0.00B 0.00B def maybe_save(t):
215 should_save = params.save_iter > 0 and t % params.save_iter == 0
216 should_save = should_save or t == params.num_iterations
217 if should_save:
218 output_filename, file_extension = os.path.splitext(params.output_image)
219 if t == params.num_iterations:
220 filename = output_filename + str(file_extension)
221 else:
222 filename = str(output_filename) + "_" + str(t) + str(file_extension)
223 disp = deprocess(img.clone())
224
225 # Maybe perform postprocessing for color-independent style transfer
226 if params.original_colors == 1:
227 disp = original_colors(deprocess(content_image.clone()), disp)
228
229 disp.save(str(filename))
230
231 # Function to evaluate loss and gradient. We run the net forward and
232 # backward to get the gradient, and sum up losses from the loss modules.
233 # optim.lbfgs internally handles iteration and calls this function many
234 # times, so we manually count the number of iterations to handle printing
235 # and saving intermediate results.
236 72.65M 102.00M 0.00B 0.00B num_calls = [0]
237 72.65M 102.00M 0.00B 0.00B def feval():
238 num_calls[0] += 1
239 optimizer.zero_grad()
240 net(img)
241 loss = 0
242
243 for mod in content_losses:
244 loss += mod.loss.to(backward_device)
245 for mod in style_losses:
246 loss += mod.loss.to(backward_device)
247 if params.tv_weight > 0:
248 for mod in tv_losses:
249 loss += mod.loss.to(backward_device)
250
251 loss.backward()
252
253 maybe_save(num_calls[0])
254 maybe_print(num_calls[0], loss)
255
256 return loss
257
258 72.65M 102.00M 72.65M 102.00M optimizer, loopVal = setup_optimizer(img)
259 1.85G 2.09G 1.78G 1.99G while num_calls[0] <= loopVal:
260 1.85G 2.13G 0.00B 40.00M optimizer.step(feval)
nvidia-smi: