{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Find max model / batch_size combo for GPU Ram"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"import os\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from fastai.vision import *\n",
"from fastai.callbacks.mem import *\n",
"from ipyexperiments import *\n",
"from ipyexperiments.utils.mem import *"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"seed = 42\n",
"\n",
"# python RNG\n",
"random.seed(seed)\n",
"\n",
"# pytorch RNGs\n",
"import torch\n",
"torch.manual_seed(seed)\n",
"torch.backends.cudnn.deterministic = True\n",
"if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)\n",
"\n",
"# numpy RNG\n",
"import numpy as np\n",
"np.random.seed(seed)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"path = untar_data(URLs.PETS)\n",
"path_img = path/'images'\n",
"fnames = get_image_files(path_img)\n",
"pat = r'/([^/]+)_\\d+.jpg$'"
]
},
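{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of the label regex before building the `DataBunch` (the path below is a made-up example in the PETS naming scheme):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"# group(1) is the breed label: everything between the last '/' and the trailing _<index>.jpg\n",
"re.search(pat, '/images/great_pyrenees_173.jpg').group(1)  # -> 'great_pyrenees'"
]
},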
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from fastai.callback import Callback\n",
"\n",
"class FitNBatch(Callback):\n",
" def __init__(self, n_batch:int=2): \n",
" self.stop,self.n_batch = False,n_batch\n",
" \n",
" def on_batch_end(self, iteration, **kwargs):\n",
" if iteration >= self.n_batch: return {'stop_epoch': True, 'stop_training': True}"
]
},
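{
"cell_type": "markdown",
"metadata": {},
"source": [
"`FitNBatch` can also be attached to a single `fit` call via its `callbacks` argument; a sketch, left as a raw cell since `learn` is only built further down:"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"# run just a couple of batches, enough to observe peak GPU memory\n",
"learn.fit(1, callbacks=[FitNBatch(n_batch=2)])"
]
},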
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def test_from_max_bs(learn,max_bs:int=1024,r_factor:float=0.80,verbose:bool=True,memtrace=None)->int:\n",
" ''' Binary search for max batch size with given learner'''\n",
" if memtrace is None: memtrace = GPUMemTrace()\n",
" test_dict = dict()\n",
"\n",
" low,hi = 2,max_bs\n",
" \n",
" #first test low, if that does not fit, return\n",
" try:\n",
" learn.data.batch_size = low\n",
" learn.fit(1)\n",
" out = low #this could be our best if the next loop fails\n",
" except RuntimeError as e:\n",
" if verbose: print(e,f'tried bs:{bs}')\n",
" return None,test_dict #can't fit min\n",
" \n",
" bs = hi\n",
" while (hi-low) > 1:\n",
" try:\n",
" learn.data.batch_size = bs\n",
" learn.fit(1)\n",
" out,low = bs,bs\n",
" except RuntimeError as e:\n",
" if verbose: print(e,f'tried bs:{bs}')\n",
" hi = bs #bs is the new high value, keep cutting till we fail again.\n",
"\n",
" bs = (low + hi) // 2 \n",
" if bs == hi: bs = low\n",
" \n",
" print(f'trying {bs}, lo:{low},hi:{hi}')\n",
" if verbose: memtrace.report(f'bs:{bs}')\n",
" test_dict[bs]=tuple(memtrace.data())\n",
" \n",
" return out,test_dict"
]
},
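{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same bisection logic run against a fake trainer, to show the search pattern without touching the GPU. The 300-sample limit below is made up:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def fake_fit(bs):\n",
"    # stand-in for learn.fit: pretend anything above bs=300 runs out of memory\n",
"    if bs > 300: raise RuntimeError('CUDA out of memory (simulated)')\n",
"\n",
"low,hi = 2,1024\n",
"fake_fit(low); best = low\n",
"bs = hi\n",
"while (hi-low) > 1:\n",
"    try:\n",
"        fake_fit(bs)\n",
"        best,low = bs,bs\n",
"    except RuntimeError:\n",
"        hi = bs\n",
"    bs = (low + hi) // 2\n",
"best  # converges to 300, the largest size that 'fits'"
]
},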
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def do_the_magic(learn,max_bs=32,verbose=True,memtrace=None):\n",
" defaults.extra_callbacks = [FitNBatch()]\n",
" bs_test_data = test_from_max_bs(learn,max_bs=max_bs,verbose=verbose,memtrace=memtrace)\n",
" defaults.extra_callbacks = None\n",
" return bs_test_data"
]
},
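{
"cell_type": "markdown",
"metadata": {},
"source": [
"The second value returned records the `GPUMemTrace` readings for every batch size tried, so the search can be reviewed afterwards. A sketch, left as a raw cell since `tested_bs_data` only exists after the experiments below:"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"for tried_bs, mem in sorted(tested_bs_data.items()):\n",
"    print(f'bs={tried_bs:>5}: memtrace data={mem}')"
]
},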
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"res_models = [models.resnet18,models.resnet34,models.resnet50,models.resnet101,models.resnet152]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"bs = 64"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import fastai\n",
"import fastprogress\n",
"\n",
"class progress_diabled():\n",
" ''' Context manager to disable the progress update bar and Recorder print'''\n",
" def __init__(self,learner:Learner):\n",
" self.learn = learner\n",
" def __enter__(self):\n",
" #silence progress bar\n",
" fastprogress.fastprogress.NO_BAR = True\n",
" fastai.basic_train.master_bar, fastai.basic_train.progress_bar = fastprogress.force_console_behavior()\n",
" self.learn.callback_fns[0] = partial(Recorder,add_time=True) #,silent=True) #silence recorder\n",
" \n",
" return self.learn \n",
" def __exit__(self,type,value,traceback):\n",
" fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar,progress_bar\n",
" self.learn.callback_fns[0] = partial(Recorder,add_time=True)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, TITAN Xp (12194 RAM)\n",
"\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,261 109,691 128,856 MB 1.76% \n",
"GPU: 577 11,617 12,194 MB 4.73% \n",
"\n",
"\n",
"epoch train_loss valid_loss time \n",
"1 3.839389 2.447133 00:02 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 1000.00 MiB (GPU 0; 11.91 GiB total capacity; 10.73 GiB already allocated; 595.06 MiB free; 3.81 MiB cached) tried bs:4000\n",
"trying 2001, lo:2,hi:4000\n",
"△Used Peaked MB: 1,348 466 (bs:2001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 500.25 MiB (GPU 0; 11.91 GiB total capacity; 10.82 GiB already allocated; 501.06 MiB free; 3.80 MiB cached) tried bs:2001\n",
"trying 1001, lo:2,hi:2001\n",
"△Used Peaked MB: 254 1,560 (bs:1001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 7.88 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:1001\n",
"trying 501, lo:2,hi:1001\n",
"△Used Peaked MB: 1,940 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 23.50 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:501\n",
"trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 1,940 0 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 11.88 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:251\n",
"trying 126, lo:2,hi:251\n",
"△Used Peaked MB: 1,940 0 (bs:126)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 6.00 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:126\n",
"trying 64, lo:2,hi:126\n",
"△Used Peaked MB: 1,940 0 (bs:64)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 3.00 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:64\n",
"trying 33, lo:2,hi:64\n",
"△Used Peaked MB: 1,940 0 (bs:33)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 8.25 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.05 MiB cached) tried bs:33\n",
"trying 17, lo:2,hi:33\n",
"△Used Peaked MB: 1,940 0 (bs:17)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 4.25 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 9.25 MiB cached) tried bs:17\n",
"trying 9, lo:2,hi:17\n",
"△Used Peaked MB: 1,940 0 (bs:9)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 2.62 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 3.06 MiB free; 5.34 MiB cached) tried bs:9\n",
"trying 5, lo:2,hi:9\n",
"△Used Peaked MB: 1,940 0 (bs:5)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 2.88 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 3.06 MiB free; 7.56 MiB cached) tried bs:5\n",
"trying 3, lo:2,hi:5\n",
"△Used Peaked MB: 1,940 0 (bs:3)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 9.00 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 3.06 MiB free; 6.81 MiB cached) tried bs:3\n",
"trying 2, lo:2,hi:3\n",
"△Used Peaked MB: 1,940 0 (bs:2)\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:02:34.515\n",
"・ CPU: 2 2 2,385 MB |\n",
"・ GPU: 9,834 1,780 10,411 MB |\n",
"\n",
"IPyExperimentsPytorch: Finishing\n",
"\n",
"*** Experiment finished in 00:02:34 (elapsed wallclock time)\n",
"\n",
"*** Newly defined local variables:\n",
"Deleted: data, learn, x1\n",
"Kept: max_bs, tested_bs_data\n",
"\n",
"*** Circular ref objects gc collected during the experiment:\n",
"cleared 133 objects (only temporary leakage)\n",
"\n",
"*** Experiment memory:\n",
"RAM: Consumed Reclaimed\n",
"CPU: 124 89 MB ( 71.65%)\n",
"GPU: 9,834 9,818 MB ( 99.84%)\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,296 109,667 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n"
]
},
{
"data": {
"text/plain": [
"22"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arch = res_models[0]\n",
"max_mem = 2000\n",
"img_size = 64\n",
"\n",
"exp = IPyExperimentsPytorch()\n",
"x1 = gpu_mem_leave_free_mbs(max_mem)\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs\n",
" ).normalize(imagenet_stats)\n",
"learn = create_cnn(data, arch)\n",
"learn.unfreeze()\n",
"with progress_diabled(learn) as learn:\n",
" max_bs,tested_bs_data = do_the_magic(learn,max_bs=4000,verbose=True)\n",
"\n",
"exp.keep_var_names('max_bs','tested_bs_data')\n",
"del exp\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, TITAN Xp (12194 RAM)\n",
"\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,297 109,667 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n",
"epoch train_loss valid_loss time \n",
"1 3.830230 3.242904 00:02 \n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 1000.00 MiB (GPU 0; 11.91 GiB total capacity; 10.71 GiB already allocated; 619.06 MiB free; 9.31 MiB cached) tried bs:4000\n",
"trying 2001, lo:2,hi:4000\n",
"△Used Peaked MB: 1,330 460 (bs:2001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 500.25 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 23.06 MiB free; 9.30 MiB cached) tried bs:2001\n",
"trying 1001, lo:2,hi:2001\n",
"△Used Peaked MB: 738 1,188 (bs:1001)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 62.62 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 21.06 MiB free; 9.31 MiB cached) tried bs:1001\n",
"trying 501, lo:2,hi:1001\n",
"△Used Peaked MB: 1,928 0 (bs:501)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 23.50 MiB (GPU 0; 11.91 GiB total capacity; 11.28 GiB already allocated; 21.06 MiB free; 9.31 MiB cached) tried bs:501\n",
"trying 251, lo:2,hi:501\n",
"△Used Peaked MB: 1,928 0 (bs:251)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 11.88 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 9.06 MiB free; 9.31 MiB cached) tried bs:251\n",
"trying 126, lo:2,hi:251\n",
"△Used Peaked MB: 1,940 0 (bs:126)\n",
"epoch train_loss valid_loss time \n",
"CUDA out of memory. Tried to allocate 6.00 MiB (GPU 0; 11.91 GiB total capacity; 11.29 GiB already allocated; 3.06 MiB free; 15.19 MiB cached) tried bs:126\n",
"trying 64, lo:2,hi:126\n",
"△Used Peaked MB: 1,940 6 (bs:64)\n",
"epoch train_loss valid_loss time \n",
"1 4.269319 3.768133 00:07 \n",
"trying 95, lo:64,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:95)\n",
"epoch train_loss valid_loss time \n",
"1 3.999418 3.658793 00:08 \n",
"trying 110, lo:95,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:110)\n",
"epoch train_loss valid_loss time \n",
"1 3.807487 3.473353 00:09 \n",
"trying 118, lo:110,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:118)\n",
"epoch train_loss valid_loss time \n",
"1 3.646755 3.263668 00:10 \n",
"trying 122, lo:118,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:122)\n",
"epoch train_loss valid_loss time \n",
"1 3.469121 3.124530 00:10 \n",
"trying 124, lo:122,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:124)\n",
"epoch train_loss valid_loss time \n",
"1 3.436059 2.996242 00:10 \n",
"trying 125, lo:124,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:125)\n",
"epoch train_loss valid_loss time \n",
"1 3.171064 2.925839 00:11 \n",
"trying 125, lo:125,hi:126\n",
"△Used Peaked MB: 142 1,804 (bs:125)\n",
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:03:38.286\n",
"・ CPU: 0 3 2,386 MB |\n",
"・ GPU: 9,794 1,804 10,387 MB |\n",
"\n",
"IPyExperimentsPytorch: Finishing\n",
"\n",
"*** Experiment finished in 00:03:38 (elapsed wallclock time)\n",
"\n",
"*** Newly defined local variables:\n",
"Deleted: data, learn, x1\n",
"Kept: max_bs, tested_bs_data\n",
"\n",
"*** Circular ref objects gc collected during the experiment:\n",
"cleared 55 objects (only temporary leakage)\n",
"\n",
"*** Experiment memory:\n",
"RAM: Consumed Reclaimed\n",
"CPU: 89 89 MB ( 99.74%)\n",
"GPU: 9,794 9,794 MB (100.00%)\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,297 109,660 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n"
]
},
{
"data": {
"text/plain": [
"22"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exp = IPyExperimentsPytorch()\n",
"x1 = gpu_mem_leave_free_mbs(max_mem)\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs\n",
" ).normalize(imagenet_stats)\n",
"learn = create_cnn(data, arch)\n",
"learn.unfreeze()\n",
"with progress_diabled(learn) as learn:\n",
" max_bs,tested_bs_data = do_the_magic(learn,max_bs=4000,verbose=True)\n",
"\n",
"exp.keep_var_names('max_bs','tested_bs_data')\n",
"del exp\n",
"gc.collect()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"*** Experiment started with the Pytorch backend\n",
"Device: ID 0, TITAN Xp (12194 RAM)\n",
"\n",
"\n",
"*** Current state:\n",
"RAM: Used Free Total Util\n",
"CPU: 2,297 109,660 128,856 MB 1.78% \n",
"GPU: 593 11,601 12,194 MB 4.86% \n",
"\n",
"\n",
"epoch train_loss valid_loss time \n",
"1 4.184824 5.174956 00:01 \n",
"epoch train_loss valid_loss time \n",
"1 4.089300 3.800783 00:28 \n",
"trying 800, lo:800,hi:800\n",
"△Used Peaked MB: 142 1,686 (bs:800)\n"
]
},
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:37.295\n",
"・ CPU: 0 114 2,388 MB |\n",
"・ GPU: 9,794 1,686 10,387 MB |\n",
"\n",
"IPyExperimentsPytorch: Finishing\n",
"\n",
"*** Experiment finished in 00:00:37 (elapsed wallclock time)\n",
"\n",
"*** Newly defined local variables:\n",
"Deleted: data, learn, x1\n",
"Kept: max_bs, tested_bs_data\n",
"\n",
"*** Circular ref objects gc collected during the experiment:\n",
"cleared 55 objects (only temporary leakage)\n",
"\n",
"*** Experiment memory:\n",
"RAM: Consumed Reclaimed\n",
"CPU: 90 90 MB ( 99.23%)\n",
"GPU: 9,794 9,794 MB (100.00%)\n"
]
}
],
"source": [
"exp = IPyExperimentsPytorch()\n",
"x1 = gpu_mem_leave_free_mbs(max_mem)\n",
"data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=img_size, bs=bs\n",
" ).normalize(imagenet_stats)\n",
"learn = create_cnn(data, arch)\n",
"learn.unfreeze()\n",
"with progress_diabled(learn) as learn:\n",
" max_bs,tested_bs_data = do_the_magic(learn,max_bs=800,verbose=True)\n",
"\n",
"exp.keep_var_names('max_bs','tested_bs_data')\n",
"del exp\n",
"gc.collect()"
]
},
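{
"cell_type": "markdown",
"metadata": {},
"source": [
"For real training it is safer to back off from the hard limit, since memory use fluctuates from batch to batch and between runs; the 0.8 factor below is an arbitrary margin, not a measured one:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"safe_bs = int(max_bs * 0.8)  # hypothetical safety margin below the largest size that fit\n",
"print(f'largest bs that fit: {max_bs}, suggested training bs: {safe_bs}')"
]
},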
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7 fasta.ai1 DEV",
"language": "python",
"name": "fastai1_dev"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}