angadbajwa23/data_fetching.ipynb Secret

## data_fetching.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "C8ygXqoQdq_H",
    "outputId": "3be12076-eb35-4305-f06f-ede8656f22fa"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
     ]
    }
   ],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "CVFJ4U2GduEI",
    "outputId": "0e7a79f5-fba3-49a0-bb98-ec191f9a4d42"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/content/gdrive/My Drive/2d\n"
     ]
    }
   ],
   "source": [
    "%cd gdrive/My Drive/2d"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "nbAf6JWerVr6"
   },
   "source": [
    "### Downloading the dataset from [figshare.com/articles/brain_tumor_dataset/1512427](https://figshare.com/articles/brain_tumor_dataset/1512427)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 202
    },
    "colab_type": "code",
    "id": "Cmz08fU3eAFQ",
    "outputId": "2ba55caf-72e6-446c-a25a-d31b66f7db5b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2020-05-18 06:28:44--  https://ndownloader.figshare.com/articles/1512427/versions/5\n",
      "Resolving ndownloader.figshare.com (ndownloader.figshare.com)... 34.251.18.86, 34.240.222.171, 34.252.153.30, ...\n",
      "Connecting to ndownloader.figshare.com (ndownloader.figshare.com)|34.251.18.86|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 879501695 (839M) [application/zip]\n",
      "Saving to: ‘5’\n",
      "\n",
      "5                   100%[===================>] 838.76M  23.7MB/s    in 38s     \n",
      "\n",
      "2020-05-18 06:29:22 (22.2 MB/s) - ‘5’ saved [879501695/879501695]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!wget https://ndownloader.figshare.com/articles/1512427/versions/5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 134
    },
    "colab_type": "code",
    "id": "MbL09Q9neF9i",
    "outputId": "31dac3f4-841a-4061-ea93-e75147e6a723"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Archive:  5\n",
      " extracting: brainTumorDataPublic_1-766.zip  \n",
      " extracting: brainTumorDataPublic_1533-2298.zip  \n",
      " extracting: brainTumorDataPublic_767-1532.zip  \n",
      " extracting: brainTumorDataPublic_2299-3064.zip  \n",
      " extracting: cvind.mat               \n",
      " extracting: README.txt              \n"
     ]
    }
   ],
   "source": [
    "!unzip 5 && rm 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "eVF7_xNbeJFh"
   },
   "outputs": [],
   "source": [
    "!cat brainTumorDataPublic_* > brainTumorDataPublic_temp.zip\n",
    "!zip -FF brainTumorDataPublic_temp.zip --out data.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Gh7oUYHMeMWU"
   },
   "outputs": [],
   "source": [
    "!rm brainTumorDataPublic_*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "8tcBi2kHeT2I"
   },
   "outputs": [],
   "source": [
    "!unzip data.zip -d data && rm data.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "K0IcaIDeeUVn",
    "outputId": "74e02ae0-6a71-4ffe-fd11-13bf851b5432"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3064\n"
     ]
    }
   ],
   "source": [
    "!ls data | wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 151
    },
    "colab_type": "code",
    "id": "BAMRX6SveW-0",
    "outputId": "5bf6ec54-97e0-40a8-8a4a-5294f40f5c88"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting hdf5storage\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/79/e0/5dd25068a231cd817265529368aca2f918049b290dcb2fd9b24ce136adf4/hdf5storage-0.1.15-py2.py3-none-any.whl (56kB)\n",
      "\r",
      "\u001b[K     |█████▊                          | 10kB 15.8MB/s eta 0:00:01\r",
      "\u001b[K     |███████████▌                    | 20kB 1.7MB/s eta 0:00:01\r",
      "\u001b[K     |█████████████████▎              | 30kB 2.3MB/s eta 0:00:01\r",
      "\u001b[K     |███████████████████████         | 40kB 1.6MB/s eta 0:00:01\r",
      "\u001b[K     |████████████████████████████▉   | 51kB 2.0MB/s eta 0:00:01\r",
      "\u001b[K     |████████████████████████████████| 61kB 1.7MB/s \n",
      "\u001b[?25hRequirement already satisfied: numpy; python_version >= \"3.4\" in /usr/local/lib/python3.6/dist-packages (from hdf5storage) (1.18.4)\n",
      "Requirement already satisfied: h5py>=2.1; python_version >= \"3.3\" in /usr/local/lib/python3.6/dist-packages (from hdf5storage) (2.10.0)\n",
      "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py>=2.1; python_version >= \"3.3\"->hdf5storage) (1.12.0)\n",
      "Installing collected packages: hdf5storage\n",
      "Successfully installed hdf5storage-0.1.15\n"
     ]
    }
   ],
   "source": [
    "!pip install hdf5storage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "HEdvXiYasKEC"
   },
   "source": [
    "### Creating images, labels and masks numpy arrays "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 101
    },
    "colab_type": "code",
    "id": "ImwdQyMGeZAA",
    "outputId": "e7346546-d58e-43b0-a968-06aeab112f5f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3064/3064] images loaded: 100.0 %\n",
      "labels: (3064,)\n",
      "images: (3064, 512, 512)\n",
      "masks: (3064, 512, 512)\n",
      "labels.npy, images.npy, masks.npy saved in /content/gdrive/My Drive/2d/\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import argparse\n",
    "import sys\n",
    "import numpy as np\n",
    "import hdf5storage\n",
    "import cv2\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "class NoDataFound(Exception):\n",
    "    pass\n",
    "\n",
    "\n",
    "def dir_path(path):\n",
    "    \"\"\"Check the path and the existence of a data directory\"\"\"\n",
    "    # replace '\\' in path for Windows users\n",
    "    path = path.replace('\\\\', '/')\n",
    "    data_path = os.path.join(path, 'data').replace('\\\\', '/')\n",
    "\n",
    "    if os.path.isdir(data_path):\n",
    "        return path\n",
    "    elif os.path.isdir(path):\n",
    "        raise NoDataFound('Could not find a \"data\" folder inside directory. {} does not exist.'\n",
    "                          .format(data_path))\n",
    "    else:\n",
    "        raise NotADirectoryError(path)\n",
    "\n",
    "path='/content/gdrive/My Drive/2d/'\n",
    "parser = argparse.ArgumentParser()\n",
    "parser.add_argument('path', help='path to the brain_tumor_dataset directory', type=dir_path)\n",
    "parser.add_argument('--image-dimension', '-d', default=512, help='dimension of the image', type=int)\n",
    "args = parser.parse_args(args=[path])\n",
    "\n",
    "labels = []\n",
    "images = []\n",
    "masks = []\n",
    "\n",
    "data_dir = os.path.join(args.path, 'data').replace('\\\\', '/')\n",
    "files = os.listdir(data_dir)\n",
    "for i, file in enumerate(files, start=1):\n",
    "    if i % 10 == 0:\n",
    "        # print the percentage of images loaded\n",
    "        sys.stdout.write('\\r[{}/{}] images loaded: {:.1f} %'\n",
    "                         .format(i, len(files), i / float(len(files)) * 100))\n",
    "        sys.stdout.flush()\n",
    "\n",
    "    # load matlab file with hdf5storage as scipy.io.loadmat does not support v7.3 files\n",
    "    mat_file = hdf5storage.loadmat(os.path.join(data_dir, file))['cjdata'][0]\n",
    "\n",
    "    # resize image and mask to a unique size\n",
    "    image = cv2.resize(mat_file[2], dsize=(args.image_dimension, args.image_dimension),\n",
    "                       interpolation=cv2.INTER_CUBIC)\n",
    "    mask = cv2.resize(mat_file[4].astype('uint8'), dsize=(args.image_dimension, args.image_dimension),\n",
    "                      interpolation=cv2.INTER_CUBIC)\n",
    "\n",
    "    labels.append(int(mat_file[0]))\n",
    "    images.append(image)\n",
    "    masks.append(mask.astype(bool))\n",
    "\n",
    "sys.stdout.write('\\r[{}/{}] images loaded: {:.1f} %'\n",
    "                 .format(i, len(files), i / float(len(files)) * 100))\n",
    "sys.stdout.flush()\n",
    "\n",
    "labels = np.array(labels)\n",
    "images = np.array(images)\n",
    "masks = np.array(masks)\n",
    "\n",
    "print('\\nlabels:', labels.shape)\n",
    "print('images:', images.shape)\n",
    "print('masks:', masks.shape)\n",
    "\n",
    "np.save(os.path.join(args.path, 'labels.npy'), labels)\n",
    "np.save(os.path.join(args.path, 'images.npy'), images)\n",
    "np.save(os.path.join(args.path, 'masks.npy'), masks)\n",
    "\n",
    "print('labels.npy, images.npy, masks.npy saved in', args.path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "g5nRl-jvefUt"
   },
   "outputs": [],
   "source": [
    "integer_to_class = {'1': 'meningioma (1)', '2': 'glioma (2)', '3': 'pituitary tumor (3)'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 67
    },
    "colab_type": "code",
    "id": "4bYzy46Tekrd",
    "outputId": "c2196313-227d-4689-c682-3e9e2d829424"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3064,)\n",
      "(3064, 512, 512)\n",
      "(3064, 512, 512)\n"
     ]
    }
   ],
   "source": [
    "labels = np.load('labels.npy')\n",
    "images = np.load('images.npy')\n",
    "masks = np.load('masks.npy')\n",
    "\n",
    "print(labels.shape)\n",
    "print(images.shape)\n",
    "print(masks.shape)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Data_exploration",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"colab_type": "code",
	"id": "C8ygXqoQdq_H",
	"outputId": "3be12076-eb35-4305-f06f-ede8656f22fa"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
	]
	}
	],
	"source": [
	"from google.colab import drive\n",
	"drive.mount('/content/gdrive')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"colab_type": "code",
	"id": "CVFJ4U2GduEI",
	"outputId": "0e7a79f5-fba3-49a0-bb98-ec191f9a4d42"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"/content/gdrive/My Drive/2d\n"
	]
	}
	],
	"source": [
	"%cd gdrive/My Drive/2d"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"colab_type": "text",
	"id": "nbAf6JWerVr6"
	},
	"source": [
	"### Downloading the dataset from [figshare.com/articles/brain_tumor_dataset/1512427](https://figshare.com/articles/brain_tumor_dataset/1512427)\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 202
	},
	"colab_type": "code",
	"id": "Cmz08fU3eAFQ",
	"outputId": "2ba55caf-72e6-446c-a25a-d31b66f7db5b"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"--2020-05-18 06:28:44-- https://ndownloader.figshare.com/articles/1512427/versions/5\n",
	"Resolving ndownloader.figshare.com (ndownloader.figshare.com)... 34.251.18.86, 34.240.222.171, 34.252.153.30, ...\n",
	"Connecting to ndownloader.figshare.com (ndownloader.figshare.com)|34.251.18.86|:443... connected.\n",
	"HTTP request sent, awaiting response... 200 OK\n",
	"Length: 879501695 (839M) [application/zip]\n",
	"Saving to: ‘5’\n",
	"\n",
	"5 100%[===================>] 838.76M 23.7MB/s in 38s \n",
	"\n",
	"2020-05-18 06:29:22 (22.2 MB/s) - ‘5’ saved [879501695/879501695]\n",
	"\n"
	]
	}
	],
	"source": [
	"!wget https://ndownloader.figshare.com/articles/1512427/versions/5"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 134
	},
	"colab_type": "code",
	"id": "MbL09Q9neF9i",
	"outputId": "31dac3f4-841a-4061-ea93-e75147e6a723"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Archive: 5\n",
	" extracting: brainTumorDataPublic_1-766.zip \n",
	" extracting: brainTumorDataPublic_1533-2298.zip \n",
	" extracting: brainTumorDataPublic_767-1532.zip \n",
	" extracting: brainTumorDataPublic_2299-3064.zip \n",
	" extracting: cvind.mat \n",
	" extracting: README.txt \n"
	]
	}
	],
	"source": [
	"!unzip 5 && rm 5"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {},
	"colab_type": "code",
	"id": "eVF7_xNbeJFh"
	},
	"outputs": [],
	"source": [
	"!cat brainTumorDataPublic_* > brainTumorDataPublic_temp.zip\n",
	"!zip -FF brainTumorDataPublic_temp.zip --out data.zip"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {},
	"colab_type": "code",
	"id": "Gh7oUYHMeMWU"
	},
	"outputs": [],
	"source": [
	"!rm brainTumorDataPublic_*"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {},
	"colab_type": "code",
	"id": "8tcBi2kHeT2I"
	},
	"outputs": [],
	"source": [
	"!unzip data.zip -d data && rm data.zip"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 34
	},
	"colab_type": "code",
	"id": "K0IcaIDeeUVn",
	"outputId": "74e02ae0-6a71-4ffe-fd11-13bf851b5432"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"3064\n"
	]
	}
	],
	"source": [
	"!ls data \| wc -l"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 151
	},
	"colab_type": "code",
	"id": "BAMRX6SveW-0",
	"outputId": "5bf6ec54-97e0-40a8-8a4a-5294f40f5c88"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Collecting hdf5storage\n",
	"\u001b[?25l Downloading https://files.pythonhosted.org/packages/79/e0/5dd25068a231cd817265529368aca2f918049b290dcb2fd9b24ce136adf4/hdf5storage-0.1.15-py2.py3-none-any.whl (56kB)\n",
	"\r",
	"\u001b[K |█████▊ | 10kB 15.8MB/s eta 0:00:01\r",
	"\u001b[K |███████████▌ | 20kB 1.7MB/s eta 0:00:01\r",
	"\u001b[K |█████████████████▎ | 30kB 2.3MB/s eta 0:00:01\r",
	"\u001b[K |███████████████████████ | 40kB 1.6MB/s eta 0:00:01\r",
	"\u001b[K |████████████████████████████▉ | 51kB 2.0MB/s eta 0:00:01\r",
	"\u001b[K |████████████████████████████████| 61kB 1.7MB/s \n",
	"\u001b[?25hRequirement already satisfied: numpy; python_version >= \"3.4\" in /usr/local/lib/python3.6/dist-packages (from hdf5storage) (1.18.4)\n",
	"Requirement already satisfied: h5py>=2.1; python_version >= \"3.3\" in /usr/local/lib/python3.6/dist-packages (from hdf5storage) (2.10.0)\n",
	"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py>=2.1; python_version >= \"3.3\"->hdf5storage) (1.12.0)\n",
	"Installing collected packages: hdf5storage\n",
	"Successfully installed hdf5storage-0.1.15\n"
	]
	}
	],
	"source": [
	"!pip install hdf5storage"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"colab_type": "text",
	"id": "HEdvXiYasKEC"
	},
	"source": [
	"### Creating images, labels and masks numpy arrays "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 101
	},
	"colab_type": "code",
	"id": "ImwdQyMGeZAA",
	"outputId": "e7346546-d58e-43b0-a968-06aeab112f5f"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[3064/3064] images loaded: 100.0 %\n",
	"labels: (3064,)\n",
	"images: (3064, 512, 512)\n",
	"masks: (3064, 512, 512)\n",
	"labels.npy, images.npy, masks.npy saved in /content/gdrive/My Drive/2d/\n"
	]
	}
	],
	"source": [
	"import os\n",
	"import argparse\n",
	"import sys\n",
	"import numpy as np\n",
	"import hdf5storage\n",
	"import cv2\n",
	"import matplotlib.pyplot as plt\n",
	"\n",
	"class NoDataFound(Exception):\n",
	" pass\n",
	"\n",
	"\n",
	"def dir_path(path):\n",
	" \"\"\"Check the path and the existence of a data directory\"\"\"\n",
	" # replace '\\' in path for Windows users\n",
	" path = path.replace('\\\\', '/')\n",
	" data_path = os.path.join(path, 'data').replace('\\\\', '/')\n",
	"\n",
	" if os.path.isdir(data_path):\n",
	" return path\n",
	" elif os.path.isdir(path):\n",
	" raise NoDataFound('Could not find a \"data\" folder inside directory. {} does not exist.'\n",
	" .format(data_path))\n",
	" else:\n",
	" raise NotADirectoryError(path)\n",
	"\n",
	"path='/content/gdrive/My Drive/2d/'\n",
	"parser = argparse.ArgumentParser()\n",
	"parser.add_argument('path', help='path to the brain_tumor_dataset directory', type=dir_path)\n",
	"parser.add_argument('--image-dimension', '-d', default=512, help='dimension of the image', type=int)\n",
	"args = parser.parse_args(args=[path])\n",
	"\n",
	"labels = []\n",
	"images = []\n",
	"masks = []\n",
	"\n",
	"data_dir = os.path.join(args.path, 'data').replace('\\\\', '/')\n",
	"files = os.listdir(data_dir)\n",
	"for i, file in enumerate(files, start=1):\n",
	" if i % 10 == 0:\n",
	" # print the percentage of images loaded\n",
	" sys.stdout.write('\\r[{}/{}] images loaded: {:.1f} %'\n",
	" .format(i, len(files), i / float(len(files)) * 100))\n",
	" sys.stdout.flush()\n",
	"\n",
	" # load matlab file with hdf5storage as scipy.io.loadmat does not support v7.3 files\n",
	" mat_file = hdf5storage.loadmat(os.path.join(data_dir, file))['cjdata'][0]\n",
	"\n",
	" # resize image and mask to a unique size\n",
	" image = cv2.resize(mat_file[2], dsize=(args.image_dimension, args.image_dimension),\n",
	" interpolation=cv2.INTER_CUBIC)\n",
	" mask = cv2.resize(mat_file[4].astype('uint8'), dsize=(args.image_dimension, args.image_dimension),\n",
	" interpolation=cv2.INTER_CUBIC)\n",
	"\n",
	" labels.append(int(mat_file[0]))\n",
	" images.append(image)\n",
	" masks.append(mask.astype(bool))\n",
	"\n",
	"sys.stdout.write('\\r[{}/{}] images loaded: {:.1f} %'\n",
	" .format(i, len(files), i / float(len(files)) * 100))\n",
	"sys.stdout.flush()\n",
	"\n",
	"labels = np.array(labels)\n",
	"images = np.array(images)\n",
	"masks = np.array(masks)\n",
	"\n",
	"print('\\nlabels:', labels.shape)\n",
	"print('images:', images.shape)\n",
	"print('masks:', masks.shape)\n",
	"\n",
	"np.save(os.path.join(args.path, 'labels.npy'), labels)\n",
	"np.save(os.path.join(args.path, 'images.npy'), images)\n",
	"np.save(os.path.join(args.path, 'masks.npy'), masks)\n",
	"\n",
	"print('labels.npy, images.npy, masks.npy saved in', args.path)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {},
	"colab_type": "code",
	"id": "g5nRl-jvefUt"
	},
	"outputs": [],
	"source": [
	"integer_to_class = {'1': 'meningioma (1)', '2': 'glioma (2)', '3': 'pituitary tumor (3)'}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 67
	},
	"colab_type": "code",
	"id": "4bYzy46Tekrd",
	"outputId": "c2196313-227d-4689-c682-3e9e2d829424"
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(3064,)\n",
	"(3064, 512, 512)\n",
	"(3064, 512, 512)\n"
	]
	}
	],
	"source": [
	"labels = np.load('labels.npy')\n",
	"images = np.load('images.npy')\n",
	"masks = np.load('masks.npy')\n",
	"\n",
	"print(labels.shape)\n",
	"print(images.shape)\n",
	"print(masks.shape)"
	]
	}
	],
	"metadata": {
	"colab": {
	"collapsed_sections": [],
	"name": "Data_exploration",
	"provenance": []
	},
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}