Skip to content

Instantly share code, notes, and snippets.

@angadbajwa23
Last active September 28, 2022 18:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save angadbajwa23/595c05a361077dab3b878a15a691c5d6 to your computer and use it in GitHub Desktop.
Save angadbajwa23/595c05a361077dab3b878a15a691c5d6 to your computer and use it in GitHub Desktop.
Data_Fetching
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "C8ygXqoQdq_H",
"outputId": "3be12076-eb35-4305-f06f-ede8656f22fa"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "CVFJ4U2GduEI",
"outputId": "0e7a79f5-fba3-49a0-bb98-ec191f9a4d42"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/content/gdrive/My Drive/2d\n"
]
}
],
"source": [
"# Absolute path, so this cell also works when re-run from inside the project folder.\n",
"%cd /content/gdrive/My Drive/2d"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "nbAf6JWerVr6"
},
"source": [
"### Downloading the dataset from [figshare.com/articles/brain_tumor_dataset/1512427](https://figshare.com/articles/brain_tumor_dataset/1512427)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 202
},
"colab_type": "code",
"id": "Cmz08fU3eAFQ",
"outputId": "2ba55caf-72e6-446c-a25a-d31b66f7db5b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2020-05-18 06:28:44-- https://ndownloader.figshare.com/articles/1512427/versions/5\n",
"Resolving ndownloader.figshare.com (ndownloader.figshare.com)... 34.251.18.86, 34.240.222.171, 34.252.153.30, ...\n",
"Connecting to ndownloader.figshare.com (ndownloader.figshare.com)|34.251.18.86|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 879501695 (839M) [application/zip]\n",
"Saving to: ‘5’\n",
"\n",
"5 100%[===================>] 838.76M 23.7MB/s in 38s \n",
"\n",
"2020-05-18 06:29:22 (22.2 MB/s) - ‘5’ saved [879501695/879501695]\n",
"\n"
]
}
],
"source": [
"!wget https://ndownloader.figshare.com/articles/1512427/versions/5"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 134
},
"colab_type": "code",
"id": "MbL09Q9neF9i",
"outputId": "31dac3f4-841a-4061-ea93-e75147e6a723"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: 5\n",
" extracting: brainTumorDataPublic_1-766.zip \n",
" extracting: brainTumorDataPublic_1533-2298.zip \n",
" extracting: brainTumorDataPublic_767-1532.zip \n",
" extracting: brainTumorDataPublic_2299-3064.zip \n",
" extracting: cvind.mat \n",
" extracting: README.txt \n"
]
}
],
"source": [
"!unzip 5 && rm 5"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "eVF7_xNbeJFh"
},
"outputs": [],
"source": [
"!cat brainTumorDataPublic_* > brainTumorDataPublic_temp.zip\n",
"!zip -FF brainTumorDataPublic_temp.zip --out data.zip"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Gh7oUYHMeMWU"
},
"outputs": [],
"source": [
"!rm brainTumorDataPublic_*"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "8tcBi2kHeT2I"
},
"outputs": [],
"source": [
"!unzip data.zip -d data && rm data.zip"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "K0IcaIDeeUVn",
"outputId": "74e02ae0-6a71-4ffe-fd11-13bf851b5432"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3064\n"
]
}
],
"source": [
"!ls data | wc -l"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 151
},
"colab_type": "code",
"id": "BAMRX6SveW-0",
"outputId": "5bf6ec54-97e0-40a8-8a4a-5294f40f5c88"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting hdf5storage\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/79/e0/5dd25068a231cd817265529368aca2f918049b290dcb2fd9b24ce136adf4/hdf5storage-0.1.15-py2.py3-none-any.whl (56kB)\n",
"\r",
"\u001b[K |█████▊ | 10kB 15.8MB/s eta 0:00:01\r",
"\u001b[K |███████████▌ | 20kB 1.7MB/s eta 0:00:01\r",
"\u001b[K |█████████████████▎ | 30kB 2.3MB/s eta 0:00:01\r",
"\u001b[K |███████████████████████ | 40kB 1.6MB/s eta 0:00:01\r",
"\u001b[K |████████████████████████████▉ | 51kB 2.0MB/s eta 0:00:01\r",
"\u001b[K |████████████████████████████████| 61kB 1.7MB/s \n",
"\u001b[?25hRequirement already satisfied: numpy; python_version >= \"3.4\" in /usr/local/lib/python3.6/dist-packages (from hdf5storage) (1.18.4)\n",
"Requirement already satisfied: h5py>=2.1; python_version >= \"3.3\" in /usr/local/lib/python3.6/dist-packages (from hdf5storage) (2.10.0)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py>=2.1; python_version >= \"3.3\"->hdf5storage) (1.12.0)\n",
"Installing collected packages: hdf5storage\n",
"Successfully installed hdf5storage-0.1.15\n"
]
}
],
"source": [
"!pip install hdf5storage"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "HEdvXiYasKEC"
},
"source": [
"### Creating the images, labels and masks NumPy arrays"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 101
},
"colab_type": "code",
"id": "ImwdQyMGeZAA",
"outputId": "e7346546-d58e-43b0-a968-06aeab112f5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3064/3064] images loaded: 100.0 %\n",
"labels: (3064,)\n",
"images: (3064, 512, 512)\n",
"masks: (3064, 512, 512)\n",
"labels.npy, images.npy, masks.npy saved in /content/gdrive/My Drive/2d/\n"
]
}
],
"source": [
"import os\n",
"import argparse\n",
"import sys\n",
"import numpy as np\n",
"import hdf5storage\n",
"import cv2\n",
"import matplotlib.pyplot as plt\n",
"\n",
"class NoDataFound(Exception):\n",
"    \"\"\"Raised when a directory exists but holds no 'data' sub-folder.\"\"\"\n",
"\n",
"\n",
"def dir_path(path):\n",
"    \"\"\"Validate ``path`` as a dataset root; used as an argparse ``type``.\n",
"\n",
"    Args:\n",
"        path: directory that is expected to contain a 'data' sub-folder.\n",
"\n",
"    Returns:\n",
"        ``path`` with every backslash replaced by a forward slash.\n",
"\n",
"    Raises:\n",
"        NoDataFound: ``path`` is a directory but has no 'data' sub-folder.\n",
"        NotADirectoryError: ``path`` is not an existing directory.\n",
"    \"\"\"\n",
"    # replace '\\' in path for Windows users\n",
"    path = path.replace('\\\\', '/')\n",
"    data_path = os.path.join(path, 'data').replace('\\\\', '/')\n",
"\n",
"    if os.path.isdir(data_path):\n",
"        return path\n",
"    if not os.path.isdir(path):\n",
"        raise NotADirectoryError(path)\n",
"    raise NoDataFound('Could not find a \"data\" folder inside directory. {} does not exist.'\n",
"                      .format(data_path))\n",
"\n",
"# Dataset root on the mounted Drive; dir_path() checks that it contains a 'data' folder.\n",
"path = '/content/gdrive/My Drive/2d/'\n",
"parser = argparse.ArgumentParser()\n",
"parser.add_argument('path', help='path to the brain_tumor_dataset directory', type=dir_path)\n",
"parser.add_argument('--image-dimension', '-d', default=512, help='dimension of the image', type=int)\n",
"# The argument list is passed explicitly instead of being read from sys.argv.\n",
"args = parser.parse_args(args=[path])\n",
"\n",
"labels = []\n",
"images = []\n",
"masks = []\n",
"\n",
"data_dir = os.path.join(args.path, 'data').replace('\\\\', '/')\n",
"# os.listdir returns entries in arbitrary order, which would make the row order of the\n",
"# saved arrays differ between runs. Keep only the .mat files and sort them by length,\n",
"# then name, so that e.g. 2.mat is loaded before 10.mat.\n",
"files = sorted((name for name in os.listdir(data_dir) if name.lower().endswith('.mat')),\n",
"               key=lambda name: (len(name), name))\n",
"if not files:\n",
"    # Without this guard the final progress report would fail on an unbound loop index.\n",
"    raise NoDataFound('No .mat files found in {}'.format(data_dir))\n",
"\n",
"target_size = (args.image_dimension, args.image_dimension)\n",
"progress = '\\r[{}/{}] images loaded: {:.1f} %'\n",
"\n",
"for i, file in enumerate(files, start=1):\n",
"    if i % 10 == 0:\n",
"        # print the percentage of images loaded\n",
"        sys.stdout.write(progress.format(i, len(files), i / float(len(files)) * 100))\n",
"        sys.stdout.flush()\n",
"\n",
"    # load matlab file with hdf5storage as scipy.io.loadmat does not support v7.3 files\n",
"    mat_file = hdf5storage.loadmat(os.path.join(data_dir, file))['cjdata'][0]\n",
"\n",
"    # resize image and mask to a common size\n",
"    # NOTE(review): fields 0 / 2 / 4 are used as label / image / mask - confirm against the dataset README\n",
"    image = cv2.resize(mat_file[2], dsize=target_size, interpolation=cv2.INTER_CUBIC)\n",
"    # Nearest-neighbour only copies existing mask values; cubic interpolation would blend\n",
"    # values along the mask edges before the bool cast below.\n",
"    mask = cv2.resize(mat_file[4].astype('uint8'), dsize=target_size,\n",
"                      interpolation=cv2.INTER_NEAREST)\n",
"\n",
"    labels.append(int(mat_file[0]))\n",
"    images.append(image)\n",
"    masks.append(mask.astype(bool))\n",
"\n",
"# The loop reports only every 10th file, so always print the final count.\n",
"sys.stdout.write(progress.format(len(files), len(files), 100.0))\n",
"sys.stdout.flush()\n",
"\n",
"labels = np.array(labels)\n",
"images = np.array(images)\n",
"masks = np.array(masks)\n",
"\n",
"print('\\nlabels:', labels.shape)\n",
"print('images:', images.shape)\n",
"print('masks:', masks.shape)\n",
"\n",
"np.save(os.path.join(args.path, 'labels.npy'), labels)\n",
"np.save(os.path.join(args.path, 'images.npy'), images)\n",
"np.save(os.path.join(args.path, 'masks.npy'), masks)\n",
"\n",
"print('labels.npy, images.npy, masks.npy saved in', args.path)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "g5nRl-jvefUt"
},
"outputs": [],
"source": [
"# Display name for each class label; note that the keys are strings, not ints.\n",
"integer_to_class = {\n",
"    '1': 'meningioma (1)',\n",
"    '2': 'glioma (2)',\n",
"    '3': 'pituitary tumor (3)',\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 67
},
"colab_type": "code",
"id": "4bYzy46Tekrd",
"outputId": "c2196313-227d-4689-c682-3e9e2d829424"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(3064,)\n",
"(3064, 512, 512)\n",
"(3064, 512, 512)\n"
]
}
],
"source": [
"# Reload the saved arrays; the relative names resolve against the current working directory.\n",
"labels, images, masks = [np.load(name) for name in ('labels.npy', 'images.npy', 'masks.npy')]\n",
"\n",
"for array in (labels, images, masks):\n",
"    print(array.shape)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Data_exploration",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment