Skip to content

Instantly share code, notes, and snippets.

@muratxs
Last active December 9, 2019 20:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save muratxs/4319d8e3dc096277deeb8a54be7b200d to your computer and use it in GitHub Desktop.
Save muratxs/4319d8e3dc096277deeb8a54be7b200d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cat and Dog Datasets Load & Preprocessing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Necessary Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
"import random\n",
"import pickle"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Files Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"DATADIR = \"Datasets\"\n",
"CATEGORIES = [\"Dog\", \"Cat\"]\n",
"IMG_SIZE = 75"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Build a list of [resized grayscale image, class index] pairs, where the\n",
"# class index is the category's position in CATEGORIES.\n",
"train = []\n",
"skipped = 0  # files that could not be decoded or resized\n",
"\n",
"for class_num, category in enumerate(CATEGORIES):\n",
"    path = os.path.join(DATADIR, category)\n",
"    for img in os.listdir(path):\n",
"        img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)\n",
"        if img_array is None:\n",
"            # cv2.imread signals an unreadable file by returning None, not by raising.\n",
"            skipped += 1\n",
"            continue\n",
"        try:\n",
"            new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))\n",
"        except cv2.error:\n",
"            # Keep the best-effort behaviour (skip the file) but count it.\n",
"            skipped += 1\n",
"            continue\n",
"        train.append([new_array, class_num])\n",
"\n",
"print(f\"Loaded {len(train)} images, skipped {skipped} unreadable files\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24946"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Seed the generator so the shuffled order (and the pickled output) is reproducible.\n",
"random.seed(42)\n",
"random.shuffle(train)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(24946, 2)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each row pairs a 2-D image with an int label, so the rows are ragged.\n",
"# NumPy >= 1.24 raises ValueError for ragged input unless dtype=object is explicit.\n",
"train = np.array(train, dtype=object)\n",
"train.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"X = []\n",
"y = []"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Rebuild both lists from scratch so that re-running this cell cannot append\n",
"# duplicate samples to X and y.\n",
"X = [feature for feature, _ in train]\n",
"y = [label for _, label in train]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Convert the accumulated Python lists into NumPy arrays.\n",
"X, y = np.array(X), np.array(y)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Append a trailing axis of length 1 so each grayscale image has an explicit channel dimension.\n",
"X = X.reshape((-1, IMG_SIZE, IMG_SIZE, 1))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(24946, 75, 75, 1)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Persist the features and labels; the context managers close each file\n",
"# even if pickle.dump raises.\n",
"with open(\"X.pickle\", \"wb\") as pickle_out:\n",
"    pickle.dump(X, pickle_out)\n",
"\n",
"with open(\"y.pickle\", \"wb\") as pickle_out:\n",
"    pickle.dump(y, pickle_out)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment