nogawanogawa/test.ipynb

## test.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "baf8897c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from lib.feature.src.phash import *\n",
    "from lib.feature.src.akaze import *\n",
    "from PIL import Image\n",
    "from os import listdir, path\n",
    "import numpy as np\n",
    "from pyclustering.cluster import gmeans,xmeans\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from sklearn.decomposition import PCA\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6f0ca11a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def corpus_filenames(path):\n",
    "    \"\"\" pathの中のファイルをlistにして返す \"\"\"\n",
    "    labels = []  # *.txt\n",
    "    for y in listdir(path):\n",
    "        if not y.startswith('LICENSE'):\n",
    "            labels.append(y)\n",
    "    return labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c77b02c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = \"/app/images/Abyssinian_4.jpg\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "99508ddb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(get_hash(filename))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "33558b3a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[199, 237, 191, ...,   6,   0,  24],\n",
       "       [192, 243,   1, ...,  10,  49,  32],\n",
       "       [224, 193,  15, ...,  99, 216,  62],\n",
       "       ...,\n",
       "       [  0, 238,   5, ..., 252,  15,   0],\n",
       "       [225, 240,  11, ..., 115, 254,  63],\n",
       "       [ 65,  73,  11, ..., 121, 255,  62]], dtype=uint8)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_akaze_featrue(filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "33f834e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "PATH = \"/app/images\"\n",
    "b = None\n",
    "for filename in corpus_filenames(PATH):\n",
    "    filepath = path.join(PATH, filename)\n",
    "    try:\n",
    "        if b is None:\n",
    "            b = get_akaze_featrue(filepath)\n",
    "        else :\n",
    "            b = np.append(b, get_akaze_featrue(filepath), axis=0)\n",
    "    except:\n",
    "        pass\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6752b9fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 11, 178, 252, ...,   0, 252,  62],\n",
       "       [161,  70,   7, ...,  95,   0,  53],\n",
       "       [ 64,  13,   8, ..., 254,  10,  37],\n",
       "       ...,\n",
       "       [  8, 230,   3, ..., 255, 255,  63],\n",
       "       [117, 253,  79, ..., 143,   0,  48],\n",
       "       [ 65,  61, 236, ...,  65, 221,  60]], dtype=uint8)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b\n",
    "\n",
    "# akaze \n",
    "# kmeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dca24173",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2805623"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "276a1841",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.7/site-packages/numpy/core/_asarray.py:102: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
      "  return array(a, dtype, copy=False, order=order)\n"
     ]
    }
   ],
   "source": [
    "# X-means\n",
    "xmeans_instance = xmeans.xmeans(b).process()\n",
    "\n",
    "clusters = xmeans_instance.get_clusters()\n",
    "centers = xmeans_instance.get_centers()\n",
    "\n",
    "labels_size = len(\n",
    "    list(itertools.chain.from_iterable(clusters))\n",
    ")\n",
    "labels = np.zeros((1, labels_size))\n",
    "for n, n_th_cluster in np.ndenumerate(clusters):\n",
    "    for img_num in n_th_cluster:\n",
    "        labels[0][img_num] = n[0]\n",
    "labels = labels.ravel()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "07217d3d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "1e019069",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8966add",
   "metadata": {},
   "outputs": [],
   "source": [
    "# G-means\n",
    "\n",
    "gmeans_instance = gmeans.gmeans(b).process()\n",
    "\n",
    "clusters = gmeans_instance.get_clusters()\n",
    "centers = gmeans_instance.get_centers()\n",
    "\n",
    "labels_size = len(\n",
    "    list(itertools.chain.from_iterable(clusters))\n",
    ")\n",
    "labels = np.zeros((1, labels_size))\n",
    "for n, n_th_cluster in np.ndenumerate(clusters):\n",
    "    for img_num in n_th_cluster:\n",
    "        labels[0][img_num] = n[0]\n",
    "labels = labels.ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3f233a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "min(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9da97557",
   "metadata": {},
   "outputs": [],
   "source": [
    "max(labels)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "baf8897c",
	"metadata": {},
	"outputs": [],
	"source": [
	"from lib.feature.src.phash import *\n",
	"from lib.feature.src.akaze import *\n",
	"from PIL import Image\n",
	"from os import listdir, path\n",
	"import numpy as np\n",
	"from pyclustering.cluster import gmeans,xmeans\n",
	"import plotly.express as px\n",
	"import plotly.graph_objects as go\n",
	"from sklearn.decomposition import PCA\n",
	"import itertools"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "6f0ca11a",
	"metadata": {},
	"outputs": [],
	"source": [
	"def corpus_filenames(path):\n",
	" \"\"\" pathの中のファイルをlistにして返す \"\"\"\n",
	" labels = [] # *.txt\n",
	" for y in listdir(path):\n",
	" if not y.startswith('LICENSE'):\n",
	" labels.append(y)\n",
	" return labels"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "c77b02c4",
	"metadata": {},
	"outputs": [],
	"source": [
	"filename = \"/app/images/Abyssinian_4.jpg\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "99508ddb",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"64"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"len(get_hash(filename))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "33558b3a",
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[199, 237, 191, ..., 6, 0, 24],\n",
	" [192, 243, 1, ..., 10, 49, 32],\n",
	" [224, 193, 15, ..., 99, 216, 62],\n",
	" ...,\n",
	" [ 0, 238, 5, ..., 252, 15, 0],\n",
	" [225, 240, 11, ..., 115, 254, 63],\n",
	" [ 65, 73, 11, ..., 121, 255, 62]], dtype=uint8)"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"get_akaze_featrue(filename)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"id": "33f834e5",
	"metadata": {},
	"outputs": [],
	"source": [
	"PATH = \"/app/images\"\n",
	"b = None\n",
	"for filename in corpus_filenames(PATH):\n",
	" filepath = path.join(PATH, filename)\n",
	" try:\n",
	" if b is None:\n",
	" b = get_akaze_featrue(filepath)\n",
	" else :\n",
	" b = np.append(b, get_akaze_featrue(filepath), axis=0)\n",
	" except:\n",
	" pass\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"id": "6752b9fa",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[ 11, 178, 252, ..., 0, 252, 62],\n",
	" [161, 70, 7, ..., 95, 0, 53],\n",
	" [ 64, 13, 8, ..., 254, 10, 37],\n",
	" ...,\n",
	" [ 8, 230, 3, ..., 255, 255, 63],\n",
	" [117, 253, 79, ..., 143, 0, 48],\n",
	" [ 65, 61, 236, ..., 65, 221, 60]], dtype=uint8)"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"b\n",
	"\n",
	"# akaze \n",
	"# kmeans"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"id": "dca24173",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"2805623"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"len(b)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"id": "276a1841",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.7/site-packages/numpy/core/_asarray.py:102: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
	" return array(a, dtype, copy=False, order=order)\n"
	]
	}
	],
	"source": [
	"# X-means\n",
	"xmeans_instance = xmeans.xmeans(b).process()\n",
	"\n",
	"clusters = xmeans_instance.get_clusters()\n",
	"centers = xmeans_instance.get_centers()\n",
	"\n",
	"labels_size = len(\n",
	" list(itertools.chain.from_iterable(clusters))\n",
	")\n",
	"labels = np.zeros((1, labels_size))\n",
	"for n, n_th_cluster in np.ndenumerate(clusters):\n",
	" for img_num in n_th_cluster:\n",
	" labels[0][img_num] = n[0]\n",
	"labels = labels.ravel()\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"id": "07217d3d",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.0"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"min(labels)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"id": "1e019069",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"19.0"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"max(labels)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "d8966add",
	"metadata": {},
	"outputs": [],
	"source": [
	"# G-means\n",
	"\n",
	"gmeans_instance = gmeans.gmeans(b).process()\n",
	"\n",
	"clusters = gmeans_instance.get_clusters()\n",
	"centers = gmeans_instance.get_centers()\n",
	"\n",
	"labels_size = len(\n",
	" list(itertools.chain.from_iterable(clusters))\n",
	")\n",
	"labels = np.zeros((1, labels_size))\n",
	"for n, n_th_cluster in np.ndenumerate(clusters):\n",
	" for img_num in n_th_cluster:\n",
	" labels[0][img_num] = n[0]\n",
	"labels = labels.ravel()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "b3f233a2",
	"metadata": {},
	"outputs": [],
	"source": [
	"min(labels)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "9da97557",
	"metadata": {},
	"outputs": [],
	"source": [
	"max(labels)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}