DanielaLaura/k-medoids.ipynb

## k-medoids.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "column_names=['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium',\n",
    "              'Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins',\n",
    "              'Color intensity','Hue','diluted wines','Proline']\n",
    "\n",
    "wine = pd.read_csv('wine.csv',names=column_names)\n",
    "    \n",
    "wine_class = wine['class']\n",
    "del wine['class']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "scaler = MinMaxScaler()\n",
    "data = pd.DataFrame(scaler.fit_transform(wine), columns=wine.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "m = data.shape[0]\n",
    "n = data.shape[1]\n",
    "n_iter = 50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 258,
   "metadata": {},
   "outputs": [],
   "source": [
    "K= 3\n",
    "import random\n",
    "#create an empty centroid array\n",
    "centroids = np.array([]).reshape(n,0)\n",
    "#create 5 random centroids\n",
    "for k in range(K):\n",
    "    centroids = np.c_[centroids, data.iloc[random.randint(0,m-1)]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.61315789, 0.43684211, 0.75526316],\n",
       "       [0.35968379, 0.15612648, 0.18577075],\n",
       "       [0.52941176, 0.48128342, 0.40641711],\n",
       "       [0.48453608, 0.52061856, 0.27835052],\n",
       "       [0.20652174, 0.10869565, 0.33695652],\n",
       "       [0.14482759, 0.13793103, 0.73103448],\n",
       "       [0.03375527, 0.23628692, 0.64345992],\n",
       "       [0.45283019, 0.8490566 , 0.1509434 ],\n",
       "       [0.07255521, 0.38170347, 0.54574132],\n",
       "       [0.36860068, 0.15102389, 0.4112628 ],\n",
       "       [0.17886179, 0.3902439 , 0.3495935 ],\n",
       "       [0.43956044, 0.28937729, 0.75457875],\n",
       "       [0.35805991, 0.15477889, 0.5042796 ]])"
      ]
     },
     "execution_count": 259,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "centroids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 260,
   "metadata": {},
   "outputs": [],
   "source": [
    "def initMedoids( X,k):\n",
    "        ''' \n",
    "        Parameters\n",
    "        ----------\n",
    "        X: input data. \n",
    "        '''\n",
    "        medoids = []\n",
    "       \n",
    "        #Starting medoids will be random members from data set X\n",
    "        indexes = np.random.randint(0, len(X)-1,k)\n",
    "        medoids = X.iloc[indexes]\n",
    "        return medoids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 261,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Alcohol</th>\n",
       "      <th>Malic acid</th>\n",
       "      <th>Ash</th>\n",
       "      <th>Alcalinity of ash</th>\n",
       "      <th>Magnesium</th>\n",
       "      <th>Total phenols</th>\n",
       "      <th>Flavanoids</th>\n",
       "      <th>Nonflavanoid phenols</th>\n",
       "      <th>Proanthocyanins</th>\n",
       "      <th>Color intensity</th>\n",
       "      <th>Hue</th>\n",
       "      <th>diluted wines</th>\n",
       "      <th>Proline</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>0.671053</td>\n",
       "      <td>0.181818</td>\n",
       "      <td>0.534759</td>\n",
       "      <td>0.438144</td>\n",
       "      <td>0.391304</td>\n",
       "      <td>0.648276</td>\n",
       "      <td>0.601266</td>\n",
       "      <td>0.169811</td>\n",
       "      <td>0.485804</td>\n",
       "      <td>0.479522</td>\n",
       "      <td>0.495935</td>\n",
       "      <td>0.589744</td>\n",
       "      <td>0.882311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>0.321053</td>\n",
       "      <td>0.195652</td>\n",
       "      <td>0.406417</td>\n",
       "      <td>0.432990</td>\n",
       "      <td>0.108696</td>\n",
       "      <td>0.231034</td>\n",
       "      <td>0.356540</td>\n",
       "      <td>0.452830</td>\n",
       "      <td>0.384858</td>\n",
       "      <td>0.180887</td>\n",
       "      <td>0.422764</td>\n",
       "      <td>0.695971</td>\n",
       "      <td>0.165478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>0.755263</td>\n",
       "      <td>0.185771</td>\n",
       "      <td>0.406417</td>\n",
       "      <td>0.278351</td>\n",
       "      <td>0.336957</td>\n",
       "      <td>0.731034</td>\n",
       "      <td>0.643460</td>\n",
       "      <td>0.150943</td>\n",
       "      <td>0.545741</td>\n",
       "      <td>0.411263</td>\n",
       "      <td>0.349593</td>\n",
       "      <td>0.754579</td>\n",
       "      <td>0.504280</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Alcohol  Malic acid       Ash  Alcalinity of ash  Magnesium  \\\n",
       "31   0.671053    0.181818  0.534759           0.438144   0.391304   \n",
       "106  0.321053    0.195652  0.406417           0.432990   0.108696   \n",
       "47   0.755263    0.185771  0.406417           0.278351   0.336957   \n",
       "\n",
       "     Total phenols  Flavanoids  Nonflavanoid phenols  Proanthocyanins  \\\n",
       "31        0.648276    0.601266              0.169811         0.485804   \n",
       "106       0.231034    0.356540              0.452830         0.384858   \n",
       "47        0.731034    0.643460              0.150943         0.545741   \n",
       "\n",
       "     Color intensity       Hue  diluted wines   Proline  \n",
       "31          0.479522  0.495935       0.589744  0.882311  \n",
       "106         0.180887  0.422764       0.695971  0.165478  \n",
       "47          0.411263  0.349593       0.754579  0.504280  "
      ]
     },
     "execution_count": 261,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pr = initMedoids( data,3)\n",
    "pr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 277,
   "metadata": {},
   "outputs": [],
   "source": [
    "#create an empty array\n",
    "euclid = np.array([]).reshape(m,0)\n",
    "#find distance betweeen centroids and each point\n",
    "for k in range(K):\n",
    "       dist=np.sum((data-centroids[:,k])**2,axis=1)\n",
    "       euclid=np.c_[euclid,dist]\n",
    "#store the minimum distance value computed\n",
    "labels=np.argmin(euclid,axis=1)+1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 263,
   "metadata": {},
   "outputs": [],
   "source": [
    "#euclid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 264,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_distance (X, medoids, k):\n",
    "#create an empty array\n",
    "    euclid = np.array([]).reshape(m,0)\n",
    "#find distance betweeen centroids and each point\n",
    "    for k in range(K):\n",
    "        dist=np.sum((X-medoids[:,k])**2,axis=1)\n",
    "        euclid=np.c_[euclid,dist]\n",
    "    #store the minimum distance value computed\n",
    "        labels=np.argmin(euclid,axis=1)+1\n",
    "    return labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 265,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
       "       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
       "       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 1, 3, 2, 2,\n",
       "       3, 1, 2, 3, 2, 3, 1, 3, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 3, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 3, 3, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 2, 2, 2, 2, 3,\n",
       "       3, 1, 2, 2, 2, 2, 2, 2, 1, 2, 3, 3, 2, 3, 3, 2, 2, 2, 2, 1, 1, 1,\n",
       "       1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1], dtype=int64)"
      ]
     },
     "execution_count": 265,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labels = compute_distance (data,centroids, 3)\n",
    "labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "metadata": {},
   "outputs": [],
   "source": [
    " def euclidian(param1, param2):\n",
    "        euclidian_distance = np.sqrt(np.sum((param1-param2)**2))\n",
    "        #euclidian_distance = np.sum(np.abs(param1-param2))\n",
    "        return euclidian_distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 267,
   "metadata": {},
   "outputs": [],
   "source": [
    "def updateMedoids( X, labels):\n",
    "        '''\n",
    "        Parameters\n",
    "        ----------\n",
    "        labels: a list contains labels of data points\n",
    "        '''\n",
    "        #self.has_converged = True\n",
    "        \n",
    "        #Store data points to the current cluster they belong to\n",
    "        k=3\n",
    "        medoids_cost = np.sum(euclid)\n",
    "    \n",
    "        clusters = []\n",
    "        for i in range(0,k):\n",
    "            cluster = []\n",
    "            for j in range(len(X)):\n",
    "                if (labels[j] == i):\n",
    "                    cluster.append(X.loc[j])\n",
    "            clusters.append(cluster)\n",
    "        \n",
    "        #Calculate the new medoids\n",
    "        new_medoids = []\n",
    "        for i in range(0, k):\n",
    "            new_medoid = centroids[i]\n",
    "            old_medoids_cost = medoids_cost\n",
    "            for j in range(len(clusters[i])):\n",
    "                \n",
    "                #Cost of the current data points to be compared with the current optimal cost\n",
    "                cur_medoids_cost = 0\n",
    "                for dpoint_index in range(len(clusters[i])):\n",
    "                    cur_medoids_cost += np.sum(euclidian(clusters[i][j], clusters[i][dpoint_index]))\n",
    "                \n",
    "                #If current cost is less than current optimal cost,\n",
    "                #make the current data point new medoid of the cluster\n",
    "                if cur_medoids_cost < old_medoids_cost:\n",
    "                    new_medoid = clusters[i][j]\n",
    "                    old_medoids_cost = cur_medoids_cost\n",
    "            \n",
    "            #Now we have the optimal medoid of the current cluster\n",
    "            new_medoids.append(new_medoid)\n",
    "        return new_medoids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 268,
   "metadata": {},
   "outputs": [],
   "source": [
    "update = updateMedoids( data, labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 130,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 131,
	"metadata": {},
	"outputs": [],
	"source": [
	"column_names=['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium',\n",
	" 'Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins',\n",
	" 'Color intensity','Hue','diluted wines','Proline']\n",
	"\n",
	"wine = pd.read_csv('wine.csv',names=column_names)\n",
	" \n",
	"wine_class = wine['class']\n",
	"del wine['class']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 132,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.preprocessing import MinMaxScaler\n",
	"scaler = MinMaxScaler()\n",
	"data = pd.DataFrame(scaler.fit_transform(wine), columns=wine.columns)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 133,
	"metadata": {},
	"outputs": [],
	"source": [
	"m = data.shape[0]\n",
	"n = data.shape[1]\n",
	"n_iter = 50"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 258,
	"metadata": {},
	"outputs": [],
	"source": [
	"K= 3\n",
	"import random\n",
	"#create an empty centroid array\n",
	"centroids = np.array([]).reshape(n,0)\n",
	"#create 5 random centroids\n",
	"for k in range(K):\n",
	" centroids = np.c_[centroids, data.iloc[random.randint(0,m-1)]]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 259,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[0.61315789, 0.43684211, 0.75526316],\n",
	" [0.35968379, 0.15612648, 0.18577075],\n",
	" [0.52941176, 0.48128342, 0.40641711],\n",
	" [0.48453608, 0.52061856, 0.27835052],\n",
	" [0.20652174, 0.10869565, 0.33695652],\n",
	" [0.14482759, 0.13793103, 0.73103448],\n",
	" [0.03375527, 0.23628692, 0.64345992],\n",
	" [0.45283019, 0.8490566 , 0.1509434 ],\n",
	" [0.07255521, 0.38170347, 0.54574132],\n",
	" [0.36860068, 0.15102389, 0.4112628 ],\n",
	" [0.17886179, 0.3902439 , 0.3495935 ],\n",
	" [0.43956044, 0.28937729, 0.75457875],\n",
	" [0.35805991, 0.15477889, 0.5042796 ]])"
	]
	},
	"execution_count": 259,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"centroids"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": 260,
	"metadata": {},
	"outputs": [],
	"source": [
	"def initMedoids( X,k):\n",
	" ''' \n",
	" Parameters\n",
	" ----------\n",
	" X: input data. \n",
	" '''\n",
	" medoids = []\n",
	" \n",
	" #Starting medoids will be random members from data set X\n",
	" indexes = np.random.randint(0, len(X)-1,k)\n",
	" medoids = X.iloc[indexes]\n",
	" return medoids"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 261,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Alcohol</th>\n",
	" <th>Malic acid</th>\n",
	" <th>Ash</th>\n",
	" <th>Alcalinity of ash</th>\n",
	" <th>Magnesium</th>\n",
	" <th>Total phenols</th>\n",
	" <th>Flavanoids</th>\n",
	" <th>Nonflavanoid phenols</th>\n",
	" <th>Proanthocyanins</th>\n",
	" <th>Color intensity</th>\n",
	" <th>Hue</th>\n",
	" <th>diluted wines</th>\n",
	" <th>Proline</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>31</th>\n",
	" <td>0.671053</td>\n",
	" <td>0.181818</td>\n",
	" <td>0.534759</td>\n",
	" <td>0.438144</td>\n",
	" <td>0.391304</td>\n",
	" <td>0.648276</td>\n",
	" <td>0.601266</td>\n",
	" <td>0.169811</td>\n",
	" <td>0.485804</td>\n",
	" <td>0.479522</td>\n",
	" <td>0.495935</td>\n",
	" <td>0.589744</td>\n",
	" <td>0.882311</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>106</th>\n",
	" <td>0.321053</td>\n",
	" <td>0.195652</td>\n",
	" <td>0.406417</td>\n",
	" <td>0.432990</td>\n",
	" <td>0.108696</td>\n",
	" <td>0.231034</td>\n",
	" <td>0.356540</td>\n",
	" <td>0.452830</td>\n",
	" <td>0.384858</td>\n",
	" <td>0.180887</td>\n",
	" <td>0.422764</td>\n",
	" <td>0.695971</td>\n",
	" <td>0.165478</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>47</th>\n",
	" <td>0.755263</td>\n",
	" <td>0.185771</td>\n",
	" <td>0.406417</td>\n",
	" <td>0.278351</td>\n",
	" <td>0.336957</td>\n",
	" <td>0.731034</td>\n",
	" <td>0.643460</td>\n",
	" <td>0.150943</td>\n",
	" <td>0.545741</td>\n",
	" <td>0.411263</td>\n",
	" <td>0.349593</td>\n",
	" <td>0.754579</td>\n",
	" <td>0.504280</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n",
	"31 0.671053 0.181818 0.534759 0.438144 0.391304 \n",
	"106 0.321053 0.195652 0.406417 0.432990 0.108696 \n",
	"47 0.755263 0.185771 0.406417 0.278351 0.336957 \n",
	"\n",
	" Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \\\n",
	"31 0.648276 0.601266 0.169811 0.485804 \n",
	"106 0.231034 0.356540 0.452830 0.384858 \n",
	"47 0.731034 0.643460 0.150943 0.545741 \n",
	"\n",
	" Color intensity Hue diluted wines Proline \n",
	"31 0.479522 0.495935 0.589744 0.882311 \n",
	"106 0.180887 0.422764 0.695971 0.165478 \n",
	"47 0.411263 0.349593 0.754579 0.504280 "
	]
	},
	"execution_count": 261,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"pr = initMedoids( data,3)\n",
	"pr"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 277,
	"metadata": {},
	"outputs": [],
	"source": [
	"#create an empty array\n",
	"euclid = np.array([]).reshape(m,0)\n",
	"#find distance betweeen centroids and each point\n",
	"for k in range(K):\n",
	" dist=np.sum((data-centroids[:,k])**2,axis=1)\n",
	" euclid=np.c_[euclid,dist]\n",
	"#store the minimum distance value computed\n",
	"labels=np.argmin(euclid,axis=1)+1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 263,
	"metadata": {},
	"outputs": [],
	"source": [
	"#euclid"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 264,
	"metadata": {},
	"outputs": [],
	"source": [
	"def compute_distance (X, medoids, k):\n",
	"#create an empty array\n",
	" euclid = np.array([]).reshape(m,0)\n",
	"#find distance betweeen centroids and each point\n",
	" for k in range(K):\n",
	" dist=np.sum((X-medoids[:,k])**2,axis=1)\n",
	" euclid=np.c_[euclid,dist]\n",
	" #store the minimum distance value computed\n",
	" labels=np.argmin(euclid,axis=1)+1\n",
	" return labels"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 265,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
	" 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
	" 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 1, 3, 2, 2,\n",
	" 3, 1, 2, 3, 2, 3, 1, 3, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 3, 2, 2, 2,\n",
	" 2, 2, 2, 2, 2, 3, 3, 3, 1, 3, 3, 2, 3, 2, 1, 2, 3, 2, 2, 2, 2, 3,\n",
	" 3, 1, 2, 2, 2, 2, 2, 2, 1, 2, 3, 3, 2, 3, 3, 2, 2, 2, 2, 1, 1, 1,\n",
	" 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
	" 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
	" 1, 1], dtype=int64)"
	]
	},
	"execution_count": 265,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"labels = compute_distance (data,centroids, 3)\n",
	"labels"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 266,
	"metadata": {},
	"outputs": [],
	"source": [
	" def euclidian(param1, param2):\n",
	" euclidian_distance = np.sqrt(np.sum((param1-param2)**2))\n",
	" #euclidian_distance = np.sum(np.abs(param1-param2))\n",
	" return euclidian_distance"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 267,
	"metadata": {},
	"outputs": [],
	"source": [
	"def updateMedoids( X, labels):\n",
	" '''\n",
	" Parameters\n",
	" ----------\n",
	" labels: a list contains labels of data points\n",
	" '''\n",
	" #self.has_converged = True\n",
	" \n",
	" #Store data points to the current cluster they belong to\n",
	" k=3\n",
	" medoids_cost = np.sum(euclid)\n",
	" \n",
	" clusters = []\n",
	" for i in range(0,k):\n",
	" cluster = []\n",
	" for j in range(len(X)):\n",
	" if (labels[j] == i):\n",
	" cluster.append(X.loc[j])\n",
	" clusters.append(cluster)\n",
	" \n",
	" #Calculate the new medoids\n",
	" new_medoids = []\n",
	" for i in range(0, k):\n",
	" new_medoid = centroids[i]\n",
	" old_medoids_cost = medoids_cost\n",
	" for j in range(len(clusters[i])):\n",
	" \n",
	" #Cost of the current data points to be compared with the current optimal cost\n",
	" cur_medoids_cost = 0\n",
	" for dpoint_index in range(len(clusters[i])):\n",
	" cur_medoids_cost += np.sum(euclidian(clusters[i][j], clusters[i][dpoint_index]))\n",
	" \n",
	" #If current cost is less than current optimal cost,\n",
	" #make the current data point new medoid of the cluster\n",
	" if cur_medoids_cost < old_medoids_cost:\n",
	" new_medoid = clusters[i][j]\n",
	" old_medoids_cost = cur_medoids_cost\n",
	" \n",
	" #Now we have the optimal medoid of the current cluster\n",
	" new_medoids.append(new_medoid)\n",
	" return new_medoids"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 268,
	"metadata": {},
	"outputs": [],
	"source": [
	"update = updateMedoids( data, labels)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 233,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}