Skip to content

Instantly share code, notes, and snippets.

@egy1st
Created January 10, 2022 20:04
Show Gist options
  • Save egy1st/7f0ec689b9310c4e6187f37af88a6c50 to your computer and use it in GitHub Desktop.
Save egy1st/7f0ec689b9310c4e6187f37af88a6c50 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "zaaLaJHT35Fd"
},
"outputs": [],
"source": "import time\nimport os.path\nimport requests\nfrom numpy import genfromtxt\n\n# Create the folder that will hold the downloaded data files.\n# os.makedirs(..., exist_ok=True) is portable and does not fail when the folder\n# already exists, so this cell can be re-run safely.\nos.makedirs('data', exist_ok=True)"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "69XXeoif35Fn",
"scrolled": true
},
"outputs": [],
"source": "# Install the DenMune clustering algorithm from the official Python repository, PyPI:\n# https://pypi.org/project/denmune/\n# %pip (rather than !pip) installs into the environment of the running kernel.\n# NOTE(review): the install is not pinned; consider pinning the denmune release this\n# notebook was written against once it is confirmed.\n%pip install denmune\n\n# now import it\nfrom denmune import DenMune"
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "H3H8DYwU35Fo"
},
"outputs": [],
"source": "dataset = 'iris'  # let us take the iris dataset as an example\n\nurl = \"https://zerobytes.one/denmune_data/\"\nfile_ext = \".txt\"\nground_ext = \"-gt\"\ndata2d_ext = '-2d'\n\ndataset_url = url + dataset + file_ext\ngroundtruth_url = url + dataset + ground_ext + file_ext\n\n# Folder that holds the data files; set it to '' to use the current folder.\ndata_path = 'data/'\ndata_file = data_path + dataset + file_ext  # 'iris' + '.txt' ==> iris.txt\ngroundtruth_file = data_path + dataset + ground_ext + file_ext  # 'iris' + '-gt' + '.txt' ==> iris-gt.txt\nfile_2d = data_path + dataset + data2d_ext + file_ext  # 'iris' + '-2d' + '.txt' ==> iris-2d.txt\n\nif data_path:\n    # Make sure the target folder exists before anything is written into it.\n    os.makedirs(data_path, exist_ok=True)\n\n\ndef fetch_if_missing(source_url, target_file):\n    \"\"\"Download source_url to target_file unless target_file already exists.\n\n    Raises requests.HTTPError when the server answers with an error status,\n    so that an error page is never saved and later parsed as data.\n    \"\"\"\n    if os.path.isfile(target_file):\n        return\n    response = requests.get(source_url, timeout=30)\n    response.raise_for_status()\n    with open(target_file, 'wb') as f:\n        f.write(response.content)\n\n\nfetch_if_missing(dataset_url, data_file)\ndata = genfromtxt(data_file, delimiter='\\t')\n\nfetch_if_missing(groundtruth_url, groundtruth_file)\n# Read the labels from the local copy, not from the URL, so the file\n# downloaded above is the one that is actually used.\ndata_labels = genfromtxt(groundtruth_file, delimiter='\\t')"
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "xm54UWO835Fq",
"outputId": "ebd47b77-4038-4415-b130-357e1e856ff0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "Dataset's Groundtruth\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 432x288 with 1 Axes>"
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "\n ==================================================================================================== \n\nDenMune Analyzer\n\u251c\u2500\u2500 exec_time\n\u2502 \u251c\u2500\u2500 DenMune: 0.014\n\u2502 \u251c\u2500\u2500 NGT: 0.002\n\u2502 \u2514\u2500\u2500 t_SNE: 0.594\n\u251c\u2500\u2500 n_clusters\n\u2502 \u251c\u2500\u2500 actual: 3\n\u2502 \u2514\u2500\u2500 detected: 3\n\u251c\u2500\u2500 n_points\n\u2502 \u251c\u2500\u2500 dim: 4\n\u2502 \u251c\u2500\u2500 noise\n\u2502 \u2502 \u251c\u2500\u2500 type-1: 0\n\u2502 \u2502 \u2514\u2500\u2500 type-2: 0\n\u2502 \u251c\u2500\u2500 size: 150\n\u2502 \u251c\u2500\u2500 strong: 86\n\u2502 \u2514\u2500\u2500 weak\n\u2502 \u251c\u2500\u2500 all: 64\n\u2502 \u251c\u2500\u2500 failed to merge: 0\n\u2502 \u2514\u2500\u2500 succeeded to merge: 64\n\u2514\u2500\u2500 validity\n \u251c\u2500\u2500 ACC: 135\n \u251c\u2500\u2500 AMI: 0.795\n \u251c\u2500\u2500 ARI: 0.746\n \u251c\u2500\u2500 F1: 0.898\n \u251c\u2500\u2500 NMI: 0.798\n \u251c\u2500\u2500 completeness: 0.809\n \u2514\u2500\u2500 homogeneity: 0.787\n\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 432x288 with 1 Axes>"
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "k= 11 F1 score is: 0.898\n"
}
],
"source": "# DenMune parameters\nverpose_mode = True      # view in-depth analysis of time complexity, outlier detection and number of clusters\nshow_groundtrugh = True  # plot the ground-truth labels on/off\nshow_noise = True        # show noise and outliers on/off\nknn = 11                 # passed to DenMune as k_nearest\n# Name of the validity index to report; the validity result is looked up by this key.\nvalidity_key = \"F1\"\n\ndm = DenMune(\n    data=data,\n    file_2d=file_2d,\n    k_nearest=knn,\n    verpose=verpose_mode,\n    show_noise=show_noise,\n    rgn_tsne=False,\n)\n\nif show_groundtrugh:\n    # Plot the ground truth of this dataset, which is reduced to 2-D using t-SNE.\n    print (\"Dataset\\'s Groundtruth\")\n    dm.plot_clusters(labels=data_labels, ground=True)\n    separator = \"=====\" * 20\n    print('\\n', separator, '\\n')\n\nlabels_pred = dm.fit_predict()\nvalidity = dm.validate_Clusters(labels_true=data_labels, labels_pred=labels_pred)\n\ndm.plot_clusters(labels=labels_pred, show_noise=show_noise)\n\nscore = round(validity[validity_key], 3)\nprint ('k=', knn, validity_key, 'score is:', score)"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vCu_H5vz-Oia"
},
"outputs": [],
"source": ""
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "iris_dataset.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.8",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment