Skip to content

Instantly share code, notes, and snippets.

@rphes
Created June 2, 2018 13:18
Show Gist options
  • Save rphes/48569eb0c929d33deef18c9de0d96aa8 to your computer and use it in GitHub Desktop.
Save rphes/48569eb0c929d33deef18c9de0d96aa8 to your computer and use it in GitHub Desktop.
Parallel K-Modes and K-Prototypes
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from kmodes.kmodes import KModes\n",
"from kmodes.kprototypes import KPrototypes"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Placeholder: replace `...` with an object that exposes the feature\n",
"# DataFrame as `data.x_data` before running the cells below.\n",
"data = ...\n",
"\n",
"# Labels of the categorical columns (used to subset the frame for KModes).\n",
"cat_cols = data.x_data.select_dtypes('category').columns\n",
"\n",
"# Positional indices of those same columns (passed to KPrototypes via\n",
"# `categorical=`). Deriving them from `cat_cols` keeps both selections in\n",
"# sync and avoids the deprecated `pd.api.types.is_categorical_dtype`.\n",
"cat_index = [data.x_data.columns.get_loc(column) for column in cat_cols]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished with 1 jobs in 14.94139575958252 s.\n",
"Finished with 2 jobs in 7.5124900341033936 s.\n",
"Finished with 3 jobs in 5.480122327804565 s.\n",
"Finished with 4 jobs in 4.830286026000977 s.\n",
"Finished with 5 jobs in 5.453425884246826 s.\n",
"Finished with 6 jobs in 5.0629661083221436 s.\n",
"Finished with 7 jobs in 5.260700702667236 s.\n",
"Finished with 8 jobs in 6.280949831008911 s.\n"
]
}
],
"source": [
"# Draw the training sample once: it is identical for every run, and\n",
"# keeping it out of the loop keeps the sampling cost out of the timings.\n",
"kproto_sample = data.x_data.sample(n=1000, random_state=42)\n",
"\n",
"labels = []\n",
"for n_jobs in range(1, 9):\n",
"    # Reset the global NumPy RNG so every run starts from the same state.\n",
"    np.random.seed(42)\n",
"    kproto = KPrototypes(20, n_jobs=n_jobs, random_state=42)\n",
"\n",
"    # perf_counter() is the clock intended for measuring elapsed time.\n",
"    start = time.perf_counter()\n",
"    # NOTE(review): the second positional argument lands in `y`, which\n",
"    # kmodes presumably ignores -- confirm before removing it.\n",
"    kproto.fit(kproto_sample, data.x_data, categorical=cat_index)\n",
"    print(\"Finished with {} jobs in {} s.\".format(n_jobs, time.perf_counter() - start))\n",
"    labels.append(kproto.labels_)\n",
"\n",
"    # The cluster assignment must not depend on the number of jobs:\n",
"    # compare each run with the one before it.\n",
"    if len(labels) > 1:\n",
"        assert np.array_equal(labels[-2], labels[-1])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished with 1 jobs in 22.188632011413574 s.\n",
"Finished with 2 jobs in 26.93237280845642 s.\n",
"Finished with 3 jobs in 26.965989112854004 s.\n",
"Finished with 4 jobs in 26.93553900718689 s.\n",
"Finished with 5 jobs in 27.35899806022644 s.\n",
"Finished with 6 jobs in 28.72810435295105 s.\n",
"Finished with 7 jobs in 27.51678490638733 s.\n",
"Finished with 8 jobs in 27.7084379196167 s.\n"
]
}
],
"source": [
"# Draw the categorical-only training sample once: it is identical for\n",
"# every run, and keeping it out of the loop keeps the sampling cost out\n",
"# of the timings.\n",
"kmodes_sample = data.x_data[cat_cols].sample(n=100000, random_state=42)\n",
"\n",
"labels = []\n",
"for n_jobs in range(1, 9):\n",
"    # Reset the global NumPy RNG so every run starts from the same state.\n",
"    np.random.seed(42)\n",
"    kmodes = KModes(20, n_jobs=n_jobs, random_state=42)\n",
"\n",
"    # perf_counter() is the clock intended for measuring elapsed time.\n",
"    start = time.perf_counter()\n",
"    # NOTE(review): the second positional argument lands in `y`, which\n",
"    # kmodes presumably ignores -- confirm before removing it.\n",
"    kmodes.fit(kmodes_sample, data.x_data)\n",
"    print(\"Finished with {} jobs in {} s.\".format(n_jobs, time.perf_counter() - start))\n",
"    labels.append(kmodes.labels_)\n",
"\n",
"    # The cluster assignment must not depend on the number of jobs:\n",
"    # compare each run with the one before it.\n",
"    if len(labels) > 1:\n",
"        assert np.array_equal(labels[-2], labels[-1])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@ramakrv
Copy link

ramakrv commented May 8, 2019

Thanks @rphes for the parallel execution in Kprototypes

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment