Created
July 14, 2024 08:36
-
-
Save afvanwoudenberg/a08504a8bbf7aec30897fe106fa115c1 to your computer and use it in GitHub Desktop.
Organizing mobile apps using the Chamfer distance metric, hierarchical clustering, and a simulated annealing-based algorithm for finding Hamiltonian paths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "341f12da-97bf-4ec1-ac37-bf0db5bed168", | |
"metadata": {}, | |
"source": [ | |
"# Phone Prettifier\n", | |
"\n", | |
"Aswin van Woudenberg (https://www.aswinvanwoudenberg.com | https://github.com/afvanwoudenberg)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "77bd9c92-174c-4059-99be-367760264017", | |
"metadata": {}, | |
"source": [ | |
"## Importing libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d072de94-5cf2-4a96-96c4-590c1d3188bc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import glob\n", | |
"import random\n", | |
"import math\n", | |
"import matplotlib.pyplot as plt\n", | |
"import ipywidgets as widgets\n", | |
"\n", | |
"from PIL import Image, ImageDraw\n", | |
"from scipy.spatial import KDTree\n", | |
"from ipywidgets import interact, interact_manual\n", | |
"from scipy.cluster.hierarchy import linkage, dendrogram, fcluster\n", | |
"from matplotlib.offsetbox import OffsetImage, AnnotationBbox\n", | |
"from mpl_toolkits.mplot3d import Axes3D" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8284b1a7-5af6-40b5-a53a-c6394f3f0e12", | |
"metadata": {}, | |
"source": [ | |
"## Getting the icons\n", | |
"\n", | |
"Use the [Apk Analyzer](https://play.google.com/store/apps/details?id=sk.styk.martin.apkanalyzer) app (on Android) to save app icons. Next transfer them to your computer and place them in a folder called `app_icons`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c53883f4-cf3a-4ab7-b407-2d0c7504e4d2", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"files = glob.glob(\"app_icons/*.jpeg\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "87af6b68-fc41-4c94-a26c-4c65501b8e12", | |
"metadata": {}, | |
"source": [ | |
"## Loading all images and removing the background" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "ddd46e1b-a5c9-45f3-9be7-ff4f6d687e5c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def create_mask(size):\n", | |
" # Create a mask image with a transparent (black) background\n", | |
" mask = Image.new(\"L\", size, 0)\n", | |
"\n", | |
" margin = int(size[0] * 0.03)\n", | |
" radius = int(size[0] * 0.37)\n", | |
" \n", | |
" # Draw a white rounded rectangle on the mask\n", | |
" draw = ImageDraw.Draw(mask)\n", | |
" draw.rounded_rectangle([(margin, margin), (size[0] - margin, size[1] - margin)], radius=radius, fill=255)\n", | |
"\n", | |
" return mask" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bee11fa9-5e48-483a-9b24-98c1f22d1dc5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def apply_mask(image, mask):\n", | |
" # Create a white background image\n", | |
" white_bg = Image.new(\"RGBA\", image.size, (255, 255, 255, 255))\n", | |
"\n", | |
" # Composite the images using the mask\n", | |
" result = Image.composite(image, white_bg, mask)\n", | |
" \n", | |
" return result.convert(\"RGB\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8b342034-8906-4d5b-a683-e748732efc54", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def remove_background(image):\n", | |
" mask = create_mask(image.size)\n", | |
" return apply_mask(image, mask)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c8745cd6-5491-4615-b190-8c3f851f7acd", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"size = (64, 64)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "4d64052c-6065-4c43-8f66-e5dbc9501050", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mask = create_mask((265,265)).resize(size)\n", | |
"mask" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c88c7078-da6b-41ff-84c1-92ed1c14c5a1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"images = [remove_background(Image.open(f)).resize(size) for f in files]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e1ea21cb-32d5-42ba-bd6b-5643c468a2e2", | |
"metadata": {}, | |
"source": [ | |
"## Plotting all images in a grid" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "92338447-6466-4fcc-8e44-5380e9cc670d", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"def plot_image_grid(images, ncols=None, title=''):\n", | |
" ncols = ncols or int(np.sqrt(len(images)))\n", | |
" nrows, ncols = (len(images) + ncols - 1) // ncols, ncols\n", | |
" imgs = images + [None] * (nrows * ncols - len(images))\n", | |
" fig, axes = plt.subplots(nrows, ncols, figsize=(ncols, nrows))\n", | |
" if title:\n", | |
" fig.suptitle(title, x=0.125, ha='left')\n", | |
" for img, ax in zip(imgs, axes.flatten()): \n", | |
" if img is not None:\n", | |
" ax.imshow(img)\n", | |
" ax.axis('off')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d032c0a0-fb44-4ddb-b23f-c0de2ada507f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plot_image_grid(images, 15)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0d1e6c38-1b19-48d1-9b4f-417ea70d9d41", | |
"metadata": {}, | |
"source": [ | |
"## The Chamfer distance" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "5625d241-c619-4f24-8792-b64d6d20992a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def chamfer_distance(a, b):\n", | |
" tree = KDTree(b)\n", | |
" dist_a = tree.query(a)[0]\n", | |
" tree = KDTree(a)\n", | |
" dist_b = tree.query(b)[0]\n", | |
" return np.mean(dist_a) + np.mean(dist_b)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6a59af77-345e-461d-80d2-e4ac8f077de7", | |
"metadata": {}, | |
"source": [ | |
"## Comparing images pairwise" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f6c3e61b-ad80-4639-8ed2-13959dc54f64", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def plot_image_colors(image, ax=None, title=''):\n", | |
" boolean_mask = np.array(mask).astype(bool).flatten()\n", | |
" data = np.array(image).reshape((64*64, -1))[boolean_mask]\n", | |
"\n", | |
" R, G, B = data[:, 0], data[:, 1], data[:, 2]\n", | |
" colors = data / 255.0\n", | |
" \n", | |
" if not ax:\n", | |
" fig = plt.figure(figsize=(7, 6))\n", | |
" ax = fig.add_subplot(111, projection='3d')\n", | |
" else:\n", | |
" fig = ax.get_figure()\n", | |
" \n", | |
" scatter = ax.scatter(R, G, B, c=colors, s=100)\n", | |
" ax.set_xlabel('Red')\n", | |
" ax.set_ylabel('Green')\n", | |
" ax.set_zlabel('Blue', labelpad=0)\n", | |
" ax.zaxis.label.set_rotation(90)\n", | |
" ax.xaxis.set_ticks(np.arange(0, 256, 50))\n", | |
" ax.yaxis.set_ticks(np.arange(0, 256, 50))\n", | |
" ax.zaxis.set_ticks(np.arange(0, 256, 50))\n", | |
" ax.set_title(title)\n", | |
" \n", | |
" # Add inset axis for the image\n", | |
" inset_ax = fig.add_axes([ax.get_position().x0, ax.get_position().y1 - 0.15, 0.1, 0.1])\n", | |
" inset_ax.imshow(image)\n", | |
" inset_ax.axis('off')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0d6fa03e-8d52-4351-a4c0-2cfd95094d37", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create a boolean mask\n", | |
"boolean_mask = np.array(mask).astype(bool).flatten()\n", | |
"\n", | |
"# The number of pixels\n", | |
"num_pixels = math.prod(size)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "ce959faa-b89b-451a-83ec-6c468343c84d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"np_images_foreground = [np.array(image).reshape((num_pixels, -1))[boolean_mask] for image in images]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "1cab2e83-320e-42c0-8253-7c2cfa51eb3c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def plot_images_colors_side_by_side(image_num1, image_num2):\n", | |
" image1 = images[image_num1]\n", | |
" image2 = images[image_num2]\n", | |
" \n", | |
" fig = plt.figure(figsize=(14, 6))\n", | |
" \n", | |
" # First subplot\n", | |
" ax1 = fig.add_subplot(121, projection='3d')\n", | |
" plot_image_colors(image1, ax1, 'Image 1')\n", | |
" \n", | |
" # Second subplot\n", | |
" ax2 = fig.add_subplot(122, projection='3d')\n", | |
" plot_image_colors(image2, ax2, 'Image 2')\n", | |
"\n", | |
" fig.text(0.5, 0.02, f'Chamfer distance: {chamfer_distance(np_images_foreground[image_num1], np_images_foreground[image_num2])}', ha='center', va='center', fontsize=12)\n", | |
" \n", | |
" plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "0f0fd986-df03-4a01-a62d-28017ea07600", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"_ = interact(plot_images_colors_side_by_side, \n", | |
" image_num1=widgets.IntSlider(min=0, max=len(images) - 1, step=1, value=9, description=\"Image 1: \"), \n", | |
" image_num2=widgets.IntSlider(min=0, max=len(images) - 1, step=1, value=30, description=\"Image 2: \")\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "519e9dc1-9e82-4c1a-bee7-0ad139382f8e", | |
"metadata": {}, | |
"source": [ | |
"## Computing the distance matrix" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "a5e22752-90b3-4610-9a51-88daf5a59c8e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Initialize the distance matrix\n", | |
"num_sets = len(np_images_foreground)\n", | |
"distance_matrix = np.zeros((num_sets, num_sets))\n", | |
"\n", | |
"# Populate the distance matrix\n", | |
"for i in range(num_sets):\n", | |
" for j in range(i + 1, num_sets):\n", | |
" distance = chamfer_distance(np_images_foreground[i], np_images_foreground[j])\n", | |
" distance_matrix[i, j] = distance\n", | |
" distance_matrix[j, i] = distance # Use symmetry" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "74f1cc80-a6a0-47d3-9dcf-0e81d10df3c0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"start, end = np.unravel_index(np.argmax(distance_matrix), distance_matrix.shape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "56761289-4fa3-4ba1-92de-8e64a90a3f90", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plot_image_grid([images[start],images[end]], 2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "917daa46-0b97-4259-8c7f-7b73bd6a99ea", | |
"metadata": {}, | |
"source": [ | |
"## Finding the shortest Hamiltonian path" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "9fd9d19e-c291-4a3c-bb72-716c85345124", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def total_distance(path, distance_matrix):\n", | |
" return sum(distance_matrix[path[i], path[i + 1]] for i in range(len(path) - 1))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "15a8102d-22fc-4e5e-a633-262403fc3da3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def shortest_hamiltonian_path(distance_matrix, start, end, initial_temp, cooling_rate, stopping_temp, initial_path=None):\n", | |
" current_path = initial_path or list(range(distance_matrix.shape[0]))\n", | |
" current_path.remove(start)\n", | |
" if start != end:\n", | |
" current_path.remove(end)\n", | |
" current_path = [start] + current_path + [end]\n", | |
" \n", | |
" n = len(current_path)\n", | |
"\n", | |
" current_distance = total_distance(current_path, distance_matrix)\n", | |
" temperature = initial_temp\n", | |
" \n", | |
" while temperature > stopping_temp:\n", | |
" # Generate a neighbor by reversing a subtour (excluding start and end)\n", | |
" i, j = sorted(random.sample(range(1, n - 1), 2))\n", | |
" new_path = current_path[0:i] + current_path[i:j+1][::-1] + current_path[j+1:]\n", | |
" \n", | |
" new_distance = total_distance(new_path, distance_matrix)\n", | |
" \n", | |
" # Acceptance probability\n", | |
" if new_distance < current_distance or random.random() < np.exp((current_distance - new_distance) / temperature):\n", | |
" current_path, current_distance = new_path, new_distance\n", | |
" \n", | |
" temperature *= cooling_rate\n", | |
" \n", | |
" return current_path, current_distance" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b5e0fbf8-9d4b-43f7-a46d-73ed6052c2df", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"initial_temp = 400000\n", | |
"cooling_rate = 0.99995\n", | |
"stopping_temp = 1e-8\n", | |
"\n", | |
"path, distance = shortest_hamiltonian_path(distance_matrix, start, end, initial_temp, cooling_rate, stopping_temp)\n", | |
"print(\"Shortest Hamiltonian path:\", path)\n", | |
"print(\"Distance:\", distance)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "202a2d2c-78fa-4b36-89b9-9f84e93f9e53", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sorted_images = [images[i] for i in path]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "be2cac7a-da49-4bf3-9e85-094b09914b1a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plot_image_grid(sorted_images, 15)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "18536211-9b1f-4da2-aea5-8c34de2e4288", | |
"metadata": {}, | |
"source": [ | |
"## Grouping apps using hierarchical clustering" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "ba6bdfee-5a9b-41ce-8bbb-42571a33d3c9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Convert the distance matrix to a condensed distance matrix\n", | |
"# The condensed distance matrix contains the upper triangular part of the distance matrix\n", | |
"condensed_distance_matrix = distance_matrix[np.triu_indices_from(distance_matrix, k=1)]\n", | |
"\n", | |
"# Perform hierarchical clustering using the linkage method\n", | |
"Z = linkage(condensed_distance_matrix, method='ward')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "736eb871-52f8-4c3f-9d97-727c6807cf69", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def plot_dendrogram(max_d=0):\n", | |
" fig, ax = plt.subplots(figsize=(18, 6))\n", | |
" dendrogram(Z, ax=ax)\n", | |
" ax.set_title('Hierarchical Clustering Dendrogram')\n", | |
" ax.set_ylabel('Distance')\n", | |
"\n", | |
" if max_d:\n", | |
" ax.axhline(y=max_d, linestyle='--', color='grey')\n", | |
" \n", | |
" tick_labels = ax.xaxis.get_ticklabels()\n", | |
" for i in range(len(images)):\n", | |
" ib = OffsetImage(images[int(tick_labels[i].get_text())], zoom=.18)\n", | |
" ib.image.axes = ax\n", | |
" ab = AnnotationBbox(ib, tick_labels[i].get_position(), frameon=False, box_alignment=(0.5, 1.2))\n", | |
" ax.add_artist(ab)\n", | |
" ax.set_xticks([])\n", | |
" \n", | |
" plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "700e59b0-4eae-49c8-a4ca-d1a40628ea61", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plot_dendrogram()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "9ddd01c8-156f-4156-85f2-d4f50f1812f4", | |
"metadata": {}, | |
"source": [ | |
"## Selecting clusters and ordering apps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "15f9ee15-415b-4cf9-85b1-cd460176ffa6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@interact(max_d=widgets.IntSlider(min=0, max=int(np.max(Z)), step=1, value=int(np.max(Z) / 2), description=\"Distance: \"))\n", | |
"def plot_dendrogram_with_threshold(max_d):\n", | |
" plot_dendrogram(max_d)\n", | |
"\n", | |
" @interact_manual.options(manual_name=\"Show clusters\")\n", | |
" def plot_clusters():\n", | |
" clusters = fcluster(Z, max_d, criterion='distance')\n", | |
" \n", | |
" for cluster_label in range(1, max(clusters) + 1):\n", | |
" indices = np.where(clusters == cluster_label)[0]\n", | |
" plot_image_grid([images[i] for i in indices], 25, f'Cluster {cluster_label}')\n", | |
"\n", | |
" find_path_interact_manual = interact_manual.options(manual_name=\"Find path\")\n", | |
" @find_path_interact_manual(cluster_labels=widgets.widgets.SelectMultiple(options=[(f'Cluster {cl}', cl) for cl in range(1, max(clusters) + 1)], description='Clusters: '))\n", | |
" def find_hamiltonian_path(cluster_labels):\n", | |
" selected_indices = np.where(np.isin(clusters, cluster_labels))[0]\n", | |
" if selected_indices.size > 0:\n", | |
" distance_submatrix = distance_matrix[np.ix_(selected_indices, selected_indices)]\n", | |
" max_value = np.max(distance_submatrix)\n", | |
" result = np.where(distance_matrix == max_value)\n", | |
" start, end = result[0][0], result[1][0]\n", | |
"\n", | |
" initial_temp = 400000\n", | |
" cooling_rate = 0.99995\n", | |
" stopping_temp = 1e-8\n", | |
" path, _ = shortest_hamiltonian_path(distance_matrix, start, end, initial_temp, cooling_rate, stopping_temp, list(selected_indices))\n", | |
" \n", | |
" sorted_images = [images[i] for i in path]\n", | |
" plot_image_grid(sorted_images, 5)\n", | |
" else:\n", | |
" print(\"Select on or more clusters.\") " | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment