Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save vaclavdekanovsky/fd6e20a7e5b5e9188468bddd263ae89f to your computer and use it in GitHub Desktop.
Save vaclavdekanovsky/fd6e20a7e5b5e9188468bddd263ae89f to your computer and use it in GitHub Desktop.
Solving Travelling Salesman problem using genetic algorithm
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Travelling Salesperson Problem\n",
"To find the optimal route between a list of points based on a parameter (e.g. duration or distance between the places) we can use the Travelling Salesperson Problem (https://en.wikipedia.org/wiki/Travelling_salesman_problem). Travelling Sales person wants to visit all the points with the minimal effort. \n",
"\n",
"Python implementation is described in the following article. https://towardsdatascience.com/solving-travelling-salesperson-problems-with-python-5de7e883d847. You can also read the documentation of the mlrose package https://mlrose.readthedocs.io/en/stable/source/tutorial2.html."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests # to get the distances from the API\n",
"import json # to read the API response\n",
"import mlrose # for travelling salesman problem\n",
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# load the dataframe with capitals\n",
"df = pd.read_csv(\"concap.csv\")\n",
"\n",
"# rename so that the column names are shorter and comply with PEP-8\n",
"df.rename(columns={\"CountryName\": \"Country\", \"CapitalName\": \"capital\", \"CapitalLatitude\": \"lat\", \"CapitalLongitude\": \"lon\", \"CountryCode\": \"code\", \"ContinentName\": \"continent\"}, inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# filter only the capitals of the Central Europe\n",
"ce_countries = [\"AT\",\"CZ\",\"DE\",\"HU\",\"LI\",\"PL\",\"SK\",\"SI\",\"CH\"]\n",
"ce_cities = df[df[\"code\"].isin(ce_countries)].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def get_distance(point1: dict, point2: dict) -> tuple:\n",
" \"\"\"Gets distance between two points en route using http://project-osrm.org/docs/v5.10.0/api/#nearest-service\"\"\"\n",
" \n",
" url = f\"\"\"http://router.project-osrm.org/route/v1/driving/{point1[\"lon\"]},{point1[\"lat\"]};{point2[\"lon\"]},{point2[\"lat\"]}?overview=false&alternatives=false\"\"\"\n",
" r = requests.get(url)\n",
" \n",
" # get the distance from the returned values\n",
" route = json.loads(r.content)[\"routes\"][0]\n",
" return (route[\"distance\"], route[\"duration\"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# get the distances and durations\n",
"dist_array = []\n",
"for i , r in ce_cities.iterrows():\n",
" point1 = {\"lat\": r[\"lat\"], \"lon\": r[\"lon\"]}\n",
" for j, o in ce_cities[ce_cities.index != i].iterrows():\n",
" point2 = {\"lat\": o[\"lat\"], \"lon\": o[\"lon\"]}\n",
" dist, duration = get_distance(point1, point2)\n",
" #dist = geodesic((i_lat, i_lon), (o[\"CapitalLatitude\"], o[\"CapitalLongitude\"])).km\n",
" dist_array.append((i, j, duration, dist))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"distances_df = pd.DataFrame(dist_array,columns=[\"origin\",\"destination\",\"duration(s)\",\"distnace(m)\"])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(0, 1, 13949.1),\n",
" (0, 2, 27424.4),\n",
" (0, 3, 9661.4),\n",
" (0, 4, 24166.6),\n",
" (0, 5, 29367.),\n",
" '...',\n",
" (8, 3, 41862.1),\n",
" (8, 4, 9856.),\n",
" (8, 5, 52573.8),\n",
" (8, 6, 35718.7),\n",
" (8, 7, 32803.4)]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# turn the first three columns of the dataframe into the list of tuples\n",
"dist_list = list(distances_df[[\"origin\",\"destination\",\"duration(s)\"]].sort_values(by=[\"origin\",\"destination\"]).to_records(index=False))\n",
"dist_list[:5] + [\"...\"] + dist_list[-5:]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# we plan to use the list of distnaces (durations in our case), that's why we initialize with `distances = dist_list` param.\n",
"fitness_dists = mlrose.TravellingSales(distances = dist_array)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# we plan to visit 9 cities\n",
"length = ce_cities.shape[0]\n",
"\n",
"problem_fit = mlrose.TSPOpt(length = length, fitness_fn = fitness_dists,\n",
" maximize=False)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([5, 2, 1, 6, 3, 7, 4, 8, 0]), 168798.30000000002)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# non-ideal solution, without specifying mlrose optimization\n",
"mlrose.genetic_alg(problem_fit, random_state = 2)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# better but more resource intensive solutions\n",
"best_state, best_fitness = mlrose.genetic_alg(problem_fit, mutation_prob = 0.2, max_attempts = 500, random_state = 2)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The best state found is: [1 8 4 7 3 6 0 5 2], taking 157397.0 (1 day, 19:43:17)\n"
]
}
],
"source": [
"print(f\"The best state found is: {best_state}, taking {best_fitness} ({str(datetime.timedelta(seconds=best_fitness))})\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{1: 0, 8: 1, 4: 2, 7: 3, 3: 4, 6: 5, 0: 6, 5: 7, 2: 8}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# turn the results to an ordered dict\n",
"orders = {city: order for order, city in enumerate(best_state)}\n",
"orders"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country</th>\n",
" <th>capital</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>code</th>\n",
" <th>continent</th>\n",
" <th>order</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Czech Republic</td>\n",
" <td>Prague</td>\n",
" <td>50.083333</td>\n",
" <td>14.466667</td>\n",
" <td>CZ</td>\n",
" <td>Europe</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Switzerland</td>\n",
" <td>Bern</td>\n",
" <td>46.916667</td>\n",
" <td>7.466667</td>\n",
" <td>CH</td>\n",
" <td>Europe</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Liechtenstein</td>\n",
" <td>Vaduz</td>\n",
" <td>47.133333</td>\n",
" <td>9.516667</td>\n",
" <td>LI</td>\n",
" <td>Europe</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Slovenia</td>\n",
" <td>Ljubljana</td>\n",
" <td>46.050000</td>\n",
" <td>14.516667</td>\n",
" <td>SI</td>\n",
" <td>Europe</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Hungary</td>\n",
" <td>Budapest</td>\n",
" <td>47.500000</td>\n",
" <td>19.083333</td>\n",
" <td>HU</td>\n",
" <td>Europe</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Slovakia</td>\n",
" <td>Bratislava</td>\n",
" <td>48.150000</td>\n",
" <td>17.116667</td>\n",
" <td>SK</td>\n",
" <td>Europe</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Austria</td>\n",
" <td>Vienna</td>\n",
" <td>48.200000</td>\n",
" <td>16.366667</td>\n",
" <td>AT</td>\n",
" <td>Europe</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Poland</td>\n",
" <td>Warsaw</td>\n",
" <td>52.250000</td>\n",
" <td>21.000000</td>\n",
" <td>PL</td>\n",
" <td>Europe</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Germany</td>\n",
" <td>Berlin</td>\n",
" <td>52.516667</td>\n",
" <td>13.400000</td>\n",
" <td>DE</td>\n",
" <td>Europe</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Country capital lat lon code continent order\n",
"1 Czech Republic Prague 50.083333 14.466667 CZ Europe 0\n",
"8 Switzerland Bern 46.916667 7.466667 CH Europe 1\n",
"4 Liechtenstein Vaduz 47.133333 9.516667 LI Europe 2\n",
"7 Slovenia Ljubljana 46.050000 14.516667 SI Europe 3\n",
"3 Hungary Budapest 47.500000 19.083333 HU Europe 4\n",
"6 Slovakia Bratislava 48.150000 17.116667 SK Europe 5\n",
"0 Austria Vienna 48.200000 16.366667 AT Europe 6\n",
"5 Poland Warsaw 52.250000 21.000000 PL Europe 7\n",
"2 Germany Berlin 52.516667 13.400000 DE Europe 8"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# apply this order to the dataframe with the cities\n",
"ce_cities[\"order\"] = ce_cities.index.map(orders)\n",
"ce_cities = ce_cities.sort_values(by=\"order\")\n",
"ce_cities"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mapsenv",
"language": "python",
"name": "mapsenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment