This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The following row avoids the broadcasting, the dimension_table2 is very small | |
spark.conf.set("spark.sql.autoBroadcastJoinThreshold",-1) | |
// I'm using caching to simplify the DAG | |
dimension_table2.cache | |
dimension_table2.count | |
// One way to use the same partitioner is to partition on a column with the same name, | |
// let's rename the columns that we want to join | |
fact_table = fact_table.withColumnRenamed("dimension_2_key", "repartition_id") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import Pandas | |
import pandas as pd | |
# Import PuLP modeler functions | |
from pulp import * | |
# Math functions for distance calculation | |
import math | |
# Networkx to get connected components and subtours | |
import networkx as nx | |
# Matplotlib for debugging | |
import matplotlib.pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TSP(): | |
cities = None | |
santa = None | |
variables_dict = None | |
x = None | |
path = None | |
def __init__(self, population_lower_bound=10e5 * 3): | |
cities = pd.read_csv("worldcities.csv") | |
self.cities = cities.loc[cities["population"] >= population_lower_bound][["city","lat","lng","population"]].reset_index() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import Pandas | |
import pandas as pd | |
# Import PuLP modeler functions | |
from pulp import * | |
# Math functions for distance calculation | |
import math | |
# Networkx to get connected components and subtours | |
import networkx as nx | |
# Matplotlib for debugging | |
import matplotlib.pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calculate_distance(self, lat_a, lng_a, lat_b, lng_b): | |
# Convert lat lng in radians | |
lng_a, lat_a, lng_b, lat_b = map(math.radians, [lng_a, lat_a, lng_b, lat_b]) | |
d_lat = lat_b - lat_a | |
d_lng = lng_a - lng_b | |
temp = ( | |
math.sin(d_lat / 2) ** 2 | |
+ math.cos(lat_a) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_model(self): | |
# Initialize the problem | |
santa = LpProblem("santa", LpMinimize) | |
# Generate distances | |
w = h = self.cities.shape[0] | |
distances = [[0 for x in range(w)] for y in range(h)] | |
for index_a, row_a in tqdm(self.cities.iterrows(), total=self.cities.shape[0]): | |
lat_a = row_a["lat"] | |
lon_a = row_a["lng"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_graph_from_current_solution(self): | |
varsdict = {} | |
for v in self.santa.variables(): | |
if v.varValue == 1: | |
varsdict[v.name] = v.varValue | |
G = nx.Graph() | |
# Add nodes to the Graph | |
for index_a, row_a in self.cities.iterrows(): | |
lat_a = row_a["lat"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def solve(self): | |
# Now, the model is missing of the subtour elimination constraints. | |
# The number of these constraints is 2^N-2 where N is the number of cities selected. Too many. | |
# What we can do is: | |
# 1. Solve the problem without the Subtour elimination constraints | |
# 2. Get the solution and see if there are subtours of length lower than N | |
# 3. Add the constraints that exclude those subtours | |
# 4. Goto 1 and repeat until there are no more subtours | |
while True: | |
self.santa.solve(solvers.PULP_CBC_CMD(msg=True)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tsp = TSP() | |
tsp.build_model() | |
tsp.solve() | |
tsp.extract_solution() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import Pandas - List of cities can be found here https://simplemaps.com/data/world-cities | |
import pandas as pd | |
# Import PuLP modeler functions | |
from pulp import * | |
# Math functions for distance calculation | |
import math | |
# Networkx to get connected components and subtours | |
import networkx as nx | |
# Matplotlib for debugging | |
import matplotlib.pyplot as plt |