AlexandraKapp/blog_emd_option2.py

## blog_emd_option2.py
from haversine import haversine, Unit

# construct the cost matrix
def get_cost_matrix(gdf1, gdf2):
    gdf1_centroids = gdf1.to_crs(3395).centroid.to_crs(4326)
    gdf2_centroids = gdf2.to_crs(3395).centroid.to_crs(4326)
    coords_sig1 = list(zip(gdf1_centroids.y, gdf1_centroids.x))
    coords_sig2 = list(zip(gdf2_centroids.y, gdf2_centroids.x))

    #get all potential combinations between all points from sig1 and sig2
    grid = np.meshgrid(range(0, len(coords_sig1)),
                        range(0, len(coords_sig2)))
    tile_combinations = np.array([grid[0].flatten(), grid[1].flatten()])

    # create an empty cost matrix with the length of all possible combinations
    cost_matrix = np.empty(tile_combinations.shape[1], dtype = np.float32) # float32 needed as input for cv2.emd!

    # compute haversine distance for all possible combinations
    for column in range(0, tile_combinations.shape[1]):
        tile_1 = tile_combinations[0, column]
        tile_2 = tile_combinations[1, column]
        cost_matrix[column]  = haversine(coords_sig1[tile_1], coords_sig2[tile_2], unit = Unit.METERS)

    # reshape array to matrix
    return np.reshape(cost_matrix, (len(coords_sig1),len(coords_sig2)))

# as the coordinates are the same in both scenarios
# you could use the same cost matrix for both scenarios
cost_matrix1 = get_cost_matrix(example, scenario1)
cost_matrix2 = get_cost_matrix(example, scenario2)

# The signature for the custom cost matrix does not need coordinates. It can only consist of an array of values
sig_original = np.array(example.example_data, dtype = np.float32)
sig_scen1 = np.array(scenario1.example_data, dtype = np.float32)

# Compute the EMD
emd_scen1, _ , _ = cv2.EMD(sig_original, sig_scen1, distType = cv2.DIST_USER, cost = cost_matrix1)
emd_scen2, _ , _ = cv2.EMD(sig_original, sig_scen2, distType = cv2.DIST_USER, cost = cost_matrix2)

print("Earth movers distance scenario 1: " + str(round(emd_scen1)) + " meters")
print("Earth movers distance scenario 2: " + str(round(emd_scen2)) + " meters")
sig_scen2 = np.array(scenario2.example_data, dtype = np.float32)
	from haversine import haversine, Unit

	# construct the cost matrix
	def get_cost_matrix(gdf1, gdf2):
	gdf1_centroids = gdf1.to_crs(3395).centroid.to_crs(4326)
	gdf2_centroids = gdf2.to_crs(3395).centroid.to_crs(4326)
	coords_sig1 = list(zip(gdf1_centroids.y, gdf1_centroids.x))
	coords_sig2 = list(zip(gdf2_centroids.y, gdf2_centroids.x))

	#get all potential combinations between all points from sig1 and sig2
	grid = np.meshgrid(range(0, len(coords_sig1)),
	range(0, len(coords_sig2)))
	tile_combinations = np.array([grid[0].flatten(), grid[1].flatten()])

	# create an empty cost matrix with the length of all possible combinations
	cost_matrix = np.empty(tile_combinations.shape[1], dtype = np.float32) # float32 needed as input for cv2.emd!

	# compute haversine distance for all possible combinations
	for column in range(0, tile_combinations.shape[1]):
	tile_1 = tile_combinations[0, column]
	tile_2 = tile_combinations[1, column]
	cost_matrix[column] = haversine(coords_sig1[tile_1], coords_sig2[tile_2], unit = Unit.METERS)

	# reshape array to matrix
	return np.reshape(cost_matrix, (len(coords_sig1),len(coords_sig2)))

	# as the coordinates are the same in both scenarios
	# you could use the same cost matrix for both scenarios
	cost_matrix1 = get_cost_matrix(example, scenario1)
	cost_matrix2 = get_cost_matrix(example, scenario2)

	# The signature for the custom cost matrix does not need coordinates. It can only consist of an array of values
	sig_original = np.array(example.example_data, dtype = np.float32)
	sig_scen1 = np.array(scenario1.example_data, dtype = np.float32)

	# Compute the EMD
	emd_scen1, _ , _ = cv2.EMD(sig_original, sig_scen1, distType = cv2.DIST_USER, cost = cost_matrix1)
	emd_scen2, _ , _ = cv2.EMD(sig_original, sig_scen2, distType = cv2.DIST_USER, cost = cost_matrix2)

	print("Earth movers distance scenario 1: " + str(round(emd_scen1)) + " meters")
	print("Earth movers distance scenario 2: " + str(round(emd_scen2)) + " meters")
	sig_scen2 = np.array(scenario2.example_data, dtype = np.float32)