Skip to content

Instantly share code, notes, and snippets.

@privefl
Last active May 9, 2017 10:03
Show Gist options
  • Save privefl/968ca64d0175ad4afcc0ad1813b07013 to your computer and use it in GitHub Desktop.
Save privefl/968ca64d0175ad4afcc0ad1813b07013 to your computer and use it in GitHub Desktop.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
bool checkDuplicate(const IntegerMatrix& pop, int ind) {
if (ind == 1) {
return(true);
} else {
int i, j, k = ind - 1;
int n = pop.nrow();
LogicalVector remains(k, true);
int sum_remain = k;
for (j = 0; j < k; j++) {
for (i = 0; i < n; i++) {
if (remains[j]) {
if (pop(i, j) != pop(i, k)) {
remains[j] = false;
sum_remain--;
}
}
if (sum_remain == 0) return(true);
}
}
return(false);
}
}
mutate_agent2 <- function(agent_genome, max_mutations = 3) {
num_mutations <- sample.int(max_mutations, 1)
for (mutation in seq_len(num_mutations)) {
swap_indices <- sample.int(length(agent_genome), 2)
agent_genome[swap_indices] <- agent_genome[rev(swap_indices)]
}
agent_genome
}
run_genetic_algorithm2 <- function(distance_matrix, all_waypoints, generations=5000, population_size=100) {
population_subset_size <- floor(population_size / 10)
generations_10pct <- floor(generations / 10)
n <- length(all_waypoints)
population <- replicate(population_size, sample.int(n))
for (generation in seq_len(generations)) {
fitness <- rep(NA_real_, ncol(population))
for (i in seq_along(fitness)) {
if (checkDuplicate(population, i)) { # cpp
fitness[i] <- sum(distance_matrix[cbind(population[-n, i], population[-1, i])])
}
}
new_population <- matrix(NA_integer_, n, 10 * population_subset_size)
ord <- order(fitness, na.last = NA)
sorted_fitness <- fitness[ord]
for (i in seq_len(population_subset_size)) {
agent_genome <- population[, ord[i]]
if ((generation %% generations_10pct == 0 || generation == generations - 1) && i == 1) {
print(paste("Generation", generation, "best:", sorted_fitness[i], "|",
"Unique genomes:", length(sorted_fitness)))
}
offset <- 10 * (i - 1)
# Create 1 exact copy of each of the top road trips
new_population[, offset + 1] <- agent_genome
# Create 2 offspring with 1-3 point mutations
for (offspring in 2:3) {
new_population[, offset + offspring] <- mutate_agent2(agent_genome, 3)
}
# Create 7 offspring with a single shuffle mutation
for (offspring in 4:10) {
new_population[, offset + offspring] <- shuffle_mutation(agent_genome)
}
}
population <- new_population
}
population
}
data_store_us <- readr::read_tsv("https://raw.githubusercontent.com/rhiever/Data-Analysis-and-Machine-Learning-Projects/master/optimal-road-trip/my-waypoints-dist-dur.tsv" )
all_waypoints_us <- dplyr::union(data_store_us$waypoint1, data_store_us$waypoint2)
build_distance_matrix2 <- function(data_store) {
all_waypoints <- dplyr::union(data_store$waypoint1, data_store$waypoint2)
n <- length(all_waypoints)
dist_m <- matrix(0L, n, n)
rows <- match(data_store$waypoint1, all_waypoints)
cols <- match(data_store$waypoint2, all_waypoints)
dist_m[cbind(rows, cols)] <- dist_m[cbind(cols, rows)] <- data_store$distance_m
colnames(dist_m) <- rownames(dist_m) <- all_waypoints # really need names?
dist_m
}
source("https://raw.githubusercontent.com/expectopatronum/ds-learning-club/master/16-genetic-algorithms/data_store_helper.R")
library(microbenchmark)
print(microbenchmark(
"1" = dist_matrix <- build_distance_matrix(data_store_us),
"2" = dist_matrix2 <- build_distance_matrix2(data_store_us),
times = 10
))
all.equal(dist_matrix, dist_matrix2)
####
####
tmpfile <- tempfile(fileext = ".cpp")
download.file("https://gist.githubusercontent.com/privefl/968ca64d0175ad4afcc0ad1813b07013/raw/f6f1eb4a93e4a759527f19b57f405e8f3ff7d9d3/checkDuplicate.cpp", destfile = tmpfile)
Rcpp::sourceCpp(tmpfile)
source("https://gist.githubusercontent.com/privefl/968ca64d0175ad4afcc0ad1813b07013/raw/f6f1eb4a93e4a759527f19b57f405e8f3ff7d9d3/genetic_algo.R")
source("https://raw.githubusercontent.com/expectopatronum/ds-learning-club/master/16-genetic-algorithms/list_helper.R")
source("https://raw.githubusercontent.com/expectopatronum/ds-learning-club/master/16-genetic-algorithms/genetic_algorithm.R")
print(microbenchmark(
"1" = run_genetic_algorithm(dist_matrix, all_waypoints_us,
generations = 100, population_size = 100),
"2" = run_genetic_algorithm2(dist_matrix, all_waypoints_us,
generations = 100, population_size = 100),
times = 5
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment