Last active
August 29, 2015 14:07
-
-
Save vftools/0476b357215f1ca4478a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
#include <math.h> | |
using namespace Rcpp; | |
enum DIRECTION { NONE = 0, | |
LEFT_TO_RIGHT = 1, | |
RIGHT_TO_LEFT = 2 | |
}; | |
// Function used for checking whether ships are in certain regions. | |
double squared_distance(const double x1, const double y1, | |
const double x2, const double y2); | |
double cross_product_2d_z(const double x1, const double y1, | |
const double x2, const double y2); | |
int is_in_semicircle(const double x1, const double y1, | |
const double x2, const double y2, | |
const double lat, const double lon); | |
/** | |
* Find all ships that cross a line and reaches a destination within range. | |
* | |
* @param df A dataframe with observations of ship movements. | |
* @param x1 Upper latitude | |
* @param y1 Upper longitude | |
* @param x2 Lower latitude | |
* @param y2 Lower longitude | |
* @param direction The direction in which the line should be crossed | |
* @param dest_lat Latitude of the destination | |
* @param dest_lon Longitude of the destination | |
* @param range The minimum range the ship should come to destination. | |
*/ | |
// [[Rcpp::export]] | |
DataFrame crosses_and_arrives(const DataFrame& df, | |
const double x1, const double y1, | |
const double x2, const double y2, | |
const int direction, | |
const double dest_lon, const double dest_lat, const double range | |
) { | |
// Unpack the dataframe in lists. | |
Rcpp::IntegerVector start_date_v = df["start_date"]; | |
Rcpp::IntegerVector end_date_v = df["end_date"]; | |
Rcpp::IntegerVector mmsi_v = df["mmsi"]; | |
Rcpp::DoubleVector latitude_v = df["latitude"]; | |
Rcpp::DoubleVector longitude_v = df["longitude"]; | |
Rcpp::CharacterVector lkp_v = df["last_known_port"]; | |
Rcpp::CharacterVector destination_v = df["destination"]; | |
// Lists making up the output dataframe. | |
std::vector<long> start_date_out; | |
std::vector<long> end_date_out; | |
std::vector<long> mmsi_out; | |
std::vector<double> latitude_out; | |
std::vector<double> longitude_out; | |
std::vector<std::string> lkp_out; | |
std::vector<std::string> destination_out; | |
std::vector<int> num_routes_out; | |
std::vector<long> duration_out; | |
// Setting up the loop. | |
long current_mmsi = mmsi_v[0]; | |
// Vectors giving the begin and end of trips | |
std::vector<size_t> start_rows; | |
std::vector<size_t> end_rows; | |
// Current trip information | |
bool has_crossed = false; | |
bool has_arrived = false; | |
int prev_semicircle = 0; | |
// Square the accepted range so we can compare distance without taking the | |
// square root. | |
const double range_squared = range * range; | |
// Start at 1 because we need to look back. | |
for (int i=1; i < mmsi_v.size(); i++) | |
{ | |
bool crossed = false; | |
// Check whether line has been crossed. | |
int in_semicircle = is_in_semicircle(x1, y1, | |
x2, y2, longitude_v[i], latitude_v[i]); | |
// Check whether the line has been crossed in the given direction. | |
if (direction == DIRECTION::LEFT_TO_RIGHT) { | |
// From the clockwise first box to the clockwise second block. | |
if (in_semicircle == DIRECTION::LEFT_TO_RIGHT | |
&& prev_semicircle == DIRECTION::RIGHT_TO_LEFT | |
) { | |
crossed = true; | |
} | |
} else { | |
// From the counter clockwise first box to the clockwise second block. | |
if (in_semicircle == DIRECTION::RIGHT_TO_LEFT | |
&& prev_semicircle == DIRECTION::LEFT_TO_RIGHT | |
) { | |
crossed = true; | |
} | |
} | |
prev_semicircle = in_semicircle; | |
if (crossed) | |
{ | |
has_crossed = true; | |
// Sometimes, the ship goes in circles and crosses multiple times | |
// before arriving. In that case overwrite the previous start row. | |
// Minus one to include the observation just before crossing. | |
if (start_rows.size() == end_rows.size()) { | |
start_rows.push_back(i - 1); | |
} else { | |
start_rows[start_rows.size() - 1] = i - 1; | |
} | |
} | |
// Only check arrival for ships that have crossed the line. | |
if (has_crossed) | |
{ | |
// Calculate the squared distance to the destination. | |
double sq_distance = squared_distance(longitude_v[i], latitude_v[i], | |
dest_lon, dest_lat); | |
// If we're in the circle add it. | |
if (sq_distance < range_squared) | |
{ | |
// A next trip requires that first the line is crossed from the right | |
// side and that we arrive again. | |
has_crossed = false; | |
has_arrived = true; | |
end_rows.push_back(i); | |
} | |
} | |
// Check whether we have a new ship or this is the last observation. | |
if (current_mmsi != mmsi_v[i] || i == mmsi_v.size() - 1) | |
{ | |
// If there is an end observation at least one trip has been completed. | |
if (end_rows.size() > 0) | |
{ | |
// Add the complete route taken between crossing and destination, | |
// calculate the route number and duration of the trip on the fly. | |
for (size_t num_route = 0; num_route < end_rows.size(); num_route++) | |
{ | |
// This should not be possible. | |
if (start_rows[num_route] >= end_rows[num_route]) | |
{ | |
throw(Rcpp::exception("Start row not smaller than end row")); | |
} | |
long duration = end_date_v[end_rows[num_route]] - | |
start_date_v[start_rows[num_route]]; | |
// Add all the fields to a new row. | |
for (unsigned long j = start_rows[num_route]; j <= end_rows[num_route]; j++) | |
{ | |
start_date_out.push_back(start_date_v[j]); | |
end_date_out.push_back(end_date_v[j]); | |
mmsi_out.push_back(mmsi_v[j]); | |
latitude_out.push_back(latitude_v[j]); | |
longitude_out.push_back(longitude_v[j]); | |
lkp_out.push_back(std::string(lkp_v[j])); | |
destination_out.push_back(std::string(destination_v[j])); | |
num_routes_out.push_back(num_route + 1); | |
duration_out.push_back(duration); | |
} | |
} | |
} | |
// Reset for the next ship. | |
has_crossed = false; | |
has_arrived = false; | |
start_rows.clear(); | |
end_rows.clear(); | |
current_mmsi = mmsi_v[i]; | |
} | |
} | |
// Create the DataFrame and return it. | |
return DataFrame::create(Named("start_date") = start_date_out, | |
Named("end_date") = end_date_out, | |
Named("mmsi") = mmsi_out, | |
Named("latitude") = latitude_out, | |
Named("longitude") = longitude_out, | |
Named("last_known_port") = lkp_out, | |
Named("destination") = destination_out, | |
Named("num_route") = num_routes_out, | |
Named("duration") = duration_out | |
); | |
} | |
/** | |
* Calculate the Euclidean squared distance. | |
* | |
* @param x1 The first x coordinate | |
* @param y1 The first y coordinate | |
* @param x2 The second x coordinate | |
* @param y2 The second y coordinate | |
*/ | |
double squared_distance(const double x1, const double y1, | |
const double x2, const double y2) | |
{ | |
return pow(x1 - x2, 2) + pow(y1 - y2, 2); | |
} | |
/** | |
* Calculate the cross product of two 2d vectors. | |
* | |
* Calculates the z of a cross product of the 2D vectors by setting the z | |
* coordinates of the input to zero to obtain 3D vectors. | |
* | |
* @param x1 The first x coordinate | |
* @param y1 The first y coordinate | |
* @param x2 The second x coordinate | |
* @param y2 The second y coordinate | |
*/ | |
double cross_product_2d_z(const double x1, const double y1, | |
const double x2, const double y2) | |
{ | |
return x1 * y2 - y1 * x2; | |
} | |
/** | |
* Check whether lat and lon are in a semicircle. | |
* | |
* The semicircle is from (x1, y1) clockwise to (x2, y2). | |
* | |
* Exported mainly for testing convenience but the function seems useful in | |
* its own right. | |
* | |
* The type is int and not DIRECTION so that it can be exported to Rcpp. | |
* | |
* Taken from: http://stackoverflow.com/a/535042/862288 | |
* | |
* @param x1 Upper latitude | |
* @param y1 Upper longitude | |
* @param x2 Lower latitude | |
* @param y2 Lower longitude | |
* @param lat Latitude point | |
* @param lon Longitude point | |
*/ | |
// [[Rcpp::export]] | |
int is_in_semicircle(const double x1, const double y1, | |
const double x2, const double y2, | |
const double p_x, const double p_y) | |
{ | |
const double center_x = (x1 + x2) / 2; | |
const double center_y = (y1 + y2) / 2; | |
const double diameter = squared_distance(x1, y1, x2, y2); | |
const double vec1_x = x2 - center_x; | |
const double vec1_y = y2 - center_y; | |
const double vec2_x = p_x - center_x; | |
const double vec2_y = p_y - center_y; | |
// Check on which side of the circle the point is located. | |
const double cross_product_z = cross_product_2d_z(vec1_x, vec1_y, | |
vec2_x, vec2_y); | |
// Divide by 4, because this is the square of the diameter. We add an | |
// epsilon to the comparison to satisfy the test cases as the calculations | |
// are not 100% precise. | |
if (squared_distance(center_x, center_y, p_x, p_y) <= diameter / 4 + 1e-7) | |
{ | |
if (cross_product_z >= 0) { | |
return DIRECTION::LEFT_TO_RIGHT; | |
} else { | |
return DIRECTION::RIGHT_TO_LEFT; | |
} | |
} | |
return DIRECTION::NONE; | |
} | |
/** | |
* Remove repeated observations from the dataframe | |
* | |
* @param df The dataframe. | |
*/ | |
// [[Rcpp::export]] | |
DataFrame filter_dataframe(const DataFrame& df) | |
{ | |
// Unpack the dataframe in lists. | |
Rcpp::IntegerVector dates_v = df["scrape_datetime"]; | |
Rcpp::IntegerVector mmsi_v = df["mmsi"]; | |
Rcpp::DoubleVector latitude_v = df["latitude"]; | |
Rcpp::DoubleVector longitude_v = df["longitude"]; | |
Rcpp::CharacterVector lkp_v = df["last_known_port"]; | |
Rcpp::CharacterVector destination_v = df["destination"]; | |
// Prepare output lists | |
std::vector<long> start_date_out; | |
std::vector<long> end_date_out; | |
std::vector<long> mmsi_out; | |
std::vector<double> latitude_out; | |
std::vector<double> longitude_out; | |
std::vector<std::string> lkp_out; | |
std::vector<std::string> destination_out; | |
long start_date = dates_v[0]; | |
long last_mmsi = mmsi_v[0]; | |
double last_latitude = latitude_v[0]; | |
double last_longitude = longitude_v[0]; | |
std::string last_lkp = std::string(lkp_v[0]); | |
std::string last_destination = std::string(destination_v[0]); | |
// Start at 1 because we need to look back. | |
for (int i=1; i < mmsi_v.size(); i++) { | |
if (start_date == 0) { | |
start_date = dates_v[i]; | |
} | |
// Check for a new row. | |
if ( | |
last_mmsi != mmsi_v[i] || | |
last_latitude != latitude_v[i] || | |
// Only make string objects when necessary as || short circuits. | |
last_longitude != longitude_v[i] || | |
last_lkp != std::string(lkp_v[i]) || | |
last_destination != std::string(destination_v[i]) | |
) { | |
// Update last. | |
last_mmsi = mmsi_v[i]; | |
last_latitude = latitude_v[i]; | |
last_longitude = longitude_v[i]; | |
last_lkp = lkp_v[i]; | |
last_destination = destination_v[i]; | |
// Add dates to output. | |
start_date_out.push_back(start_date); | |
end_date_out.push_back(dates_v[i]); | |
start_date = 0; | |
// Add copied data to output. | |
mmsi_out.push_back(last_mmsi); | |
latitude_out.push_back(last_latitude); | |
longitude_out.push_back(last_longitude); | |
lkp_out.push_back(last_lkp); | |
destination_out.push_back(last_destination); | |
} | |
} | |
return DataFrame::create(Named("start_date") = start_date_out, | |
Named("end_date") = end_date_out, | |
Named("mmsi") = mmsi_out, | |
Named("latitude") = latitude_out, | |
Named("longitude") = longitude_out, | |
Named("last_known_port") = lkp_out, | |
Named("destination") = destination_out | |
); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
squared_distance <- function(x1, y1, x2, y2) { | |
(x1 - x2) ** 2 + (y1 - y2) ** 2 | |
} | |
cross_product_2d_z <- function(x1, y1, x2, y2) { | |
x1 * y2 - y1 * x2 | |
} | |
is_in_semicircle <- function(x1, y1, x2, y2, p_x, p_y) { | |
center_x <- (x1 + x2) / 2; | |
center_y <- (y1 + y2) / 2; | |
diameter <- squared_distance(x1, y1, x2, y2) | |
vec1_x <- x2 - center_x | |
vec1_y <- y2 - center_y | |
vec2_x <- p_x - center_x | |
vec2_y <- p_y - center_y | |
cross_product_z <- cross_product_2d_z(vec1_x, vec1_y, vec2_x, vec2_y) | |
if (squared_distance(center_x, center_y, p_x, p_y) <= diameter / 4 + 1e-7) { | |
if (cross_product_z >= 0) { | |
return(1) | |
} else { | |
return(2) | |
} | |
} | |
0 | |
} | |
crosses_and_arrivesR <- function(df, x1, y1, x2, y2, direction, | |
dest_lon, dest_lat, range) { | |
current_mmsi <- df$mmsi[[1]] | |
has_crossed <- FALSE | |
has_arrived <- FALSE | |
prev_semicircle <- 0 | |
range_squared <- range * range | |
# Rows boundaries | |
start_rows <- c() | |
end_rows <- c() | |
# Output vectors | |
start_date_out <- c() | |
end_date_out <- c() | |
mmsi_out <- c() | |
latitude_out <- c() | |
longitude_out <- c() | |
lkp_out <- c() | |
destination_out <- c() | |
num_routes_out <- c() | |
duration_out <- c() | |
for (i in 1:nrow(df)) { | |
crossed <- FALSE | |
in_semicircle <- is_in_semicircle(x1, y1, x2, y2, | |
df$longitude[[i]], df$latitude[[i]]) | |
if (direction == 1) { | |
if (in_semicircle == 1 && prev_semicircle == 2) { | |
crossed <- TRUE | |
} | |
} else { | |
if (in_semicircle == 2 && prev_semicircle == 1) { | |
crossed <- TRUE | |
} | |
} | |
prev_semicircle = in_semicircle | |
if (crossed) { | |
has_crossed <- TRUE | |
if (length(start_rows) == length(end_rows)) { | |
start_rows <- c(start_rows, i - 1) | |
} else { | |
start_rows[[length(start_rows)]] <- i - 1 | |
} | |
} | |
if (has_crossed) { | |
sq_distance <- squared_distance(df$longitude[[i]], df$latitude[[i]], | |
dest_lon, dest_lat) | |
if (sq_distance < range_squared) { | |
has_crossed <- FALSE | |
has_arrived <- TRUE | |
end_rows <- c(end_rows, i) | |
} | |
} | |
if (current_mmsi != df$mmsi[i] || i == nrow(df)) { | |
if (length(end_rows) > 0) { | |
for (num_route in 1:length(end_rows)) { | |
duration = df$end_date[[end_rows[[num_route]]]] - | |
df$start_date[[start_rows[[num_route]]]] | |
for (j in start_rows[[num_route]]:end_rows[[num_route]]) { | |
start_date_out <- c(start_date_out, df$start_date[[j]]) | |
end_date_out <- c(end_date_out, df$end_date[[j]]) | |
mmsi_out <- c(mmsi_out, df$mmsi[[j]]) | |
latitude_out <- c(latitude_out, df$latitude[[j]]) | |
longitude_out <- c(longitude_out, df$longitude[[j]]) | |
lkp_out <- c(lkp_out, df$last_known_port[[j]]) | |
destination_out <- c(destination_out, df$destination[[j]]) | |
num_routes_out <- c(num_routes_out, num_route) | |
duration_out <- c(duration_out, duration) | |
} | |
} | |
} | |
has_crossed <- FALSE | |
has_arrived <- FALSE | |
start_rows <- c() | |
end_rows <- c() | |
current_mmsi <- df$mmsi[i] | |
} | |
} | |
data.frame(start_date_out, | |
end_date_out, | |
mmsi_out, | |
latitude_out, | |
longitude_out, | |
lkp_out, | |
destination_out, | |
num_routes_out, | |
duration_out) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plot_on_map <- function(df) { | |
df$LatLong <- with(df, paste0(round(latitude,2), ":" ,round(longitude,2))) | |
m_ships <- gvisMap(df, locationvar = "LatLong" , tipvar = "start_date") | |
plot(m_ships) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(Rcpp) | |
library(RUnit) | |
sourceCpp("filter_trips.cpp") | |
test.is_in_semicircle <- function () { | |
## The first 4 coordinates describe a circle starting at (1, 0) going to | |
## (-1, 0). So the circle is split in halves along the x-axis. | |
# Points on the x-axis. | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, 0), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, 0), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, 0), 1) | |
# Points above the x-axis | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, 0.2), 2) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, 0.2), 2) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, 0.2), 2) | |
# Points below the y-axis | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, -0.2), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, -0.2), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, -0.2), 1) | |
## The first 4 coordinates describe a circle starting at (0, 1) going to | |
## (0, -1). So the circle is split in halves along the x-axis. | |
# Points nearly on the y-axis. | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, 0), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, 0), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, 0), 1) | |
deg_45 <- sin(pi / 4) | |
# Points on edge in a 45 degree angle. | |
checkEquals(is_in_semicircle(1, 0, -1, 0, deg_45, -deg_45), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, -deg_45, -deg_45), 1) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, deg_45, deg_45), 2) | |
checkEquals(is_in_semicircle(1, 0, -1, 0, -deg_45, deg_45), 2) | |
# Points in a bigger circle | |
checkEquals(is_in_semicircle(2, 0, -2, 0, deg_45, -deg_45), 1) | |
checkEquals(is_in_semicircle(2, 0, -2, 0, -deg_45, -deg_45), 1) | |
checkEquals(is_in_semicircle(2, 0, -2, 0, deg_45, deg_45), 2) | |
checkEquals(is_in_semicircle(2, 0, -2, 0, -deg_45, deg_45), 2) | |
# Dividing line at a 45 degree angle | |
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, 1, 0), 1) | |
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, 0, -1), 1) | |
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, -1, 0), 2) | |
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, 0, 1), 2) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RUnit) | |
testsuite.crossesAndArrives <- defineTestSuite("is_in_semicircle", | |
dirs = file.path(".")) | |
testResult <- runTestSuite(testsuite.crossesAndArrives) | |
printTextProtocol(testResult) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment