Skip to content

Instantly share code, notes, and snippets.

@vftools
Last active August 29, 2015 14:07
Show Gist options
  • Save vftools/0476b357215f1ca4478a to your computer and use it in GitHub Desktop.
Save vftools/0476b357215f1ca4478a to your computer and use it in GitHub Desktop.
#include <Rcpp.h>
#include <math.h>
using namespace Rcpp;
enum DIRECTION { NONE = 0,
LEFT_TO_RIGHT = 1,
RIGHT_TO_LEFT = 2
};
// Function used for checking whether ships are in certain regions.
double squared_distance(const double x1, const double y1,
const double x2, const double y2);
double cross_product_2d_z(const double x1, const double y1,
const double x2, const double y2);
int is_in_semicircle(const double x1, const double y1,
const double x2, const double y2,
const double lat, const double lon);
/**
* Find all ships that cross a line and reaches a destination within range.
*
* @param df A dataframe with observations of ship movements.
* @param x1 Upper latitude
* @param y1 Upper longitude
* @param x2 Lower latitude
* @param y2 Lower longitude
* @param direction The direction in which the line should be crossed
* @param dest_lat Latitude of the destination
* @param dest_lon Longitude of the destination
* @param range The minimum range the ship should come to destination.
*/
// [[Rcpp::export]]
DataFrame crosses_and_arrives(const DataFrame& df,
const double x1, const double y1,
const double x2, const double y2,
const int direction,
const double dest_lon, const double dest_lat, const double range
) {
// Unpack the dataframe in lists.
Rcpp::IntegerVector start_date_v = df["start_date"];
Rcpp::IntegerVector end_date_v = df["end_date"];
Rcpp::IntegerVector mmsi_v = df["mmsi"];
Rcpp::DoubleVector latitude_v = df["latitude"];
Rcpp::DoubleVector longitude_v = df["longitude"];
Rcpp::CharacterVector lkp_v = df["last_known_port"];
Rcpp::CharacterVector destination_v = df["destination"];
// Lists making up the output dataframe.
std::vector<long> start_date_out;
std::vector<long> end_date_out;
std::vector<long> mmsi_out;
std::vector<double> latitude_out;
std::vector<double> longitude_out;
std::vector<std::string> lkp_out;
std::vector<std::string> destination_out;
std::vector<int> num_routes_out;
std::vector<long> duration_out;
// Setting up the loop.
long current_mmsi = mmsi_v[0];
// Vectors giving the begin and end of trips
std::vector<size_t> start_rows;
std::vector<size_t> end_rows;
// Current trip information
bool has_crossed = false;
bool has_arrived = false;
int prev_semicircle = 0;
// Square the accepted range so we can compare distance without taking the
// square root.
const double range_squared = range * range;
// Start at 1 because we need to look back.
for (int i=1; i < mmsi_v.size(); i++)
{
bool crossed = false;
// Check whether line has been crossed.
int in_semicircle = is_in_semicircle(x1, y1,
x2, y2, longitude_v[i], latitude_v[i]);
// Check whether the line has been crossed in the given direction.
if (direction == DIRECTION::LEFT_TO_RIGHT) {
// From the clockwise first box to the clockwise second block.
if (in_semicircle == DIRECTION::LEFT_TO_RIGHT
&& prev_semicircle == DIRECTION::RIGHT_TO_LEFT
) {
crossed = true;
}
} else {
// From the counter clockwise first box to the clockwise second block.
if (in_semicircle == DIRECTION::RIGHT_TO_LEFT
&& prev_semicircle == DIRECTION::LEFT_TO_RIGHT
) {
crossed = true;
}
}
prev_semicircle = in_semicircle;
if (crossed)
{
has_crossed = true;
// Sometimes, the ship goes in circles and crosses multiple times
// before arriving. In that case overwrite the previous start row.
// Minus one to include the observation just before crossing.
if (start_rows.size() == end_rows.size()) {
start_rows.push_back(i - 1);
} else {
start_rows[start_rows.size() - 1] = i - 1;
}
}
// Only check arrival for ships that have crossed the line.
if (has_crossed)
{
// Calculate the squared distance to the destination.
double sq_distance = squared_distance(longitude_v[i], latitude_v[i],
dest_lon, dest_lat);
// If we're in the circle add it.
if (sq_distance < range_squared)
{
// A next trip requires that first the line is crossed from the right
// side and that we arrive again.
has_crossed = false;
has_arrived = true;
end_rows.push_back(i);
}
}
// Check whether we have a new ship or this is the last observation.
if (current_mmsi != mmsi_v[i] || i == mmsi_v.size() - 1)
{
// If there is an end observation at least one trip has been completed.
if (end_rows.size() > 0)
{
// Add the complete route taken between crossing and destination,
// calculate the route number and duration of the trip on the fly.
for (size_t num_route = 0; num_route < end_rows.size(); num_route++)
{
// This should not be possible.
if (start_rows[num_route] >= end_rows[num_route])
{
throw(Rcpp::exception("Start row not smaller than end row"));
}
long duration = end_date_v[end_rows[num_route]] -
start_date_v[start_rows[num_route]];
// Add all the fields to a new row.
for (unsigned long j = start_rows[num_route]; j <= end_rows[num_route]; j++)
{
start_date_out.push_back(start_date_v[j]);
end_date_out.push_back(end_date_v[j]);
mmsi_out.push_back(mmsi_v[j]);
latitude_out.push_back(latitude_v[j]);
longitude_out.push_back(longitude_v[j]);
lkp_out.push_back(std::string(lkp_v[j]));
destination_out.push_back(std::string(destination_v[j]));
num_routes_out.push_back(num_route + 1);
duration_out.push_back(duration);
}
}
}
// Reset for the next ship.
has_crossed = false;
has_arrived = false;
start_rows.clear();
end_rows.clear();
current_mmsi = mmsi_v[i];
}
}
// Create the DataFrame and return it.
return DataFrame::create(Named("start_date") = start_date_out,
Named("end_date") = end_date_out,
Named("mmsi") = mmsi_out,
Named("latitude") = latitude_out,
Named("longitude") = longitude_out,
Named("last_known_port") = lkp_out,
Named("destination") = destination_out,
Named("num_route") = num_routes_out,
Named("duration") = duration_out
);
}
/**
* Calculate the Euclidean squared distance.
*
* @param x1 The first x coordinate
* @param y1 The first y coordinate
* @param x2 The second x coordinate
* @param y2 The second y coordinate
*/
double squared_distance(const double x1, const double y1,
const double x2, const double y2)
{
return pow(x1 - x2, 2) + pow(y1 - y2, 2);
}
/**
* Calculate the cross product of two 2d vectors.
*
* Calculates the z of a cross product of the 2D vectors by setting the z
* coordinates of the input to zero to obtain 3D vectors.
*
* @param x1 The first x coordinate
* @param y1 The first y coordinate
* @param x2 The second x coordinate
* @param y2 The second y coordinate
*/
double cross_product_2d_z(const double x1, const double y1,
const double x2, const double y2)
{
return x1 * y2 - y1 * x2;
}
/**
* Check whether lat and lon are in a semicircle.
*
* The semicircle is from (x1, y1) clockwise to (x2, y2).
*
* Exported mainly for testing convenience but the function seems useful in
* its own right.
*
* The type is int and not DIRECTION so that it can be exported to Rcpp.
*
* Taken from: http://stackoverflow.com/a/535042/862288
*
* @param x1 Upper latitude
* @param y1 Upper longitude
* @param x2 Lower latitude
* @param y2 Lower longitude
* @param lat Latitude point
* @param lon Longitude point
*/
// [[Rcpp::export]]
int is_in_semicircle(const double x1, const double y1,
const double x2, const double y2,
const double p_x, const double p_y)
{
const double center_x = (x1 + x2) / 2;
const double center_y = (y1 + y2) / 2;
const double diameter = squared_distance(x1, y1, x2, y2);
const double vec1_x = x2 - center_x;
const double vec1_y = y2 - center_y;
const double vec2_x = p_x - center_x;
const double vec2_y = p_y - center_y;
// Check on which side of the circle the point is located.
const double cross_product_z = cross_product_2d_z(vec1_x, vec1_y,
vec2_x, vec2_y);
// Divide by 4, because this is the square of the diameter. We add an
// epsilon to the comparison to satisfy the test cases as the calculations
// are not 100% precise.
if (squared_distance(center_x, center_y, p_x, p_y) <= diameter / 4 + 1e-7)
{
if (cross_product_z >= 0) {
return DIRECTION::LEFT_TO_RIGHT;
} else {
return DIRECTION::RIGHT_TO_LEFT;
}
}
return DIRECTION::NONE;
}
/**
* Remove repeated observations from the dataframe
*
* @param df The dataframe.
*/
// [[Rcpp::export]]
DataFrame filter_dataframe(const DataFrame& df)
{
// Unpack the dataframe in lists.
Rcpp::IntegerVector dates_v = df["scrape_datetime"];
Rcpp::IntegerVector mmsi_v = df["mmsi"];
Rcpp::DoubleVector latitude_v = df["latitude"];
Rcpp::DoubleVector longitude_v = df["longitude"];
Rcpp::CharacterVector lkp_v = df["last_known_port"];
Rcpp::CharacterVector destination_v = df["destination"];
// Prepare output lists
std::vector<long> start_date_out;
std::vector<long> end_date_out;
std::vector<long> mmsi_out;
std::vector<double> latitude_out;
std::vector<double> longitude_out;
std::vector<std::string> lkp_out;
std::vector<std::string> destination_out;
long start_date = dates_v[0];
long last_mmsi = mmsi_v[0];
double last_latitude = latitude_v[0];
double last_longitude = longitude_v[0];
std::string last_lkp = std::string(lkp_v[0]);
std::string last_destination = std::string(destination_v[0]);
// Start at 1 because we need to look back.
for (int i=1; i < mmsi_v.size(); i++) {
if (start_date == 0) {
start_date = dates_v[i];
}
// Check for a new row.
if (
last_mmsi != mmsi_v[i] ||
last_latitude != latitude_v[i] ||
// Only make string objects when necessary as || short circuits.
last_longitude != longitude_v[i] ||
last_lkp != std::string(lkp_v[i]) ||
last_destination != std::string(destination_v[i])
) {
// Update last.
last_mmsi = mmsi_v[i];
last_latitude = latitude_v[i];
last_longitude = longitude_v[i];
last_lkp = lkp_v[i];
last_destination = destination_v[i];
// Add dates to output.
start_date_out.push_back(start_date);
end_date_out.push_back(dates_v[i]);
start_date = 0;
// Add copied data to output.
mmsi_out.push_back(last_mmsi);
latitude_out.push_back(last_latitude);
longitude_out.push_back(last_longitude);
lkp_out.push_back(last_lkp);
destination_out.push_back(last_destination);
}
}
return DataFrame::create(Named("start_date") = start_date_out,
Named("end_date") = end_date_out,
Named("mmsi") = mmsi_out,
Named("latitude") = latitude_out,
Named("longitude") = longitude_out,
Named("last_known_port") = lkp_out,
Named("destination") = destination_out
);
}
squared_distance <- function(x1, y1, x2, y2) {
(x1 - x2) ** 2 + (y1 - y2) ** 2
}
cross_product_2d_z <- function(x1, y1, x2, y2) {
x1 * y2 - y1 * x2
}
is_in_semicircle <- function(x1, y1, x2, y2, p_x, p_y) {
center_x <- (x1 + x2) / 2;
center_y <- (y1 + y2) / 2;
diameter <- squared_distance(x1, y1, x2, y2)
vec1_x <- x2 - center_x
vec1_y <- y2 - center_y
vec2_x <- p_x - center_x
vec2_y <- p_y - center_y
cross_product_z <- cross_product_2d_z(vec1_x, vec1_y, vec2_x, vec2_y)
if (squared_distance(center_x, center_y, p_x, p_y) <= diameter / 4 + 1e-7) {
if (cross_product_z >= 0) {
return(1)
} else {
return(2)
}
}
0
}
crosses_and_arrivesR <- function(df, x1, y1, x2, y2, direction,
dest_lon, dest_lat, range) {
current_mmsi <- df$mmsi[[1]]
has_crossed <- FALSE
has_arrived <- FALSE
prev_semicircle <- 0
range_squared <- range * range
# Rows boundaries
start_rows <- c()
end_rows <- c()
# Output vectors
start_date_out <- c()
end_date_out <- c()
mmsi_out <- c()
latitude_out <- c()
longitude_out <- c()
lkp_out <- c()
destination_out <- c()
num_routes_out <- c()
duration_out <- c()
for (i in 1:nrow(df)) {
crossed <- FALSE
in_semicircle <- is_in_semicircle(x1, y1, x2, y2,
df$longitude[[i]], df$latitude[[i]])
if (direction == 1) {
if (in_semicircle == 1 && prev_semicircle == 2) {
crossed <- TRUE
}
} else {
if (in_semicircle == 2 && prev_semicircle == 1) {
crossed <- TRUE
}
}
prev_semicircle = in_semicircle
if (crossed) {
has_crossed <- TRUE
if (length(start_rows) == length(end_rows)) {
start_rows <- c(start_rows, i - 1)
} else {
start_rows[[length(start_rows)]] <- i - 1
}
}
if (has_crossed) {
sq_distance <- squared_distance(df$longitude[[i]], df$latitude[[i]],
dest_lon, dest_lat)
if (sq_distance < range_squared) {
has_crossed <- FALSE
has_arrived <- TRUE
end_rows <- c(end_rows, i)
}
}
if (current_mmsi != df$mmsi[i] || i == nrow(df)) {
if (length(end_rows) > 0) {
for (num_route in 1:length(end_rows)) {
duration = df$end_date[[end_rows[[num_route]]]] -
df$start_date[[start_rows[[num_route]]]]
for (j in start_rows[[num_route]]:end_rows[[num_route]]) {
start_date_out <- c(start_date_out, df$start_date[[j]])
end_date_out <- c(end_date_out, df$end_date[[j]])
mmsi_out <- c(mmsi_out, df$mmsi[[j]])
latitude_out <- c(latitude_out, df$latitude[[j]])
longitude_out <- c(longitude_out, df$longitude[[j]])
lkp_out <- c(lkp_out, df$last_known_port[[j]])
destination_out <- c(destination_out, df$destination[[j]])
num_routes_out <- c(num_routes_out, num_route)
duration_out <- c(duration_out, duration)
}
}
}
has_crossed <- FALSE
has_arrived <- FALSE
start_rows <- c()
end_rows <- c()
current_mmsi <- df$mmsi[i]
}
}
data.frame(start_date_out,
end_date_out,
mmsi_out,
latitude_out,
longitude_out,
lkp_out,
destination_out,
num_routes_out,
duration_out)
}
plot_on_map <- function(df) {
df$LatLong <- with(df, paste0(round(latitude,2), ":" ,round(longitude,2)))
m_ships <- gvisMap(df, locationvar = "LatLong" , tipvar = "start_date")
plot(m_ships)
}
library(Rcpp)
library(RUnit)
sourceCpp("filter_trips.cpp")
test.is_in_semicircle <- function () {
## The first 4 coordinates describe a circle starting at (1, 0) going to
## (-1, 0). So the circle is split in halves along the x-axis.
# Points on the x-axis.
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, 0), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, 0), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, 0), 1)
# Points above the x-axis
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, 0.2), 2)
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, 0.2), 2)
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, 0.2), 2)
# Points below the y-axis
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, -0.2), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, -0.2), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, -0.2), 1)
## The first 4 coordinates describe a circle starting at (0, 1) going to
## (0, -1). So the circle is split in halves along the x-axis.
# Points nearly on the y-axis.
checkEquals(is_in_semicircle(1, 0, -1, 0, 0, 0), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, 0.5, 0), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, -0.5, 0), 1)
deg_45 <- sin(pi / 4)
# Points on edge in a 45 degree angle.
checkEquals(is_in_semicircle(1, 0, -1, 0, deg_45, -deg_45), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, -deg_45, -deg_45), 1)
checkEquals(is_in_semicircle(1, 0, -1, 0, deg_45, deg_45), 2)
checkEquals(is_in_semicircle(1, 0, -1, 0, -deg_45, deg_45), 2)
# Points in a bigger circle
checkEquals(is_in_semicircle(2, 0, -2, 0, deg_45, -deg_45), 1)
checkEquals(is_in_semicircle(2, 0, -2, 0, -deg_45, -deg_45), 1)
checkEquals(is_in_semicircle(2, 0, -2, 0, deg_45, deg_45), 2)
checkEquals(is_in_semicircle(2, 0, -2, 0, -deg_45, deg_45), 2)
# Dividing line at a 45 degree angle
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, 1, 0), 1)
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, 0, -1), 1)
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, -1, 0), 2)
checkEquals(is_in_semicircle(deg_45, deg_45, -deg_45, -deg_45, 0, 1), 2)
}
library(RUnit)
testsuite.crossesAndArrives <- defineTestSuite("is_in_semicircle",
dirs = file.path("."))
testResult <- runTestSuite(testsuite.crossesAndArrives)
printTextProtocol(testResult)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment