Last active
May 28, 2019 06:35
-
-
Save chapmanjacobd/dd299c8636062095590d54916b625bc7 to your computer and use it in GitHub Desktop.
slow way to tease out distinct areas from overlapping areas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### =delete overlapping buffers= ###
# Apply a large penalty to scientific notation so that numbers are printed
# and written in fixed notation.
options(scipen = 100)
require(varhandle) | |
require(data.table) | |
require(tidyr) | |
require(tidyverse) | |
require(dplyr) | |
require(DBI) | |
require(RSQLite) | |
require(sp) | |
require(gmt) | |
require(stringr) | |
require(geosphere) | |
# Read the entire "cities" table from the local SQLite database file.
db <- dbConnect(drv = RSQLite::SQLite(), "~/cities.sqlite")
dbListTables(db)
geocities <- dbReadTable(conn = db, name = "cities")
dbDisconnect(db)
# Convert the data.frame to a data.table in place.
setDT(geocities)
# Ascending sort on X30s_02_mean; the de-overlapping loop further down
# visits the rows of each region in this order.
geocities <- geocities[order(X30s_02_mean)]
# Partition the cities by the value of column `c`: one list element per
# distinct value (presumably a region/country code -- confirm against schema).
splitdt <- split(geocities, geocities$c)
# Number of regions in the list.
length(splitdt)
# Number of rows in the first region.
nrow(splitdt[[1]])
## setup
# Zero-row table with the same columns as `geocities`; the per-region
# results are appended to it.
dat <- geocities[0L]
# Index of the region currently being processed.
region_pointer <- 1
# Greedily thin one region so that no two surviving rows are closer than
# `min_dist_m` metres (great-circle distance via geosphere::distHaversine).
#
# Rows are visited in their current order. Each row that is still alive is
# kept, and every later alive row lying closer than `min_dist_m` to it is
# discarded. The origin row itself is never discarded (the previous
# implementation removed it because its distance to itself is 0).
#
# region     : data.frame / data.table with numeric `longitude` and `latitude`
#              columns (degrees).
# min_dist_m : minimum allowed separation in metres; defaults to 5400.
#
# Returns a data.frame holding the surviving rows plus a `remove` column that
# is always FALSE; the column is retained so the output has the same columns
# as previous runs of this script.
thin_region <- function(region, min_dist_m = 5400) {
  region <- as.data.frame(region)
  n_rows <- nrow(region)
  region$remove <- rep(FALSE, n_rows)
  coords <- cbind(region[["longitude"]], region[["latitude"]])
  alive <- rep(TRUE, n_rows)
  for (origin in seq_len(n_rows)) {
    if (!alive[origin]) {
      next
    }
    # Earlier alive rows were origins themselves and are already known to be
    # far enough away, so only later alive rows need to be measured.
    later <- which(alive & seq_len(n_rows) > origin)
    if (length(later) == 0L) {
      break
    }
    dists <- geosphere::distHaversine(
      coords[origin, ],
      coords[later, , drop = FALSE]
    )
    # which() drops NA distances, so rows with missing coordinates are kept.
    alive[later[which(dists < min_dist_m)]] <- FALSE
  }
  region[alive, , drop = FALSE]
}

# Thin every region independently, collecting the results in a preallocated
# list instead of growing `dat` with rbind() on each iteration.
thinned <- vector("list", length(splitdt))
for (region_pointer in seq_along(splitdt)) {
  thinned[[region_pointer]] <- thin_region(splitdt[[region_pointer]])
  # Percentage of regions completed so far (reaches exactly 100 at the end).
  print('#####################################')
  print(region_pointer / length(splitdt) * 100)
  print('#####################################')
}

# save results
# Appending to the zero-row `dat` template keeps the original column set even
# when there are no regions at all.
dat <- rbind(dat, data.table::rbindlist(thinned, fill = TRUE), fill = TRUE)
fwrite(dat, file = '~/data/cities_processed.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment