Skip to content

Instantly share code, notes, and snippets.

@chapmanjacobd
Last active May 28, 2019 06:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chapmanjacobd/dd299c8636062095590d54916b625bc7 to your computer and use it in GitHub Desktop.
Save chapmanjacobd/dd299c8636062095590d54916b625bc7 to your computer and use it in GitHub Desktop.
slow way to tease out distinct areas from overlapping areas
# ---- Delete overlapping buffers ----
# Penalise scientific notation heavily so that R prefers fixed notation
# when it formats numbers.
options(scipen = 100)
require(varhandle)
require(data.table)
require(tidyr)
require(tidyverse)
require(dplyr)
require(DBI)
require(RSQLite)
require(sp)
require(gmt)
require(stringr)
require(geosphere)
# Read the whole `cities` table out of the SQLite file into a data.table,
# closing the connection as soon as the table has been fetched.
db <- dbConnect(RSQLite::SQLite(), dbname = "~/cities.sqlite")
dbListTables(db)
geocities <- as.data.table(dbReadTable(db, "cities"))
dbDisconnect(db)
# Sort ascending by X30s_02_mean. Rows are processed top to bottom later on,
# so this ordering decides which rows are visited first.
geocities <- geocities[order(X30s_02_mean)]
# Partition the rows into regions: one list element per distinct value of
# column `c`.
splitdt <- split(geocities, geocities$c)
# Number of regions in the list.
length(splitdt)
# Number of rows in the first region.
nrow(splitdt[[1]])
## Greedy thinning of overlapping buffers.
## Within every region a city is kept only if it lies at least `min_dist_m`
## metres (great-circle distance) from every city that was kept before it.

# Thin a single region.
#
# region      data.frame with numeric `longitude` and `latitude` columns
#             (passed to geosphere::distHaversine() as lon/lat points).
# min_dist_m  minimum separation, in metres, between any two kept rows.
#
# Returns `region` restricted to the kept rows, in their original order.
# Rows are visited top to bottom, so an earlier row always wins over a later
# row that falls inside its buffer.
# NOTE(review): rows whose distance evaluates to NA (e.g. missing
# coordinates) are never dropped -- confirm this is the desired handling.
thin_region <- function(region, min_dist_m = 5400) {
  stopifnot(all(c("longitude", "latitude") %in% names(region)))
  coords <- cbind(region[["longitude"]], region[["latitude"]])
  n_rows <- nrow(region)
  keep <- rep(TRUE, n_rows)
  for (origin in seq_len(n_rows)) {
    if (!keep[origin]) {
      next
    }
    # Only surviving rows *after* the origin are candidates for removal.
    # Earlier survivors are already known to be far enough away (otherwise
    # this origin would have been dropped), and the origin itself must not
    # be tested: its distance to itself is 0, which would delete the very
    # city that is supposed to represent the buffer.
    candidates <- which(keep)
    candidates <- candidates[candidates > origin]
    if (length(candidates) == 0L) {
      break
    }
    # distHaversine() is vectorised over the rows of its first argument, so
    # one call measures every candidate against the origin.
    dist_m <- distHaversine(
      coords[candidates, , drop = FALSE],
      coords[origin, ]
    )
    keep[candidates[which(dist_m < min_dist_m)]] <- FALSE
  }
  region[keep, ]
}

## process every region
n_regions <- length(splitdt)
thinned <- vector("list", n_regions)
for (region_pointer in seq_len(n_regions)) {
  region_data <- as.data.frame(splitdt[[region_pointer]])
  kept <- thin_region(region_data)
  # Keep the constant `remove` column that earlier versions of this script
  # wrote to the output, so the schema of the CSV does not change.
  kept$remove <- rep(FALSE, nrow(kept))
  thinned[[region_pointer]] <- kept
  message(sprintf(
    "region %d of %d (%.1f%%): kept %d of %d rows",
    region_pointer, n_regions, 100 * region_pointer / n_regions,
    nrow(kept), nrow(region_data)
  ))
}

# save results
# The zero-row slice of `geocities` seeds the column layout, so `dat` has the
# input's columns even when there are no regions at all.
dat <- rbindlist(c(list(geocities[FALSE, ]), thinned), fill = TRUE)
# Write the combined result to disk as CSV.
fwrite(x = dat, file = "~/data/cities_processed.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment