Skip to content

Instantly share code, notes, and snippets.

@rafapereirabr
Last active June 21, 2021 21:48
Show Gist options
  • Save rafapereirabr/2ab8dfbebf237fa243e26c8d7ce5e97e to your computer and use it in GitHub Desktop.
Save rafapereirabr/2ab8dfbebf237fa243e26c8d7ce5e97e to your computer and use it in GitHub Desktop.
categorize data using jenks natural breaks
library(stringr)
library(stringi)
library(ggplot2)
library(viridis)
library(data.table)
library(BAMMtools) # fast calculation of jenks natural breaks
library(ggt)
# make the built-in iris dataset available in the workspace
data(list = "iris")
# create Natural Jenks function

#' Categorize a numeric column using Jenks natural breaks
#'
#' Adds a factor column named `<var>_jenks` to `data`. Its levels are the
#' Jenks intervals of `var`, labelled "lower - upper" and ordered from the
#' lowest interval to the highest.
#'
#' @param data A data.frame or data.table. A data.table gets the new column
#'   added by reference; any other input is first copied into a new
#'   data.table so the object passed by the caller is not altered.
#' @param var Name (length-one character) of the numeric column to classify.
#' @param breaks Single number forwarded to `getJenksBreaks()` as the number
#'   of break points to compute.
#' @return A data.table with the original columns plus `<var>_jenks`.
jenks_natural <- function(data, var, breaks) {
  stopifnot(
    is.character(var), length(var) == 1L,
    is.numeric(breaks), length(breaks) == 1L
  )

  # convert to data.table without touching the caller's object
  if (!is.data.table(data)) {
    data <- as.data.table(data)
  }
  if (!var %in% names(data)) {
    stop("Column '", var, "' not found in `data`.", call. = FALSE)
  }

  # `[[` only looks inside `data`; `get()` could silently pick up a variable
  # of the same name from an enclosing environment
  values <- data[[var]]
  if (!is.numeric(values)) {
    stop("Column '", var, "' must be numeric.", call. = FALSE)
  }

  # name of new column
  newvar <- paste0(var, "_jenks")

  # calculate jenks natural breaks; duplicated break points would make cut()
  # fail, so collapse them first
  jenks_breaks <- unique(getJenksBreaks(values, breaks))
  if (length(jenks_breaks) < 2L) {
    stop("Could not derive at least two distinct breaks.", call. = FALSE)
  }
  classes <- cut(
    values,
    breaks = jenks_breaks,
    include.lowest = TRUE,
    dig.lab = 3
  )

  # Turn "(a,b]" / "[a,b]" into "a - b". Relabelling the levels in place keeps
  # cut()'s numeric ordering; sorting the label text instead would place
  # "12 - 102" before "3 - 12".
  class_labels <- gsub("\\[|\\(|\\]", "", levels(classes))
  class_labels <- gsub(",", " - ", class_labels, fixed = TRUE)
  levels(classes) <- class_labels

  # keep only the classes that actually occur in the data
  data[, (newvar) := droplevels(classes)]

  data
}
# apply function: adds the factor column Petal.Length_jenks
iris <- jenks_natural(iris, "Petal.Length", 5)

# plot using continuous values
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Petal.Length)) +
  scale_color_viridis() +
  geom_point() +
  theme_bw()

# plot using discrete values (one colour per Jenks class);
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned
ggplot(
  iris,
  aes(x = Sepal.Length, y = Petal.Length, color = Petal.Length_jenks)
) +
  scale_color_viridis(discrete = TRUE) +
  geom_point() +
  theme_bw()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment