Created
May 22, 2018 14:11
-
-
Save a-lexwein/967203b72fae28fda5d8f14877d46efe to your computer and use it in GitHub Desktop.
Recursively maps intervals to non-overlapping segments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(lubridate) | |
library(ggplot2) | |
library(dplyr) | |
library(viridis) | |
## Build one row per playlist owned by `user_id`, holding the number of tracks
## and the earliest / latest `track_added_at` date among them.
endpoints <- lol %>%
  filter(playlist_owner == user_id) %>%
  mutate(track_added_at = as.Date(track_added_at)) %>%
  # drop tracks whose added-date lies in the future
  filter(track_added_at <= Sys.Date()) %>%
  select(playlist_id, playlist_name, track_added_at) %>%
  group_by(playlist_id, playlist_name) %>%
  summarise(
    n_tracks = n(),
    start = min(track_added_at),
    end = max(track_added_at)
  ) %>%
  ungroup() %>%
  # keep only playlists whose first track was added in 2012 or later
  filter(year(start) >= 2012)
## Set-up for the interval packing done by place_item().
## Goal: sort a data frame of intervals into groups within which no two
## intervals overlap. Start with the earliest interval, then take the next
## earliest interval whose start is greater than that interval's end, and
## repeat until no later non-overlapping interval is left. Then increment the
## position and start again from the earliest remaining interval.

## `a` is the pool of intervals still to be placed; rows are removed from it
## as they are placed. Each row must describe an interval through the columns
## `start` and `end`.
a <- endpoints %>% arrange(start)

## Empty data frame with the same columns as `a` plus a numeric `position`
## column; the placed rows are appended to it.
output <- a %>%
  filter(FALSE) %>%
  mutate(position = 0)

## Counter that maps to output$position.
pos <- 0
# Greedy interval packing. Works through side effects on the globals `a`
# (pool of unplaced intervals), `output` (placed intervals) and `pos`
# (current group number).
#
# Repeatedly takes the earliest candidate interval, appends it to `output`
# with `position = pos`, removes it from `a`, and narrows the candidates to
# the pooled intervals that start strictly after the end of the one just
# placed. When no candidate is left the current group is full: `pos` is
# incremented and the next group starts from the earliest interval in `a`.
#
# Implemented as a loop rather than one nested call per interval, so the
# nesting depth no longer grows with the number of intervals.
#
# @param x Data frame of candidate intervals for the current group, sorted by
#   `start`, with the columns `playlist_id`, `start` and `end`. Normally the
#   whole pool, i.e. `place_item(a)`. An empty `x` is treated as a full group.
# @return `NULL`, invisibly; the result is accumulated in the global `output`.
place_item <- function(x) {
  candidates <- x
  while (nrow(a) > 0) {
    if (nrow(candidates) == 0) {
      # Nothing fits after the last placed interval: open the next group and
      # restart from the earliest interval that is still unplaced.
      pos <<- pos + 1
      candidates <- a
    }
    # Take the earliest candidate and tag it with the current group.
    placed <- candidates %>%
      slice(1) %>%
      mutate(position = pos)
    # Record it and remove it from the pool.
    # NOTE(review): removal is by `playlist_id`, so this assumes the ids in
    # `a` are unique - confirm against how `endpoints` is built.
    output <<- bind_rows(output, placed)
    a <<- filter(a, playlist_id != placed$playlist_id)
    # Only intervals starting strictly after this one ends can share its group.
    candidates <- filter(a, start > max(placed$end))
  }
  invisible(NULL)
}

# Run the packing. The groups end up in `output`; `x` itself is just NULL.
x <- place_item(a)
# A "void"-style complete ggplot2 theme: rectangles, axis text, axis titles,
# axis ticks and the legend title are blanked, and the plot margin is zero.
#
# @param base_size Base font size passed to the root `text` element.
# @param base_family Base font family passed to the root `text` element.
# @return A complete ggplot2 theme object.
theme_void_alt <- function(base_size = 12, base_family = "") {
  nothing <- element_blank()
  # Root text element; the other text elements are sized relative to it.
  base_text <- element_text(
    family = base_family,
    face = "plain",
    colour = "black",
    size = base_size,
    lineheight = 0.9,
    hjust = 0.5,
    vjust = 0.5,
    angle = 0,
    margin = margin(),
    debug = FALSE
  )
  small_text <- element_text(size = rel(0.8))

  theme(
    rect = nothing,
    text = base_text,
    plot.margin = unit(c(0, 0, 0, 0), "lines"),
    axis.text.x = nothing,
    axis.text.y = nothing,
    axis.title.x = nothing,
    axis.title.y = nothing,
    legend.text = small_text,
    legend.title = nothing,
    axis.ticks = nothing,
    strip.text = small_text,
    complete = TRUE
  )
}
# Quick look at the packing: one bar per playlist running from `start` to
# `end`, drawn in the group given by `position`. The position is negated
# before the axes are flipped.
output %>%
  ggplot(aes(x = -position, ymin = start, ymax = end)) +
  geom_linerange(size = 5) +
  coord_flip() +
  theme_void() +
  theme(
    axis.text.x = element_text(),
    plot.margin = margin(0, 0, 10, 10)
  )
# Add plotting helpers to the packed intervals, ordered longest-lived first:
#   diff    - time between first and last added track
#   i       - rank by `diff` (1 = longest)
#   label   - playlist name for the five longest playlists, "" otherwise
#   col     - colour value: -i for the ten longest, -20 for all the others
#   middate - midpoint of the interval (where the label is drawn)
output_b <- output %>%
  mutate(diff = end - start) %>%
  arrange(desc(diff)) %>%
  mutate(
    i = row_number(),
    label = ifelse(i <= 5, playlist_name, ""),
    col = ifelse(i <= 10, -i, -20),
    middate = start + diff / 2
  )
# Final chart: groups 0 to 8 only, bars coloured by `col`, with the name of
# the longest-lived playlists printed at the middle of their bar.
output_b %>%
  filter(position <= 8) %>%
  ggplot(
    aes(
      x = -position,
      ymin = start,
      ymax = end + 1,
      y = middate,
      label = label
    )
  ) +
  geom_linerange(aes(color = col), size = 7) +
  coord_flip() +
  geom_text(color = viridis(1), fontface = "bold") +
  scale_color_viridis() +
  theme_void_alt() +
  theme(
    axis.text.x = element_text(),
    plot.margin = margin(0, 0, 10, 10),
    legend.position = "none",
    panel.grid.major.x = element_line(color = "gray")
  ) +
  ggtitle(
    label = "X's Spotify Playlists",
    subtitle = "Duration from playlist creation to last update"
  )
# | |
# output_b %>% | |
# filter(position <= 8) %>% | |
# ggplot(aes(x = -position, ymin = start, ymax = end+1,y=middate, label = label)) + geom_linerange(aes(color = col),size = 7) + | |
# coord_flip() + geom_text(color = viridis(1), fontface = 'bold') + | |
# scale_color_viridis() + | |
# theme_void_alt() + | |
# theme(axis.text.x = element_text(), | |
# plot.margin = margin(0, 0, 10, 10), | |
# legend.position = 'none', | |
# panel.grid.major.x = element_line(color = "gray") + ggtitle("Alex's Spotify Playlists") | |
# lol %>% | |
# ggplot(aes(x=as.Date(track_added_at))) + geom_density() | |
# | |
#lol <- data.frame(do.call(rbind, list_of_lists)) | |
#lol$track_artists_mess[2][[1]] | |
# Rows of `lol` that belong to playlists owned by `user_id`.
# The comparison must use `==`; a single `=` is read by filter() as a named
# argument and raises an error instead of filtering.
filter(lol, playlist_owner == user_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment