Skip to content

Instantly share code, notes, and snippets.

@ercas
Last active July 21, 2021 15:58
Show Gist options
  • Save ercas/f969bf8854ec8721c3283d64f228f040 to your computer and use it in GitHub Desktop.
Save ercas/f969bf8854ec8721c3283d64f228f040 to your computer and use it in GitHub Desktop.
Create graphs of ListenBrainz data
library(dplyr)
library(ggplot2)
library(jsonlite)
library(lubridate)
library(stringr)
# load data ---------------------------------------------------------------
# Resolve the local time zone portably. Sys.timezone() returns an Olson name
# such as "America/New_York", or NA when it cannot be determined. Shelling out
# to `readlink -f /etc/localtime | grep ...` only works where that path is a
# symlink, and produces an invalid name for zones that are not exactly two
# path components deep (e.g. ".../zoneinfo/UTC" or
# "America/Argentina/Buenos_Aires").
local_tz <- Sys.timezone()
if (is.na(local_tz) || !nzchar(local_tz)) {
  local_tz <- "UTC"
}

# Read the ListenBrainz export, flatten nested data-frame columns into
# dotted names (e.g. track_metadata.artist_name), convert listened_at to a
# date-time displayed in the local zone, and keep listens from 2020 onward.
# Note: as_datetime("2020-01-01") is midnight UTC, not local midnight.
data <- fromJSON("ercas_lb-2021-07-21.json") %>%
  flatten() %>%
  mutate(listened_at = with_tz(as_datetime(listened_at), local_tz)) %>%
  filter(listened_at >= as_datetime("2020-01-01"))
# top artists -------------------------------------------------------------
# Number of artists to show individually; everything else is pooled.
# The "Spectral" Brewer palette used below has at most 11 colours, so keep
# n_artists + 1 (for "All others") <= 11.
n_artists <- 10

# Rank artists by number of listens and keep the n_artists most frequent.
# Naming the table dimension yields an explicit `artist` column instead of
# relying on the "." column name that piping a vector into table() produces.
top_artists <- table(artist = data$track_metadata.artist_name) %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  arrange(desc(Freq)) %>%
  pull(artist) %>%
  head(n_artists)

# Build one alternation pattern, escaping regex metacharacters first so that
# names containing ".", "(", "+", "*", etc. are matched literally.
artist_pattern <- top_artists %>%
  str_replace_all("([.^$\\\\|*+?{}\\[\\]()])", "\\\\\\1") %>%
  paste(collapse = "|")

data %>%
  mutate(
    # Substring match rather than exact equality: a credit that merely
    # contains a top artist's name is attributed to that artist.
    # str_extract() gives NA when nothing matches (or the name is missing),
    # which coalesce() turns into the pooled "All others" category.
    Artist = coalesce(
      str_extract(track_metadata.artist_name, artist_pattern),
      "All others"
    )
  ) %>%
  ggplot() +
  aes(x = listened_at, fill = Artist) +
  geom_density(position = "stack", color = NA) +
  labs(
    x = "Date",
    y = "Density",
    title = sprintf("Top %d most listened-to artists over time", n_artists)
  ) +
  scale_fill_brewer(palette = "Spectral") +
  theme_minimal() +
  # Density values are not meaningful on their own; hide the y-axis breaks.
  scale_y_continuous(breaks = NULL)
# top albums --------------------------------------------------------------
# How many albums get their own colour in the chart.
n_albums <- 10

# Tabulate listens per release, order by descending count and take the names
# (first column of the frequency table) of the n_albums most played.
top_albums <- data$track_metadata.release_name %>%
  table() %>%
  as.data.frame() %>%
  arrange(desc(Freq)) %>%
  pull(1) %>%
  head(n_albums)

# Stacked density of listens over time, one band per top album plus a pooled
# "All others" band.
data %>%
  mutate(
    Album = case_when(
      # Anything outside the top list (including missing names) is pooled.
      !(track_metadata.release_name %in% top_albums) ~ "All others",
      # Abbreviate this one very long title so the legend stays readable.
      str_starts(track_metadata.release_name, "The Idler Wheel") ~
        "The Idler Wheel (...)",
      TRUE ~ track_metadata.release_name
    )
  ) %>%
  ggplot(aes(x = listened_at, fill = Album)) +
  geom_density(position = "stack", color = NA) +
  scale_fill_brewer(palette = "Spectral") +
  scale_y_continuous(breaks = NULL) +
  labs(
    title = sprintf("Top %d most listened-to albums over time", n_albums),
    x = "Date",
    y = "Density"
  ) +
  theme_minimal()
# top listening times -----------------------------------------------------
# Count the whole 7-day periods elapsed between the Unix epoch and `dt`.
#
# dt: a value (or vector) that as.numeric() converts to seconds since the
#     epoch, e.g. a POSIXct date-time.
# Returns a numeric vector of week indices, rounded down.
weeks_since_epoch <- function(dt) {
  seconds_per_week <- 7 * 24 * 60 * 60
  elapsed_seconds <- as.numeric(dt)
  floor(elapsed_seconds / seconds_per_week)
}
# Heat map of listens per (calendar week, hour of day).
#
# Both the grouping key and the x position come from the same
# floor_date(listened_at, "week") value, so each tile corresponds to exactly
# one group. Grouping by epoch-aligned 7-day bins while labelling with a
# calendar-week floor of the group's earliest listen could place one bin at
# two different x positions and make distinct bins overlap on the same tile.
data %>%
  mutate(
    # Start of the calendar week (lubridate's default week start), evaluated
    # in the time zone listened_at carries.
    week_of = floor_date(listened_at, "week"),
    hour = hour(listened_at)
  ) %>%
  # count() returns an ungrouped result with one row per week/hour pair.
  count(week_of, hour, name = "listens") %>%
  ggplot() +
  aes(week_of, hour, fill = log(listens)) +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(
    x = "Week",
    y = "Hour",
    title = "Most active listening times (note log scale)"
  ) +
  scale_x_datetime(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    # Fill the panel with the first (darkest) viridis colour so week/hour
    # cells that have no tile blend in with the low end of the scale.
    # viridis: https://github.com/BIDS/colormap/blob/master/colormaps.py#L788
    panel.background = element_rect(fill = rgb(0.267004, 0.004874, 0.329415)),
    panel.grid = element_blank()
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment