Last active
November 6, 2016 17:29
-
-
Save jlitven/925129de1ecb914ec0dc76e3e9bbc047 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get artist network
#
# Looks up the given artists through the Spotify Web API, fetches the related
# artists of each one, and builds a node/edge description of the resulting
# network. Progress is reported through withProgress()/incProgress(), so this
# presumably has to run inside a Shiny session (TODO confirm against caller).
#
# NOTE(review): all requests are sent without authentication; confirm that the
# API endpoints used here still accept unauthenticated calls.
#
# Arguments:
#   artist_ids - non-empty vector of artist ids; only the first
#                MAX_NUM_ARTISTS ids are used
# Returns:
#   unnamed list of two data frames:
#     [[1]] artists - columns id, name, genres (comma separated string),
#                     group ('initial' or 'related'), n_edges (number of
#                     times the artist is an endpoint of a unique edge) and
#                     image (one image url, NA if the artist has no image)
#     [[2]] edges   - columns from, to (artist ids)
#   Artists that are an endpoint of only one edge are dropped, together with
#   every edge that touches them.
get_artist_network = function(artist_ids){
  # Fail early with a clear message: with an empty input the old
  # artist_ids[1:0] indexing produced NA and a malformed API request.
  if(length(artist_ids) == 0){
    stop("artist_ids must contain at least one artist id", call. = FALSE)
  }

  # Get artists details
  max_ids = min(MAX_NUM_ARTISTS, length(artist_ids))
  URI = paste0('https://api.spotify.com/v1/artists?ids=',
               paste(artist_ids[seq_len(max_ids)], collapse=","))
  artists = fromJSON(txt=URI)$artists
  artist_cols = c('id', 'name', 'images', 'genres')
  artists = artists[, artist_cols]
  artists$group = 'initial'

  # Create network
  # Per-artist results are collected in preallocated lists and bound once
  # after the loop instead of growing data frames with rbind() on every pass.
  n_artists = nrow(artists)
  initial_ids = artists$id
  seen_ids = initial_ids
  edge_batches = vector("list", n_artists)
  related_batches = vector("list", n_artists)
  withProgress(message = 'Building Artist Network', value = 0, {
    for(i in seq_len(n_artists)){
      artist_id = initial_ids[[i]]
      # Increment the progress bar, and update the detail text.
      incProgress(1/n_artists)

      # Find related artists
      URI = paste0('https://api.spotify.com/v1/artists/', artist_id, '/related-artists')
      related_artists = fromJSON(txt=URI)$artists
      if(length(related_artists) == 0){
        next
      }
      related_artists = related_artists[, artist_cols]
      related_artists$group = 'related'
      related_artist_ids = related_artists$id

      # Create the edges
      edge_batches[[i]] = data.frame(
        from = rep(artist_id, length(related_artist_ids)),
        to = related_artist_ids,
        stringsAsFactors = FALSE
      )

      # Keep only artists not stored yet, so the first occurrence of an
      # artist (and therefore its group) wins.
      new_artists =
        related_artists %>%
        filter(!id %in% seen_ids)
      seen_ids = c(seen_ids, new_artists$id)
      related_batches[[i]] = new_artists
    }
  })

  # Artists without related artists left a NULL slot; drop those before binding.
  edge_batches = Filter(Negate(is.null), edge_batches)
  related_batches = Filter(Negate(is.null), related_batches)
  empty_edges = data.frame(from=character(), to=character(), stringsAsFactors=FALSE)
  edges = do.call(rbind, c(list(empty_edges), edge_batches))
  artists = do.call(rbind, c(list(artists), related_batches))

  print("Getting Unique artists..")
  # Get unique artists and edges
  artists = unique(artists)
  edges = unique(edges)

  # Remove all artists with only one connection in the network
  endpoints = data.frame(id=c(edges$from, edges$to), stringsAsFactors=FALSE)
  connected_artists =
    endpoints %>%
    group_by(id) %>%
    summarise(n_edges=n()) %>%
    filter(n_edges > 1)
  filtered_edges =
    edges %>%
    filter(from %in% connected_artists$id, to %in% connected_artists$id)
  # The inner join both restricts artists to the connected ones and attaches
  # their edge count.
  filtered_artists = inner_join(artists, connected_artists, by=c("id"))

  # Convert images to one url
  print("Converting images to one url..")
  # vapply() guarantees a character column; artists without any image get NA
  # rather than the string "character(0)".
  filtered_artists$image = vapply(
    filtered_artists$images,
    function(image){
      urls = image$url
      if(length(urls) == 0) NA_character_ else as.character(tail(urls, n=1))
    },
    character(1),
    USE.NAMES = FALSE
  )
  filtered_artists$images = NULL

  # Convert genres to string
  filtered_artists$genres = vapply(
    filtered_artists$genres,
    function(genre) paste(genre, collapse=', '),
    character(1),
    USE.NAMES = FALSE
  )

  return(list(filtered_artists, filtered_edges))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment