Last active
June 10, 2018 09:00
-
-
Save dsilvadeepal/f7500b44800ff9aeea568cae058e17e4 to your computer and use it in GitHub Desktop.
Extracting Popular Songs and Lyrics of the top 10 Artists
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape song titles and lyrics from genius.com for the artists in
# `top_artists`, producing the tibble `artist_lyrics` with columns
# Rank, Artist, Song and Lyrics.

# Build the Genius artist-page URL for each artist.
# NOTE(review): artist names are pasted in verbatim; names containing spaces
# or punctuation may not form a valid Genius URL -- confirm against the
# values actually stored in top_artists$Artist.
genius_urls <- paste0("https://genius.com/artists/", top_artists$Artist)

# Scrape limits and the pause (in seconds) between song-page requests.
max_artists <- 10L
max_songs <- 10L
delay_secs <- 10

# Download and parse a page. On failure, emit a warning and return NULL so a
# single bad request does not abort the whole (long-running) scrape.
fetch_page <- function(url) {
  tryCatch(
    read_html(url),
    error = function(e) {
      warning(
        "Failed to read ", url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Collect one tibble per song here and combine them once at the end, rather
# than growing the result with rbind() on every iteration.
song_rows <- list()

# Outer loop: get the song links for each artist.
for (i in seq_len(min(max_artists, length(genius_urls)))) {
  genius_page <- fetch_page(genius_urls[i])
  if (is.null(genius_page)) {
    next
  }

  song_links <- html_nodes(genius_page, ".mini_card_grid-song a") %>%
    html_attr("href")
  # Anchors without an href yield NA, which cannot be fetched.
  song_links <- song_links[!is.na(song_links)]

  # Inner loop: get the song name and lyrics from each song link. The bound
  # is capped by the number of links actually found on the artist page.
  for (j in seq_len(min(max_songs, length(song_links)))) {
    # Fetch the song page once and reuse it for both the lyrics and the title.
    song_page <- fetch_page(song_links[j])

    if (!is.null(song_page)) {
      # Get lyrics
      lyrics_scraped <- song_page %>%
        html_nodes("div.lyrics p") %>%
        html_text()

      # Get song name
      song_name <- song_page %>%
        html_nodes("h1.header_with_cover_art-primary_info-title") %>%
        html_text()

      # Save the details for this song
      song_rows[[length(song_rows) + 1L]] <- tibble(
        Rank = top_artists$Rank[i],
        Artist = top_artists$Artist[i],
        Song = song_name,
        Lyrics = lyrics_scraped
      )
    }

    # Insert a time lag between requests to avoid being blocked by the site.
    Sys.sleep(delay_secs)
  }
}

# Combine the per-song rows; stays an empty tibble if nothing was scraped.
artist_lyrics <- tibble()
if (length(song_rows) > 0) {
  artist_lyrics <- do.call(rbind, song_rows)
}

# Inspect the results
artist_lyrics
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment