Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created December 20, 2018 20:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/0d20c2da8d8e939af012014430bf52f9 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/0d20c2da8d8e939af012014430bf52f9 to your computer and use it in GitHub Desktop.
# Year: 2012
# http://www.runtwolf.com/cc-2012-photo-gallery.html
# clear workspace, load libraries
rm(list=ls())
library(rvest)
library(dplyr)
# declare functions
# Return the last `n` characters of each element of `x`
# (the whole element when it is shorter than `n`).
substrRight <- function(x, n) {
  len <- nchar(x)
  substr(x, len - n + 1, len)
}
# Return the first `n` characters of each element of `x`.
substrLeft <- function(x, n) {
  substr(x, start = 1, stop = n)
}
# Fetch the 2012 gallery index page and derive, for every album linked from
# it, the album name (`albums`) and the URL of its thumbnail page
# (`album_urls`).
url <- 'http://www.runtwolf.com/cc-2012-photo-gallery.html'
download.file(url, destfile = 'page.html', quiet = TRUE)
content <- read_html('page.html')

# Keep only the links that point at a 2012 album.
albums <- content %>% html_nodes('a') %>% html_attr('href')
albums <- albums[grepl('old.runtwolf.com/CC2012/', albums)]

# The album name is the text between '/CC2012/' and '/index.html'.
# regexpr() returns a plain integer vector holding the first match per link;
# gregexpr() returns a list that substr() can only coerce while every link
# contains each marker exactly once. fixed = TRUE matches the markers
# literally, so '.' is not treated as a wildcard.
name_start <- regexpr('/CC2012/', albums, fixed = TRUE)
name_end <- regexpr('/index.html', albums, fixed = TRUE)
albums <- substr(albums, name_start + nchar('/CC2012/'), name_end - 1)

# Links without an '/index.html' marker yield an empty name; drop them so
# they do not turn into broken URLs and an unnamed directory later on.
albums <- albums[nzchar(albums)]
if (length(albums) == 0) {
  # paste0() would otherwise still fabricate one URL from the fixed parts.
  stop('No 2012 album links found on ', url, call. = FALSE)
}

# NOTE(review): host and path case here ('www.old...', 'cc2012') differ from
# the link filter above ('old...', 'CC2012') — confirm the server serves both.
album_urls <- paste0('http://www.old.runtwolf.com/cc2012/', albums, '/thumbnails.html')

# The albums are processed in three manual sessions; enable exactly one pair.
# head() is used for the first session so that a gallery with fewer than 30
# albums is not padded with NA entries.
# Session 1
albums <- head(albums, 30)
album_urls <- head(album_urls, 30)
# Session 2
#albums <- albums[31:60]
#album_urls <- album_urls[31:60]
# Session 3
#albums <- albums[61:length(albums)]
#album_urls <- album_urls[61:length(album_urls)]
##### DOWNLOAD PHOTOS FROM EACH ALBUM #####
# For every album: fetch its thumbnail page, read the image file names from
# the <img> tags, and save each image as <mainDir>/<album>/<album>-<j>.jpg.
mainDir <- '/Users/erikgregorywebb/Documents/Python/runtwolf/R/2012/XC'

# Paths are built with file.path() instead of calling setwd(), so the
# session's working directory is left untouched.
dir.create(mainDir, showWarnings = FALSE, recursive = TRUE)
page_file <- file.path(mainDir, 'page.html')

# seq_along() makes both loops no-ops on empty input; 1:length(x) would
# iterate over c(1, 0) and request an '.../images/NA' URL.
for (i in seq_along(album_urls)) {
  Sys.sleep(1)  # pause between requests
  download.file(album_urls[i], destfile = page_file, quiet = TRUE)
  content <- read_html(page_file)
  images <- content %>% html_nodes('img') %>% html_attr('src')
  # Keep the last 12 characters of each src as the image file name.
  # NOTE(review): this assumes every file name is exactly 12 characters
  # long — confirm against the site.
  images <- substrRight(images, 12)

  album_dir <- file.path(mainDir, albums[i])
  # showWarnings = FALSE: re-running a session must not warn about
  # directories that already exist.
  dir.create(album_dir, showWarnings = FALSE, recursive = TRUE)

  for (j in seq_along(images)) {
    # An <img> without a src attribute yields NA: skip it, but keep j so the
    # numbering of the saved files stays tied to the position on the page.
    if (is.na(images[j])) next
    Sys.sleep(1)
    url <- paste0('http://old.runtwolf.com/CC2012/', albums[i], '/images/', images[j])
    print(url)
    dest <- file.path(album_dir, paste0(albums[i], '-', j, '.jpg'))
    # Best effort: report the image that failed and carry on with the next.
    tryCatch(
      download.file(url, destfile = dest, mode = 'wb', quiet = TRUE),
      error = function(e) {
        message('Failed to download ', url, ': ', conditionMessage(e))
      }
    )
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment