Skip to content

Instantly share code, notes, and snippets.

@luisDVA
Last active November 8, 2017 16:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luisDVA/9c12fff91cf1df47645c03ad224db9bc to your computer and use it in GitHub Desktop.
Save luisDVA/9c12fff91cf1df47645c03ad224db9bc to your computer and use it in GitHub Desktop.
ggpup function modified to take in a vector of dog breeds to search for
# define function
# this code is just for fun and I do not have rights to the images on dogtime.com I just like dogs
ggpupBV <- function(ggplotObject,breedVector){
# required packages
require(dplyr)
require(jpeg)
require(grid)
require(gridExtra)
require(RCurl)
require(rvest)
require(stringi)
require(extrafont)
require(fuzzyjoin)
# scrape a list of image URLS from the dogtime breed profiles homepage
if (!exists("imgurls")){
# read page source
dogIndex <- read_html("http://dogtime.com/dog-breeds/profiles")
# keep node of interest (identified using Selectorgadget)
scrapedHtml <- dogIndex %>% html_nodes(".horizontal-group-listing") %>% toString()
# match image urls
## Find everything that starts with "http"
### No white spaces allowed
### Ends with jpg
#### Note: this regex is for PC
imgurlslist <- stri_match_all_regex(scrapedHtml,"(http[^\\s]+(jpg)\\b)")
# subset into character vector
imgurls <- imgurlslist[[1]][,1]
}
# convert list to df
imgurlsDF <- data.frame(imgurls)
# clean up urls (gradually)
imgurlsDF$breedname <- stri_extract_all_regex(imgurlsDF$imgurls,"(?<=\\_).*?(?=\\.)")
imgurlsDF$breedname <- stri_extract_all_regex(imgurlsDF$breedname,"(?<=\\_).*?(?=300)")
imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,pattern = "-"," ")
imgurlsDF$breedname <- trimws(imgurlsDF$breedname)
imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"dog breed","")
imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"what is a","")
imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"what is the","")
#match provided breed vector with URLS
imgurlsDF <- imgurlsDF %>% filter(!is.na(breedname))
breedVector <- data.frame(breeds=breedVector)
joinedMatches <- stringdist_left_join(breedVector,imgurlsDF, by=c("breeds"="breedname"),max_dist=3)
joinedMatches$imgurls <- as.character(joinedMatches$imgurls)
joinedMatches <- joinedMatches %>% filter(!is.na(imgurls))
# scrape two dog breed photos
urlInd <- sample(nrow(joinedMatches),2,rep=F)
# for upper right corner
dogImg.URLU <- joinedMatches$imgurls[urlInd[1]]
# save as a raster object
dogImgU <- rasterGrob(readJPEG(getURLContent(dogImg.URLU)))
# label for grob
dogImgUlab <- textGrob(paste(stri_trans_totitle(joinedMatches$breedname[urlInd[1]])))
# for lower right corner
dogImg.URLL <- joinedMatches$imgurls[urlInd[2]]
# save as a raster object
dogImgL <- rasterGrob(readJPEG(getURLContent(dogImg.URLL)))
# label for grob
dogImgLlab <- textGrob(paste(stri_trans_totitle(joinedMatches$breedname[urlInd[2]])))
# graphical parameters
# define plot layout
lay <- rbind(c(1,1,2),
c(1,1,3),
c(1,1,4),
c(1,1,5))
# set up some attribution text
# the fontfamily parameter is optional, erase or change to something else to match the fonts available for your system
rightText=textGrob("images from www.dogtime.com", rot=90, gp=gpar(fontfamily = "Roboto Condensed Light"))
# arrange the plot and the image side by side
grid.arrange(ggplotObject, dogImgU, dogImgUlab, dogImgL, dogImgLlab, layout_matrix=lay,widths=c(2,1,1),right=rightText)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment