Skip to content

Instantly share code, notes, and snippets.

@awhstin
Last active March 25, 2016 15:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awhstin/e9ebe08006a775d4d093 to your computer and use it in GitHub Desktop.
Save awhstin/e9ebe08006a775d4d093 to your computer and use it in GitHub Desktop.
# This script is an addendum to the fantastic TripAdvisor scraping demo that
# Hadley published with rvest (rvest/demo/tripadvisor.R). It loops over
# attraction review pages on TripAdvisor, taking the page URLs from a CSV file.
library(rvest)
# Load the review-page URLs from url.csv. The file is read without a header
# row and with the single quote as the quoting character; strings are kept as
# character data rather than factors.
testurl <- read.csv(
  "url.csv",
  header = FALSE,
  quote = "'",
  stringsAsFactors = FALSE
)
# Flatten the data frame into one vector of URLs.
# NOTE: `list` shadows the name of the base function; the name is kept because
# later code in this script refers to it.
list <- unlist(testurl)
# Accumulator for the scraped reviews; stays NULL until pages are processed.
tripadvisor <- NULL
# Scrape every review page listed in `list`, building one data frame per page.
# Results are stored in a preallocated list and bound together once at the
# end instead of growing a data frame with rbind() on every iteration.
page_results <- vector("list", length(list))
for (i in seq_along(list)) {
  # Download and parse the page once; both the review bubbles and the member
  # columns are extracted from this single parsed document.
  page <- read_html(list[[i]])

  reviews <- page %>%
    html_nodes("#REVIEWS .innerBubble")

  id <- reviews %>%
    html_node(".quote a") %>%
    html_attr("id")

  quote <- reviews %>%
    html_node(".quote span") %>%
    html_text()

  # The star rating is read from the image alt text with the " of 5 stars"
  # suffix removed, then converted to an integer.
  rating <- reviews %>%
    html_node(".rating .rating_s_fill") %>%
    html_attr("alt") %>%
    gsub(" of 5 stars", "", .) %>%
    as.integer()

  # The date is taken from the `title` attribute and parsed as "%b %d, %Y".
  # NOTE(review): %b month-name parsing depends on the session locale.
  date <- reviews %>%
    html_node(".rating .ratingDate") %>%
    html_attr("title") %>%
    strptime("%b %d, %Y") %>%
    as.POSIXct()

  review <- reviews %>%
    html_node(".entry .partial_entry") %>%
    html_text() %>%
    as.character()

  member <- page %>%
    html_nodes("#REVIEWS .col1of2")

  location <- member %>%
    html_node(".location") %>%
    html_text()

  # NOTE(review): `location` comes from a different node set than the other
  # columns; data.frame() will fail if the two selectors match a different
  # number of nodes on a page -- confirm against the live markup.
  page_results[[i]] <- data.frame(
    id, quote, rating, review, date, location,
    stringsAsFactors = FALSE
  )
}
# The original loop prepended each page to the accumulated result, so the last
# page ends up first; rev() keeps that row order. With no URLs this is NULL.
tripadvisor <- do.call(rbind, rev(page_results))
# Grab the overall rating breakdown from a single attraction page.
# NOTE: `url` shadows the name of the base function url(); the name is kept
# in case other code refers to it.
url <- "https://www.tripadvisor.com/Attraction_Review-g35805-d103239-Reviews-or500-Art_Institute_of_Chicago-Chicago_Illinois.html#REVIEWS"
# Read the page named by `url`. The original piped the whole `list` vector of
# review URLs into read_html() and never used `url`.
totals <- url %>%
  read_html() %>%
  html_nodes(".main_content .barChart")
# Collect the text of each ".part" node into a one-column data frame and
# transpose it into a single row. The original `%>% t(data.frame())` expanded
# to t(x, data.frame()), which t() rejects as an unused argument.
ratings <- totals %>%
  html_nodes(".part") %>%
  html_text() %>%
  data.frame(stringsAsFactors = FALSE) %>%
  t()
# Export the collected reviews to a CSV file in the working directory.
write.csv(x = tripadvisor, file = "TripAdvisor.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment