Skip to content

Instantly share code, notes, and snippets.

@andland
Last active August 29, 2015 14:18
Show Gist options
  • Save andland/52f8a97a111dcd8778aa to your computer and use it in GitHub Desktop.
Save andland/52f8a97a111dcd8778aa to your computer and use it in GitHub Desktop.
Scrape Amazon's Trade-In Value
# Scrape the current Amazon trade-in value for every book listed in
# AmazonBookURLs.csv and append one dated row per book to the running price
# history kept in AmazonBookPrices.RData (object name: price_df).
library(rvest)

urls_df = read.csv("AmazonBookURLs.csv", stringsAsFactors = FALSE, comment.char = "")

# On the very first run there is no history file yet; start from an empty
# history instead of failing inside load().
price_file = "AmazonBookPrices.RData"
if (file.exists(price_file)) {
  load(file = price_file)
} else {
  price_df = NULL
}

# One row per book for this run. Price stays NA when no trade-in value could
# be obtained, so gaps remain visible in the history. rep() keeps the column
# lengths consistent even when the URL table is empty.
n_books = nrow(urls_df)
price_df_temp = data.frame(Title = urls_df$Title,
                           Date = rep(Sys.time(), n_books),
                           Price = rep(NA_real_, n_books),
                           stringsAsFactors = FALSE)

for (i in seq_len(n_books)) {
  # A single failed request must not abort the whole run: report it and
  # leave this book's price as NA.
  tradein_html = tryCatch(
    urls_df$URL[i] %>%
      read_html() %>%
      html_node("#tradeInButton_tradeInValue"),
    error = function(e) {
      warning("Could not fetch trade-in value for '", urls_df$Title[i], "': ",
              conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(tradein_html)) {
    next
  }

  # When the page has no trade-in element, the extracted text is missing
  # rather than the node being NULL, so test the text as well.
  price_text = html_text(tradein_html)
  if (length(price_text) != 1 || is.na(price_text)) {
    next
  }

  # Strip surrounding whitespace, an optional leading "$" (with or without
  # whitespace before it) and thousands separators before converting.
  price = price_text %>%
    gsub("(^[[:space:]]*\\$?[[:space:]]*|[[:space:]]+$)", "", .) %>%
    gsub(",", "", ., fixed = TRUE) %>%
    as.numeric()
  price_df_temp$Price[i] = price
}

# rbind() ignores a NULL history, so the first run simply stores this batch.
price_df = rbind(price_df, price_df_temp)
save(price_df, file = price_file)
Title URL
Data Clustering C++ http://www.amazon.com/Data-Clustering-Object-Oriented-Knowledge-Discovery/dp/1439862230
Transportation Statistics and Microsimulation http://www.amazon.com/Transportation-Statistics-Microsimulation-Clifford-Spiegelman/dp/1439800235
Fundamentals of Transportation and Traffic Operations http://www.amazon.com/Fundamentals-Transportation-Traffic-Operations-Daganzo/dp/0080427855
A First Course in Stochastic Processes http://www.amazon.com/First-Course-Stochastic-Processes-Second/dp/0123985528
A Probability Path http://www.amazon.com/A-Probability-Path-Sidney-Resnick/dp/081764055X
A Primer on Linear Models http://www.amazon.com/Primer-Linear-Chapman-Statistical-Science/dp/1420062018
Statistical Approach to Genetic Epidemiology http://www.amazon.com/Statistical-Approach-Genetic-Epidemiology-Applications/dp/3527323899
Intro Trans Engineering http://www.amazon.com/Introduction-Transportation-Engineering-Banks-James/dp/0072431881
# Plot the trade-in price history of every book, one facet per title.
# Expects `price_df` (columns Title, Date, Price) to already be present in
# the workspace.
library(ggplot2); theme_set(theme_bw())
library(scales)

# Shorten long titles so the facet strip labels stay readable: keep the first
# 30 characters of Title and append "..." only when something was cut off.
# The label must be derived from Title; TitleTrunc does not exist before
# this assignment.
price_df$TitleTrunc = paste0(substring(price_df$Title, 1, 30),
                             ifelse(nchar(price_df$Title) > 30, "...", ""))

# Step lines show the value holding until the next observation; each facet
# gets its own y-axis range because the books differ widely in price.
ggplot(price_df, aes(Date, Price)) +
  geom_step() +
  geom_point() +
  facet_wrap(~ TitleTrunc, scales = "free_y") +
  scale_y_continuous(labels = dollar) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment