Last active
February 10, 2017 21:39
-
-
Save ngopal/4bc59560ad80fdfb2e08823d18099d1b to your computer and use it in GitHub Desktop.
A very quick, surface-level analysis of wines using the wine.com API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rjson)

# API key for the wine.com catalog service -- request your own from wine.com.
apikey <- "REDACTED"

# Catalog query: category 614 (light and crisp white wines, per the original
# author's note), starting at offset 0 and asking for at most 100 products.
url <- paste0(
  "http://services.wine.com/api/beta2/service.svc/json/catalog",
  "?filter=categories(614)&offset=0&size=100&apikey=",
  apikey
)
document <- fromJSON(file = url, method = "C")
products <- document$Products$List

# Coerce one product field to a single number. A missing (NULL) or non-scalar
# field becomes NA, so every product still contributes a complete row instead
# of aborting the whole extraction.
field_as_number <- function(x) {
  if (is.null(x) || length(x) != 1L) {
    return(NA_real_)
  }
  as.numeric(x)
}

# One row per returned product. The frame is sized from the response itself,
# so short (or empty) responses need no zero-filled padding rows.
ds <- data.frame(
  price = vapply(
    products, function(p) field_as_number(p$PriceRetail), numeric(1)
  ),
  rating = vapply(
    products, function(p) field_as_number(p$Ratings$HighestScore), numeric(1)
  ),
  vintage = vapply(
    products, function(p) field_as_number(p$Vintage), numeric(1)
  )
)

# Keep only wines with a usable rating. A rating of 0 is presumably the API's
# marker for "unrated" -- TODO confirm against the API documentation.
ds <- ds[!is.na(ds$rating) & ds$rating != 0, ]
# Model 1: simple linear regression of retail price on highest rating.
# Rows containing missing values are dropped (na.omit).
lm1 <- lm(
  formula = price ~ rating,
  data = ds,
  na.action = na.omit
)
summary(lm1)
# Output from a previous run:
#
# Call:
# lm(formula = price ~ rating, data = ds, na.action = na.omit)
#
# Residuals:
#     Min      1Q  Median      3Q     Max
# -27.580  -4.516  -2.241   2.585  80.686
#
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept) -455.690    103.979  -4.383 4.70e-05 ***
# rating         5.266      1.139   4.624 2.01e-05 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 14.71 on 61 degrees of freedom
# Multiple R-squared:  0.2595, Adjusted R-squared:  0.2474
# F-statistic: 21.38 on 1 and 61 DF,  p-value: 2.007e-05
#
# Interpretation: each additional rating point is associated with a retail
# price roughly $5 higher (slope estimate 5.266).

# Scatter plot of price against rating, with the fitted line overlaid.
plot(x = ds$rating, y = ds$price)
abline(reg = lm1)
# Model 2: simple linear regression of highest rating on vintage year.
# Rows containing missing values are dropped (na.omit).
lm2 <- lm(
  formula = rating ~ vintage,
  data = ds,
  na.action = na.omit
)
summary(lm2)
# Output from a previous run:
#
# Call:
# lm(formula = rating ~ vintage, data = ds, na.action = na.omit)
#
# Residuals:
#     Min      1Q  Median      3Q     Max
# -2.4987 -1.0194 -0.4987  0.9806  4.0220
#
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) 1056.7887   529.4167   1.996   0.0504 .
# vintage       -0.4793     0.2628  -1.824   0.0731 .
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 1.611 on 61 degrees of freedom
# Multiple R-squared:  0.0517, Adjusted R-squared:  0.03616
# F-statistic: 3.326 on 1 and 61 DF,  p-value: 0.0731
#
# Interpretation: each one-year-later vintage is associated with about half a
# rating point less (slope estimate -0.4793), although this slope is not
# significant at the 5% level (p = 0.0731).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment