Created
April 27, 2016 03:45
-
-
Save daranzolin/333970b5919df256c77c85f0cef266f7 to your computer and use it in GitHub Desktop.
Yelp API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(httr) | |
library(jsonlite) | |
library(dplyr) | |
library(knitr) | |
library(ggplot2) | |
# OAuth 1.0 credentials for the Yelp API.
# Each value is read from an environment variable when one is set, so real
# secrets do not have to be written into this file; the "xxxx" placeholders
# remain the fallback when the variable is unset.
consumer_key <- Sys.getenv("YELP_CONSUMER_KEY", unset = "xxxx")
consumer_secret <- Sys.getenv("YELP_CONSUMER_SECRET", unset = "xxxx")
token <- Sys.getenv("YELP_TOKEN", unset = "xxxx")
token_secret <- Sys.getenv("YELP_TOKEN_SECRET", unset = "xxxx")

# `myapp` describes the registered application; `sig` is the signed request
# configuration that is passed to every GET() call below.
myapp <- oauth_app("YELP", key = consumer_key, secret = consumer_secret)
sig <- sign_oauth1.0(myapp, token = token, token_secret = token_secret)
#Define function | |
#' Search the Yelp v2 API for open nightlife businesses near a location
#'
#' Sends a signed GET request (using the global `sig` OAuth configuration) to
#' the Yelp search endpoint with `category_filter=nightlife`, flattens the JSON
#' response into a data frame and keeps businesses that are not marked closed.
#'
#' @param location A single string naming the place to search, e.g.
#'   `"AustinTX"`. It is URL-encoded before being added to the query.
#' @param desperate If `TRUE`, widen the search (limit 20, radius 25000) and
#'   keep every rating. If `FALSE` (default), use limit 5, radius 5000 and
#'   keep only businesses rated above 3.
#' @return A data frame with columns `Name`, `Rating`, `Categories`,
#'   `Address` and `Sexy_Fun_Party` (whether the snippet text matches
#'   "sexy", "party" or "fun", case-insensitively), sorted by descending
#'   `Rating`. Zero rows when the API returns no businesses.
get_cool_places <- function(location, desperate = FALSE) {
  stopifnot(is.character(location), length(location) == 1L, nzchar(location))
  # Scalar flag: plain if/else below instead of vectorised ifelse()
  desperate <- isTRUE(as.logical(desperate))

  base_url <- "https://api.yelp.com/v2/search?category_filter=nightlife"
  limit <- if (desperate) "&limit=20" else "&limit=5"
  radius <- if (desperate) "&radius_filter=25000" else "&radius_filter=5000"
  # Encode the user-supplied value so spaces, commas, '&' etc. cannot break
  # (or inject extra parameters into) the query string
  location_param <- paste0(
    "&location=",
    utils::URLencode(location, reserved = TRUE)
  )

  response <- GET(paste0(base_url, location_param, radius, limit), sig)
  # Fail here with the HTTP error rather than later with a confusing
  # "column not found" error from the data-frame pipeline
  stop_for_status(response)

  # `as = "text"` asks for the raw body; `type` would override the MIME type
  body_text <- content(response, as = "text", encoding = "UTF-8")
  parsed <- fromJSON(body_text)

  if (NROW(parsed$businesses) == 0L) {
    # No matches: return an empty frame with the documented columns
    return(data.frame(
      Name = character(0),
      Rating = numeric(0),
      Categories = I(list()),
      Address = I(list()),
      Sexy_Fun_Party = logical(0),
      stringsAsFactors = FALSE
    ))
  }

  # data.frame() prefixes the nested business fields with "businesses.";
  # jsonlite::flatten() is namespaced so another attached package cannot
  # mask it
  df <- jsonlite::flatten(data.frame(parsed)) %>%
    dplyr::filter(!businesses.is_closed) %>%
    dplyr::select(
      businesses.name,
      businesses.rating,
      businesses.categories,
      businesses.location.address,
      businesses.snippet_text
    ) %>%
    dplyr::mutate(
      fun = grepl("sexy|party|fun", businesses.snippet_text, ignore.case = TRUE)
    ) %>%
    dplyr::select(-businesses.snippet_text)
  names(df) <- c("Name", "Rating", "Categories", "Address", "Sexy_Fun_Party")

  # Only the non-desperate search is picky about ratings
  if (!desperate) {
    df <- dplyr::filter(df, as.numeric(Rating) > 3)
  }
  dplyr::arrange(df, dplyr::desc(Rating))
}
# Define locations, then compare average Yelp ratings across them
places <- c(
  "SunnyvaleCA", "WallaWallaWA", "AustinTX",
  "DecaturGA", "SanfordNC", "BellinghamWA", "LodiCA",
  "AngwinCA", "RedlandsCA", "ColtonCA", "MountainViewCA"
)

# One summary row per location. lapply() replaces the index loop, so there is
# no 1:length() edge case on an empty vector and no list grown element by
# element.
location_list <- lapply(places, function(place) {
  get_cool_places(place, desperate = TRUE) %>%
    dplyr::summarize(
      avg_rating = mean(as.numeric(Rating)),
      # Sexy_Fun_Party is logical, so its sum is the number of TRUE rows
      hotness = sum(Sexy_Fun_Party)
    ) %>%
    dplyr::mutate(location = place)
})
location_df <- dplyr::bind_rows(location_list)

# Order the factor levels by average rating so the bars plot in ascending order
location_df$location <- factor(
  location_df$location,
  levels = location_df$location[order(location_df$avg_rating)]
)
# Bar chart: one bar per location (height = average rating), filled on a
# blue-to-red gradient by the "hotness" count.
# NOTE(review): the fill scale is capped at limits = c(0, 3); hotness values
# above 3 fall outside the scale -- confirm that is intended.
ggplot(location_df, aes(x = location, y = avg_rating, fill = hotness)) +
  geom_col() +
  scale_fill_gradient(low = "blue", high = "red", limits = c(0, 3)) +
  labs(
    title = "Average Yelp Ratings by Location and Hotness",
    x = "Location",
    y = "Average Yelp Rating"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment