This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# n_infractions: number of infractions recorded on a particular inspection
# date, computed as the number of rows of `closure` per (camis, date) pair.
# Rows are ordered from the most infractions to the fewest.
closure_n_infractions <- closure %>%
  group_by(camis, date) %>%
  summarise(n_infractions = n()) %>%
  # summarise() only drops the innermost grouping (date); remove the remaining
  # grouping by camis so later steps do not inherit it silently.
  ungroup() %>%
  arrange(desc(n_infractions))
# n_closures: number of closures a restaurant has had within the past 5 years.
closure_n_closures <- closure_n_infractions %>% | |
group_by(camis)%>% | |
summarise(n_closures = n())%>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tf-idf implementation in R
#### TF-IDF for Products ####
# Word frequencies per skincare product: keep only the product name and the
# review text, split each review into one token per row (column `word`), then
# count every (Product, word) pair, most frequent first.
product_words <- data %>%
  select(Product, ReviewContent) %>%
  tidytext::unnest_tokens(word, ReviewContent) %>%
  count(Product, word, sort = TRUE) %>%
  ungroup()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Cosine similarity between two numeric vectors
#'
#' Auxiliary function: divides the dot product of `x` and `y` by the product
#' of their Euclidean norms.
#'
#' @param x A numeric vector.
#' @param y A numeric vector. It is multiplied element-wise with `x`, so it is
#'   expected to have the same length (R would otherwise recycle silently).
#' @return A single number. The result is `NaN` when either vector is all
#'   zeros, because the expression then evaluates 0 / 0.
cossim <- function(x, y) {
  sum(x * y) / sqrt(sum(x * x)) / sqrt(sum(y * y))
}
# "recommend" takes a query (qr), the number of products to return (number),
# and a data frame of precalculated tf-idf values for skincare products (dt).
recommend <- function(qr,number,dt){ | |
# product_words that contains words in the query |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.spiders import Spider | |
from scrapy.http import Request | |
from scrapy.selector import Selector | |
from skincare.items import SkincareItem | |
class SkincareSpider(Spider): | |
name = "skincare_spider" | |
allowed_urls = ['http://www.totalbeauty.com/'] | |
start_urls = ["http://www.totalbeauty.com/reviews/face/page%s" % page for page in xrange(1,1703)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shiny Server Code to render distribution of grades plot | |
output$grade_plot <- renderGvis({ | |
# transform data for GoogleVis: group by year and grade | |
grade_only_grouped <- grade_data()%>% | |
group_by(year,grade)%>% | |
summarise(n=n()) | |
#melt and cast data into the format for stacked googlevis | |
mgrade_only <- melt(grade_only_grouped, id = c("grade","year")) |