Skip to content

Instantly share code, notes, and snippets.

# n_infractions: how many rows of `closure` share the same camis and
# inspection date, i.e. the infractions recorded at that inspection, listed
# from the highest count to the lowest.
# NOTE(review): assumes each row of `closure` is one infraction and that
# camis identifies a restaurant -- confirm against the data source.
# summarise() drops only the last grouping level (date), so the result is
# still grouped by camis.
closure_n_infractions <- closure %>%
  group_by(camis, date) %>%
  summarise(n_infractions = n()) %>%
  arrange(desc(n_infractions))
# n_closures: number of closures a restaurant has had within the past 5 years.
closure_n_closures <- closure_n_infractions %>%
group_by(camis)%>%
summarise(n_closures = n())%>%
@yvlau92
yvlau92 / grades.R
Created February 13, 2017 23:14
Distribution of restaurant grades
# Shiny Server Code to render distribution of grades plot
output$grade_plot <- renderGvis({
# transform data for GoogleVis: group by year and grade
grade_only_grouped <- grade_data()%>%
group_by(year,grade)%>%
summarise(n=n())
# melt and cast the data into the format required for a stacked googleVis chart
mgrade_only <- melt(grade_only_grouped, id = c("grade","year"))
from scrapy.spiders import Spider
from scrapy.http import Request
from scrapy.selector import Selector
from skincare.items import SkincareItem
class SkincareSpider(Spider):
name = "skincare_spider"
allowed_urls = ['http://www.totalbeauty.com/']
start_urls = ["http://www.totalbeauty.com/reviews/face/page%s" % page for page in xrange(1,1703)]
@yvlau92
yvlau92 / tf-idf.R
Last active February 21, 2017 03:12
# tf-idf implementation in R
#### TF-IDF for Products ###
# Word counts per skincare product: break each review's text into one token
# per row (column `word`), then tally how often every word occurs for each
# Product, with the largest counts first.
product_words <- data %>%
  select(Product, ReviewContent) %>%
  tidytext::unnest_tokens(word, ReviewContent) %>%
  count(Product, word, sort = TRUE) %>%
  ungroup()
# Helper: cosine similarity of two numeric vectors x and y, i.e. their dot
# product divided by the Euclidean norm of each vector in turn.
# Yields NaN when either vector is all zeros (0 / 0).
cossim <- function(x, y) {
  dot_xy <- sum(x * y)
  norm_x <- sqrt(sum(x * x))
  norm_y <- sqrt(sum(y * y))
  dot_xy / norm_x / norm_y
}
# "recommend" takes in a query(qr), number of products to return (number)
# and a data frame of precalculated tf-idf values for skincare products (dt)
recommend <- function(qr,number,dt){
# product_words that contains words in the query