Skip to content

Instantly share code, notes, and snippets.

yvlau92

Block or report user

Report or block yvlau92

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View skincare-recommend.R
# Helper: cosine similarity between two numeric vectors.
# Computes the dot product of x and y, then divides by the Euclidean norm
# of x and by the Euclidean norm of y in turn.
# Yields NaN when either vector is all zeros (0 / 0).
cossim <- function(x, y) {
  dot_xy <- sum(x * y)
  norm_x <- sqrt(sum(x * x))
  norm_y <- sqrt(sum(y * y))
  dot_xy / norm_x / norm_y
}
# "recommend" takes in a query(qr), number of products to return (number)
# and a data frame of precomputed tf-idf scores for skincare products (dt)
recommend <- function(qr,number,dt){
# product_words that contains words in the query
View tf-idf.R
# tf-idf implementation in R
#### TF-IDF for Products ###
# Per-product word counts:
#   1. keep only the product label and its review text,
#   2. split each review into one row per word (tidytext tokenizer),
#   3. tally how often each word occurs for each product, most frequent first.
product_words <- data %>%
  select(Product, ReviewContent) %>%
  tidytext::unnest_tokens(word, ReviewContent) %>%
  count(Product, word, sort = TRUE) %>%
  ungroup()
View skincare.py
from scrapy.spiders import Spider
from scrapy.http import Request
from scrapy.selector import Selector
from skincare.items import SkincareItem
class SkincareSpider(Spider):
name = "skincare_spider"
allowed_urls = ['http://www.totalbeauty.com/']
start_urls = ["http://www.totalbeauty.com/reviews/face/page%s" % page for page in xrange(1,1703)]
@yvlau92
yvlau92 / grades.R
Created Feb 13, 2017
Distribution of restaurant grades
View grades.R
# Shiny Server Code to render distribution of grades plot
output$grade_plot <- renderGvis({
# transform data for GoogleVis: group by year and grade
grade_only_grouped <- grade_data()%>%
group_by(year,grade)%>%
summarise(n=n())
#melt and cast data into the format for stacked googlevis
mgrade_only <- melt(grade_only_grouped, id = c("grade","year"))
View project1_blog.R
# n_infractions: number of infractions recorded for a restaurant (camis) on a
# given inspection date, i.e. the row count of `closure` per (camis, date)
# pair, ordered from the highest count to the lowest.
closure_n_infractions <- closure %>%
  group_by(camis, date) %>%
  summarise(n_infractions = n()) %>%
  arrange(desc(n_infractions))
# n_closures: number of closures a restaurant has had within the past 5 years.
closure_n_closures <- closure_n_infractions %>%
group_by(camis)%>%
summarise(n_closures = n())%>%
You can’t perform that action at this time.