Skip to content

Instantly share code, notes, and snippets.

@yvlau92
Created February 21, 2017 02:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yvlau92/1b7d1c98fb747c29a1d7d8146fe77d41 to your computer and use it in GitHub Desktop.
Save yvlau92/1b7d1c98fb747c29a1d7d8146fe77d41 to your computer and use it in GitHub Desktop.
# Helper: cosine similarity of two numeric vectors `x` and `y`,
# i.e. their dot product divided by the product of their Euclidean norms.
# Yields NaN when either vector has zero norm (0 / 0).
cossim <- function(x, y) {
  dot_product <- sum(x * y)
  norm_x <- sqrt(sum(x * x))
  norm_y <- sqrt(sum(y * y))
  dot_product / norm_x / norm_y
}
# "recommend" takes in a query(qr), number of products to return (number)
# and a dataframe of precalculated tf-idf for skincare products(dt)
recommend <- function(qr, number, dt) {
  # Rank the products in `dt` by cosine similarity to the query `qr`.
  #
  # Args:
  #   qr:     data frame with columns `word` and `tf_idf`.
  #           NOTE(review): expected to hold one row per query word;
  #           duplicated query words are not merged -- confirm with callers.
  #   number: maximum number of products to return (one non-negative number).
  #   dt:     data frame with columns `word`, `Product` and `tf_idf`.
  #
  # Returns:
  #   A data frame with columns `Product` (character) and `cossim` (numeric),
  #   sorted by decreasing similarity, with at most `number` rows. Products
  #   whose similarity is undefined (NaN/NA, e.g. an all-zero vector) are
  #   dropped. An empty data frame with the same columns is returned when no
  #   product contains any query word.
  stopifnot(
    is.numeric(number), length(number) == 1L, !is.na(number), number >= 0
  )

  query_words <- as.character(qr$word)
  query_weights <- as.numeric(qr$tf_idf)

  # Keep only the tf-idf entries whose word occurs in the query.
  dt_words <- as.character(dt$word)
  keep <- dt_words %in% query_words & !is.na(dt$Product)
  if (!any(keep)) {
    return(data.frame(
      Product = character(0), cossim = numeric(0), stringsAsFactors = FALSE
    ))
  }
  hit_words <- dt_words[keep]
  hit_products <- as.character(dt$Product[keep])
  hit_weights <- as.numeric(dt$tf_idf[keep])
  # A missing tf-idf counts as "word absent from product".
  hit_weights[is.na(hit_weights)] <- 0

  if (anyDuplicated(data.frame(hit_words, hit_products)) > 0L) {
    stop("`dt` holds more than one tf_idf value for a (word, Product) pair",
         call. = FALSE)
  }

  # Word-by-product weight matrix. Rows follow the order of the query rows,
  # so every product column is aligned with `query_weights` by word rather
  # than by position; words a product does not contain stay 0.
  products <- sort(unique(hit_products))
  weights <- matrix(0, nrow = length(query_words), ncol = length(products))
  cells <- cbind(match(hit_words, query_words), match(hit_products, products))
  weights[cells] <- hit_weights

  # Same formula as cossim(), applied to all products at once.
  dot_products <- as.vector(crossprod(weights, query_weights))
  product_norms <- sqrt(colSums(weights * weights))
  query_norm <- sqrt(sum(query_weights * query_weights))
  scores <- dot_products / product_norms / query_norm

  ranked <- data.frame(
    Product = products, cossim = scores, stringsAsFactors = FALSE
  )
  ranked <- ranked[!is.na(ranked$cossim), , drop = FALSE]
  # order() leaves ties in their original (sorted product name) order.
  ranked <- ranked[order(-ranked$cossim), , drop = FALSE]
  ranked <- head(ranked, number)
  rownames(ranked) <- NULL
  ranked
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment