Created
February 21, 2017 02:44
-
-
Save yvlau92/1b7d1c98fb747c29a1d7d8146fe77d41 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Helper: cosine similarity between two numeric vectors of equal length.
# Computed as the dot product of x and y divided by each vector's
# Euclidean norm in turn. If either vector has zero norm the division
# is 0/0 and the result is NaN.
cossim <- function(x, y) {
  dot_product <- sum(x * y)
  norm_x <- sqrt(sum(x * x))
  norm_y <- sqrt(sum(y * y))
  dot_product / norm_x / norm_y
}
# "recommend" ranks products by cosine similarity to a query.
#
# Arguments:
#   qr     - data frame describing the query; the columns `word` and
#            `tf_idf` are read.
#   number - maximum number of products to return.
#   dt     - data frame of precalculated tf-idf values for skincare
#            products; the columns `word`, `Product` and `tf_idf` are read.
#
# Returns a data frame with the columns `Product` and `cossim`, sorted by
# decreasing similarity and holding at most `number` rows. Products whose
# similarity is undefined (zero norm over the query words) are left out.
# If no query word occurs in `dt`, an empty data frame with the same two
# columns is returned.
#
# The similarity is computed only over the query words that occur in `dt`.
recommend <- function(qr, number, dt) {
  # rows of dt whose word appears in the query
  product_words_sub <- dt %>%
    filter(word %in% qr$word) %>%
    select(word, Product, tf_idf)

  # nothing matched: there is no product to score
  if (nrow(product_words_sub) == 0) {
    return(data.frame(
      Product = character(0),
      cossim = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # one row per word, one column per Product
  product_tf_idf <- spread(product_words_sub, key = Product, value = tf_idf)
  # a product that lacks a word has weight zero for it
  product_tf_idf[is.na(product_tf_idf)] <- 0
  product_tf_idf <- product_tf_idf[order(product_tf_idf$word), ]

  # Line the query weights up with the word rows. Taking qr$tf_idf as-is
  # only works when the query is already sorted by word and every query
  # word occurs in dt; otherwise the vectors are misaligned or recycled.
  row_words <- as.character(product_tf_idf$word)
  query_weights <- qr$tf_idf[match(row_words, as.character(qr$word))]

  # drop = FALSE keeps the product name when only one product matched
  weights <- as.matrix(product_tf_idf[, -1, drop = FALSE])

  # similarity of every product column with the query
  scores <- vapply(
    seq_len(ncol(weights)),
    function(j) cossim(weights[, j], query_weights),
    numeric(1)
  )

  result <- data.frame(
    Product = colnames(weights),
    cossim = scores,
    stringsAsFactors = FALSE
  )

  # Sort by similarity and keep at most `number` rows; ties beyond
  # `number` are cut off. Undefined scores are removed first, matching
  # what the earlier top_n()-based ranking did with them.
  result <- result %>%
    filter(!is.na(cossim)) %>%
    arrange(desc(cossim))
  head(result, number)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment