This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prune_ngram_df_by_cover_percentage <- function(df, percentage) { | |
# assumes df contains columns (word, freq) | |
# assumes df is sorted by freq in descending order | |
# prune ngrams by finding the minimum number of ngrams that cover X percent of the word instances | |
sums <- cumsum(df$freq) | |
cover <- which(sums >= sum(df$freq) * percentage)[1] | |
print(sprintf("%s of %s (%s%%) cover %s%% of word instances", | |
cover, | |
nrow(df), | |
cover/nrow(df)*100, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"facets": { | |
"terms": { | |
"facet_filter": { | |
"fquery": { | |
"query": { | |
"filtered": { | |
"filter": { | |
"bool": { | |
"must": [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Index the document for user id 2 under users/user; the body holds only that
# user's "friends" list (ids "1" and "3").
# NOTE(review): the trailing "| |" on each line looks like table residue from a
# web extraction rather than part of the commands -- confirm and strip it before
# running.
curl -XPUT localhost:9200/users/user/2 -d '{ | |
"friends" : ["1", "3"] | |
}' | |
# Index tweet id 1 under tweets/tweet. The body's "user" field is "1", so the
# tweet is attributed to user 1 (one of the ids in user 2's friends list), not
# to user 2.
curl -XPUT localhost:9200/tweets/tweet/1 -d '{ | |
"user" : "1", | |
"tweet" : "hi i am user 1 " | |
}' | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -XDELETE localhost:9200/test | |
curl -XPOST localhost:9200/test -d ' | |
{ | |
"mappings": { | |
"type1": { | |
"properties": { | |
"message": { | |
"index": "analyzed", | |
"type": "string" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -XPOST localhost:9200/test -d ' | |
{ | |
"mappings": { | |
"type1": { | |
"properties": { | |
"message": { | |
"index": "analyzed", | |
"type": "string" | |
}, | |
"depart": { |