View ngram_prune.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prune_ngram_df_by_cover_percentage <- function(df, percentage) { | |
# assumes df contains columns (word, freq) | |
# assumes df is sorted by freq in descending order | |
# prune ngrams by finding the minimum number of ngrams that cover the requested share of the word instances (percentage is applied as a fraction of sum(df$freq), e.g. 0.9 for 90%) | |
sums <- cumsum(df$freq) | |
cover <- which(sums >= sum(df$freq) * percentage)[1] | |
print(sprintf("%s of %s (%s%%) cover %s%% of word instances", | |
cover, | |
nrow(df), | |
cover/nrow(df)*100, |
View kibana-filtered-query.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"facets": { | |
"terms": { | |
"facet_filter": { | |
"fquery": { | |
"query": { | |
"filtered": { | |
"filter": { | |
"bool": { | |
"must": [ |
View terms-filter-lookup.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# index the information for user with id 2, specifically, its friends | |
curl -XPUT localhost:9200/users/user/2 -d '{ | |
"friends" : ["1", "3"] | |
}' | |
# index a tweet, from user with id 1 (one of user 2's friends) | |
curl -XPUT localhost:9200/tweets/tweet/1 -d '{ | |
"user" : "1", | |
"tweet" : "hi i am user 1 " | |
}' |
View multi_field_nested_test.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -XDELETE localhost:9200/test | |
curl -XPOST localhost:9200/test -d ' | |
{ | |
"mappings": { | |
"type1": { | |
"properties": { | |
"message": { | |
"index": "analyzed", | |
"type": "string" |
View elasticsearch_multi_field_geo_point.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -XPOST localhost:9200/test -d ' | |
{ | |
"mappings": { | |
"type1": { | |
"properties": { | |
"message": { | |
"index": "analyzed", | |
"type": "string" | |
}, | |
"depart": { |