Telvis Calhoun telvis07

## elasticsearch_multi_field_geo_point.sh
curl -XPOST localhost:9200/test -d '
{
  "mappings": {
    "type1": {
      "properties": {
        "message": {
          "index": "analyzed",
          "type": "string"
        },
        "depart": {

## multi_field_nested_test.sh
curl -XDELETE localhost:9200/test

curl -XPOST localhost:9200/test -d '
{
  "mappings": {
    "type1": {
      "properties": {
        "message": {
          "index": "analyzed",
          "type": "string"

## terms-filter-lookup.sh
# Index the document for user id 2 (PUT users/user/2); its friends field lists user ids 1 and 3.
curl -XPUT localhost:9200/users/user/2 -d '{
   "friends" : ["1", "3"]
}'

# Index tweet id 1 (PUT tweets/tweet/1); the body sets the user field to 1, not 2.
# NOTE(review): presumably user 1 is used because it appears in the friends list of user 2 - confirm intent.
curl -XPUT localhost:9200/tweets/tweet/1 -d '{
   "user" : "1",
   "tweet" : "hi i am user 1 "
}'

## kibana-filtered-query.json
{
  "facets": {
    "terms": {
      "facet_filter": {
        "fquery": {
          "query": {
            "filtered": {
              "filter": {
                "bool": {
                  "must": [

## ngram_prune.R
prune_ngram_df_by_cover_percentage <- function(df, percentage) {
  # assumes df contains columns (word, freq)
  # assumes df is sorted by freq in descending order
  # prune ngrams by finding the minimum number of ngrams that cover X percent of the word instances
  sums <- cumsum(df$freq)
  cover <- which(sums >= sum(df$freq) * percentage)[1]
  print(sprintf("%s of %s (%s%%) cover %s%% of word instances",
                cover,
                nrow(df),
                cover/nrow(df)*100,
	curl -XPOST localhost:9200/test -d '
	{
	"mappings": {
	"type1": {
	"properties": {
	"message": {
	"index": "analyzed",
	"type": "string"
	},
	"depart": {
	curl -XDELETE localhost:9200/test

	curl -XPOST localhost:9200/test -d '
	{
	"mappings": {
	"type1": {
	"properties": {
	"message": {
	"index": "analyzed",
	"type": "string"
	# Index the document for user id 2 (PUT users/user/2); its friends field lists user ids 1 and 3.
	curl -XPUT localhost:9200/users/user/2 -d '{
	"friends" : ["1", "3"]
	}'

	# Index tweet id 1 (PUT tweets/tweet/1); the body sets the user field to 1, not 2.
	# NOTE(review): presumably user 1 is used because it appears in the friends list of user 2 - confirm intent.
	curl -XPUT localhost:9200/tweets/tweet/1 -d '{
	"user" : "1",
	"tweet" : "hi i am user 1 "
	}'
	{
	"facets": {
	"terms": {
	"facet_filter": {
	"fquery": {
	"query": {
	"filtered": {
	"filter": {
	"bool": {
	"must": [
	prune_ngram_df_by_cover_percentage <- function(df, percentage) {
	# assumes df contains columns (word, freq)
	# assumes df is sorted by freq in descending order
	# prune ngrams by finding the minimum number of ngrams that cover X percent of the word instances
	sums <- cumsum(df$freq)
	cover <- which(sums >= sum(df$freq) * percentage)[1]
	print(sprintf("%s of %s (%s%%) cover %s%% of word instances",
	cover,
	nrow(df),
	cover/nrow(df)*100,