mbarretta/gist:f012f5e78ff71200e819ebfa0bddb544

## gistfile1.txt
# Create my_index with a "doc" mapping type. securityTags is a keyword field
# with a "length" multi-field of type token_count (standard analyzer); the
# queries below address that sub-field as securityTags.length.
PUT my_index
{
  "mappings": {
    "doc": {
      "properties": {
        "securityTags": {
          "type": "keyword",
          "fields": {
            "length": {
              "type": "token_count",
              "analyzer": "standard"
            }
          }
        }
      }
    }
  }
}


# Index a first document whose securityTags array holds two tags.
POST /my_index/doc
{
    "sensitiveData": "Dave Erickson lives in Wahshington DC",
    "securityTags": ["DaveLocation", "DaveFullName"]
}

# Expected result: no hits. Only one of the document's two tags is supplied,
# and minimum_should_match_field is meant to require as many matching terms as
# securityTags.length says (hoped to be 2, the number of tags on the document).
GET my_index/_search
{
  "query": {
    "terms_set": {
      "securityTags": {
        "terms": ["DaveLocation"],
        "minimum_should_match_field": "securityTags.length"
      }
    }
  }
}

# A hit for length 1 demonstrates that token_count is not counting the total
# number of tokens across the whole securityTags list (that total would be 2
# for the document indexed above).
GET my_index/_search
{
  "query": {
    "term": {
      "securityTags.length": {
        "value": 1
      }
    }
  }
}

# Check whether securityTags.length holds a single value per document by
# indexing a new doc whose third tag ("two tokens") contains two tokens.
POST /my_index/doc
{
    "sensitiveData": "Dave Erickson lives in Wahshington DC",
    "securityTags": ["DaveLocation", "DaveFullName", "two tokens"]
}

# Looks like a length of 1 still returns all the docs, including the new one
# that also carries a two-token tag.
GET my_index/_search
{
  "query": {
    "term": {
      "securityTags.length": {
        "value": 1
      }
    }
  }
}

# ...and that a length value is stored for each individual securityTags entry,
# not once per document: a hit for length 2 would come from the "two tokens" tag.
# NOTE(review): the author attributes this to the fields mapping turning
# securityTags into an object, with all its Lucene-flattening consequences;
# presumably it is the array of values that is flattened into a multi-valued
# field, each value getting its own token count -- confirm against the docs.
GET my_index/_search
{
  "query": {
    "term": {
      "securityTags.length": {
        "value": 2
      }
    }
  }
}
	# Define my_index: mapping type "doc", keyword field securityTags, plus a
	# token_count sub-field "length" analysed with the standard analyzer.
	PUT my_index
	{
	  "mappings": {
	    "doc": {
	      "properties": {
	        "securityTags": {
	          "type": "keyword",
	          "fields": {
	            "length": {
	              "type": "token_count",
	              "analyzer": "standard"
	            }
	          }
	        }
	      }
	    }
	  }
	}


	# Store a document tagged with two security tags.
	POST /my_index/doc
	{
	  "sensitiveData": "Dave Erickson lives in Wahshington DC",
	  "securityTags": ["DaveLocation", "DaveFullName"]
	}

	# No hits are expected: a single term is supplied, while the number of
	# required matches is read from securityTags.length.
	GET my_index/_search
	{
	  "query": {
	    "terms_set": {
	      "securityTags": {
	        "terms": ["DaveLocation"],
	        "minimum_should_match_field": "securityTags.length"
	      }
	    }
	  }
	}

	# Shows that token_count does not hold the total token count of the whole
	# securityTags list: the document is found with a length of 1.
	GET my_index/_search
	{
	  "query": {
	    "term": {
	      "securityTags.length": {
	        "value": 1
	      }
	    }
	  }
	}

	# Probe whether securityTags.length is a single value per document: add a
	# doc that includes one tag made of two tokens.
	POST /my_index/doc
	{
	  "sensitiveData": "Dave Erickson lives in Wahshington DC",
	  "securityTags": ["DaveLocation", "DaveFullName", "two tokens"]
	}

	# Apparently every doc comes back for a length of 1, the new one included.
	GET my_index/_search
	{
	  "query": {
	    "term": {
	      "securityTags.length": {
	        "value": 1
	      }
	    }
	  }
	}

	# ...while a length of 2 shows that one length value is kept per
	# securityTags entry. The author's reading: the fields mapping turns
	# securityTags into an object, with all its Lucene-flattening consequences
	# (NOTE(review): presumably the array itself is what gets flattened into a
	# multi-valued field -- confirm).
	GET my_index/_search
	{
	  "query": {
	    "term": {
	      "securityTags.length": {
	        "value": 2
	      }
	    }
	  }
	}