abronner/Zero Downtime (part 1 of 3)

## Zero Downtime (part 1 of 3)
# ----------------------------------------------------------------------------------------------------------------
# Elasticsearch-Toronto Meetup: Zero Downtime (part 1 of 3)
# ----------------------------------------------------------------------------------------------------------------
# My presentation at the second elasticsearch meetup in Toronto
# April 21, 2015
# http://www.meetup.com/ElasticSearch-toronto
# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/
# ----------------------------------------------------------------------------------------------------------------
# Demo with elasticsearch 1.5.1 and Marvel/Sense
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html
# ----------------------------------------------------------------------------------------------------------------

# ---------------------------------------------
# Chapter 1: WHEN SCHEMA-LESS MET AGGREGATIONS
#            ... IN PRODUCTION
# ---------------------------------------------

# STORY: as a meetup organizer I want to FIND, SORT and AGGREGATE data about my group members in order to gain deeper insight about my group.

# 'Naive' schema-less INSERT:

POST elasticsearch-toronto_v1/members/182513481
{
  "username": "Amit",
  "location": "Toronto, ON",
  "member_since": "November 25, 2014",
  "introduction": "You know for search",
  "meetup_groups": [
    "(UXD / UX) User Experience Design Toronto",
    "AngularJS Toronto",
    "Big Data Developers in Toronto",
    "DevOps Toronto",
    "Full Stack Toronto Meetup",
    "Meteor Toronto",
    "PhoneGap Toronto (#PhoneGapTO)"
    ],
  "number_of_groups": 7,
  "organizer": true,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
}

# Check: is it INDEXED?

GET elasticsearch-toronto_v1/members/182513481

# Can we SEARCH it?

# match all

GET elasticsearch-toronto_v1/_search
{
  "query": {
    "match_all": {}
  }
}

# match by username

GET elasticsearch-toronto_v1/_search
{
  "query": {
    "match": {
      "username": "Amit"
    }
  }
}

# match by location

# note: CASE INSENSITIVE

GET elasticsearch-toronto_v1/_search
{
  "query": {
    "match": {
      "location": "toronto"
    }
  }
}

# match by meetup groups

# notes: BOOL query, PHRASE query

GET elasticsearch-toronto_v1/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "meetup_groups": "angularjs"
          }
        },
        {
          "match_phrase": {
            "meetup_groups": "FULL STACK"
          }
        },
        {
          "match_phrase_prefix": {
            "meetup_groups": "Meteor TO"
          }
        }
      ]
    }
  }
}

# So far so good !
# but, What about FACETS and AGGREGATIONS?

# aggregate by meetup groups

GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups",
        "size": 20
      }
    }
  }
}

# 'SCHEMA-LESS' EQUALS 'DEFAULT MAPPING' !

# Here is our DEFAULT MAPPING

GET elasticsearch-toronto_v1/_mapping

# And here is how DEFAULT STRING is ANALYZED

# Asks the index to analyze one sample group name so the resulting tokens can be inspected.
# No analyzer is named in the request, so whatever analyzer the index applies by default is used.
# NOTE(review): the body below is not valid JSON. In ES 1.x the _analyze body is presumably
# taken as the raw text to analyze, with the braces/quotes only there so Sense accepts it — confirm.
GET elasticsearch-toronto_v1/_analyze
{"(UXD / UX) User Experience Design Toronto"}

# No problem, MAP AGGREGATION FIELDS AS NOT_ANALYZED

# update mapping:
PUT elasticsearch-toronto_v1/members/_mapping
{
  "members": {
    "properties": {
      "meetup_groups": {
        "type": "string",
        "index": "not_analyzed"
      }
    }
  }
}

# And... ERROR

# YOU CANNOT UPDATE THE MAPPING OF A MAPPED FIELD !

# Ok, alternative: USE MULTI FIELDS

PUT elasticsearch-toronto_v1/members/_mapping
{
  "members": {
    "properties": {
      "meetup_groups": {
        "type": "string",
        "fields": {
          "not_analyzed": {
            "type": "string",
            "index": "not_analyzed"
          }
        }
      }
    }
  }
}

# Great, NO Error

# aggregate by meetup groups

GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups.not_analyzed",
        "size": 20
      }
    }
  }
}

# And... NO RESULTS

# CHANGING MAPPING DOES NOT REINDEX DOCUMENTS !

# In other words, we need to reindex

POST elasticsearch-toronto_v1/members/182513481
{
  "username": "Amit",
  "location": "Toronto, ON",
  "member_since": "November 25, 2014",
  "introduction": "You know for search",
  "meetup_groups": [
    "(UXD / UX) User Experience Design Toronto",
    "AngularJS Toronto",
    "Big Data Developers in Toronto",
    "DevOps Toronto",
    "Full Stack Toronto Meetup",
    "Meteor Toronto",
    "PhoneGap Toronto (#PhoneGapTO)"
    ],
  "number_of_groups": 7,
  "organizer": true,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
}

# try again

GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups.not_analyzed",
        "size": 20
      }
    }
  }
}

# IT WORKS

# but NOT ANALYZED is CASE SENSITIVE..

# insert another member:
POST elasticsearch-toronto_v1/members/13589068
{
  "username": "Roman B.",
  "location": "East York, ON",
  "member_since": "November 29, 2014",
  "introduction": "Full stack Dev",
  "meetup_groups": [
    "Angularjs Toronto",
    "Business Connection Exchange Toronto",
    "Devops Toronto",
    "Full stack Toronto Meetup",
    "Docker Online Meetup"
    ],
  "number_of_groups": 5,
  "organizer": false,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068"
}

# check aggregation:
GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups.not_analyzed",
        "size": 20
      }
    }
  }
}

# We want to build a CUSTOM ANALYZER:
# keyword tokenizer + lowercase filter

PUT elasticsearch-toronto_v1/_settings
{
  "analysis" : {
    "analyzer":{
      "keyword_lowercase":{
        "type": "custom",
        "tokenizer": "keyword",
        "filter": ["lowercase"]
      }
    }
  }
}

# And... ERROR

# INDEX MUST BE CLOSED TO UPDATE ANALYSIS SETTINGS

# There could be many other changes.

# For example: want a date histogram to see how members join over time? oops, 'member_since' is string and not a date field...

GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs" : {
    "members_over_time" : {
      "date_histogram" : {
        "field" : "member_since",
        "interval" : "day"
      }
    }
  }
}

# SOONER OR LATER YOU WILL NEED TO MAKE CHANGES WHEN YOUR INDEX IS ALREADY IN PRODUCTION

# -----------------
# END of Chapter 1
# -----------------

# Lessons:
# - YOU CANNOT UPDATE THE MAPPING OF A MAPPED FIELD
# - YOU CANNOT UPDATE THE ANALYSIS SETTINGS OF AN OPEN INDEX
# - CHANGING MAPPING/SETTINGS DOES NOT REINDEX DOCUMENTS
# - PREPARE TO MAKE CHANGES TO YOUR INDEX IN PRODUCTION

## Zero Downtime (part 2 of 3)
# ----------------------------------------------------------------------------------------------------------------
# Elasticsearch-Toronto Meetup: Zero Downtime (part 2 of 3)
# ----------------------------------------------------------------------------------------------------------------
# My presentation at the second elasticsearch meetup in Toronto
# April 21, 2015
# http://www.meetup.com/ElasticSearch-toronto
# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/
# ----------------------------------------------------------------------------------------------------------------
# Demo with elasticsearch 1.5.1 and Marvel/Sense
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html
# ----------------------------------------------------------------------------------------------------------------

# ---------------------------------------------
# Chapter 2: MAKING INDEX CHANGES IN PRODUCTION
# ---------------------------------------------

# Step 1: PREPARE NEW VERSION OF INDEX

# Creates index 'elasticsearch-toronto_v2' with the changes that could not be applied to v1 in place:
#  - settings: custom analyzer 'keyword_lowercase' = keyword tokenizer + lowercase filter
#  - mapping for type 'members':
#      meetup_groups -> string with two sub-fields:
#                         'not_analyzed'      (indexed as-is, case sensitive)
#                         'keyword_lowercase' (analyzed with the custom analyzer above)
#      member_since  -> date with format "MMM d, y"
#                       (the demo documents carry values such as "November 25, 2014")
# Fields not listed here (username, location, ...) are left to dynamic mapping.
POST elasticsearch-toronto_v2
{
  "settings": {
    "analysis" : {
      "analyzer":{
        "keyword_lowercase":{
          "type": "custom",
          "tokenizer": "keyword",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "members": {
      "properties": {
        "meetup_groups": {
          "type": "string",
          "fields": {
            "not_analyzed": {
              "type": "string",
              "index": "not_analyzed"
            },
            "keyword_lowercase": {
              "type": "string",
              "index": "analyzed",
              "analyzer": "keyword_lowercase"
            }
          }
        },
        "member_since": {
          "type": "date",
          "format": "MMM d, y"
        }
      }
    }
  }
}

# check the mapping
GET elasticsearch-toronto_v2/_mapping

# Step 2: REINDEX DOCUMENTS

POST elasticsearch-toronto_v2/members/182513481
{
  "username": "Amit",
  "location": "Toronto, ON",
  "member_since": "November 25, 2014",
  "introduction": "You know for search",
  "meetup_groups": [
    "(UXD / UX) User Experience Design Toronto",
    "AngularJS Toronto",
    "Big Data Developers in Toronto",
    "DevOps Toronto",
    "Full Stack Toronto Meetup",
    "Meteor Toronto",
    "PhoneGap Toronto (#PhoneGapTO)"
    ],
  "number_of_groups": 7,
  "organizer": true,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
}

POST elasticsearch-toronto_v2/members/13589068
{
  "username": "Roman B.",
  "location": "East York, ON",
  "member_since": "November 29, 2014",
  "introduction": "Full stack Dev",
  "meetup_groups": [
    "Angularjs Toronto",
    "Business Connection Exchange Toronto",
    "Devops Toronto",
    "Full stack Toronto Meetup",
    "Docker Online Meetup"
    ],
  "number_of_groups": 5,
  "organizer": false,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068"
}

# check changes: aggregation
GET elasticsearch-toronto_v2/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups.keyword_lowercase",
        "size": 20
      }
    }
  }
}

# check changes: date histogram
GET elasticsearch-toronto_v2/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs" : {
    "members_over_time" : {
      "date_histogram" : {
        "field" : "member_since",
        "interval" : "day"
      }
    }
  }
}

# Step 3: SWITCH INDICES

# USE ALIASES INSTEAD OF INDEX NAMES !

POST _aliases
{
    "actions": [
        { "add":    { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }}
    ]
}

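# check aggregation (using alias) -- the alias still points to v1, which has no 'keyword_lowercase' sub-field, so expect no buckets yet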
GET elasticsearch-toronto/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups.keyword_lowercase",
        "size": 20
      }
    }
  }
}

# check date histogram (using alias)
GET elasticsearch-toronto/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs" : {
    "members_over_time" : {
      "date_histogram" : {
        "field" : "member_since",
        "interval" : "day"
      }
    }
  }
}

# SWITCH WITH ZERO DOWNTIME !

# Moves the alias 'elasticsearch-toronto' from v1 to v2.
# The 'remove' and 'add' actions are sent in ONE _aliases request on purpose, so clients
# using the alias never see it pointing at zero or two indices in between.
# NOTE(review): this relies on the actions of a single _aliases call being applied
# atomically, as the ES aliases API documents — confirm for your version.
POST _aliases
{
    "actions": [
        { "remove": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }},
        { "add":    { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }}
    ]
}

# check aggregation (using alias)
GET elasticsearch-toronto/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups.keyword_lowercase",
        "size": 20
      }
    }
  }
}

# check date histogram (using alias)
GET elasticsearch-toronto/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs" : {
    "members_over_time" : {
      "date_histogram" : {
        "field" : "member_since",
        "interval" : "day"
      }
    }
  }
}

# Be careful not to mess it up:

# ALIAS can point to multiple indices

POST _aliases
{
    "actions": [
        { "add": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }},
        { "add":    { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }}
    ]
}

# check alias
GET _alias/elasticsearch-toronto

# ALIAS can point to zero indices

POST _aliases
{
    "actions": [
        { "remove": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }},
        { "remove":    { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }}
    ]
}

# check alias
GET _alias/elasticsearch-toronto

# If necessary, you can always revert

POST _aliases
{
    "actions": [
        { "add": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }},
        { "remove":    { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }}
    ]
}

# check alias
GET _alias/elasticsearch-toronto

# before we continue
DELETE elasticsearch-toronto_v2/_query
{
  "query": {
    "match_all": {}
  }
}

# ** REINDEXING METHODS: **

# (1) From external source (e.g. database)

# (2) From current index version ('_source' field)

# REINDEX: SCAN & SCROLL + BULK API

# SCAN

# call ONCE
# Opens a scan-type scroll over every document in v1 (match_all).
#   search_type=scan -> scan mode (no sorting, see the note further down)
#   scroll=1m        -> scroll expiry time of one minute
#   size: 1          -> kept tiny so the demo can step through documents one call at a time
# NOTE(review): in scan mode this first call is expected to return only a scroll id and
# no hits — confirm. Use the returned id in place of SCROLL_ID in the next request.
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m
{
  "query": {
    "match_all": {}
  },
  "size":  1
}

# call until you get zero hits
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# Notes:
# scroll expiry time (1m)
# scroll size: 'size' applies per shard, so each scroll call can return up to size x number of shards hits

# SCAN DOES NO SORTING !

# BULK

# use the '_source' from SCAN results

POST _bulk
{ "index" : { "_index" : "elasticsearch-toronto_v2", "_type" : "members", "_id" : "182513481" }}
{ "username": "Amit", "location": "Toronto, ON", "member_since": "November 25, 2014", "introduction": "You know for search", "meetup_groups": [ "(UXD / UX) User Experience Design Toronto", "AngularJS Toronto", "Big Data Developers in Toronto", "DevOps Toronto", "Full Stack Toronto Meetup", "Meteor Toronto", "PhoneGap Toronto (#PhoneGapTO)" ], "number_of_groups": 7, "organizer": true, "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" }
{ "index" : { "_index" : "elasticsearch-toronto_v2", "_type" : "members", "_id" : "13589068" }}
{ "username": "Roman B.", "location": "East York, ON", "member_since": "November 29, 2014", "introduction": "Full stack Dev", "meetup_groups": [ "Angularjs Toronto", "Business Connection Exchange Toronto", "Devops Toronto", "Full stack Toronto Meetup", "Docker Online Meetup" ], "number_of_groups": 5, "organizer": false, "link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" }

# check results
GET elasticsearch-toronto_v2/_search
{
  "query": {
    "match_all": {}
  }
}

# before we continue
DELETE elasticsearch-toronto_v2/_query
{
  "query": {
    "match_all": {}
  }
}

# EASY REINDEXING WITH PYTHON CLIENT

# $ pip install elasticsearch
# $ python
# >>> from elasticsearch import Elasticsearch
# >>> from elasticsearch import helpers
# >>> es = Elasticsearch()
# >>> helpers.reindex(es, "elasticsearch-toronto_v1", "elasticsearch-toronto_v2", {"query": {"match_all": {}}})

# check results
GET elasticsearch-toronto_v2/_search
{
  "query": {
    "match_all": {}
  }
}

# source code on github: https://github.com/elastic/elasticsearch-py/blob/master/elasticsearch/helpers/__init__.py

# Elegant implementation (using GENERATORS)

# EASY TO MODIFY DOCUMENTS DURING REINDEXING

# example: https://gist.githubusercontent.com/abronner/2c0e0dba0e998eb3a4b1/raw/ce17e56eb22069cacb305e0a7e642daeaa80c5ed/gistfile1.txt

# >>> reindex(es, "elasticsearch-toronto_v1", "elasticsearch-toronto_v2", {"query": {"match_all": {}}})

# check results
GET elasticsearch-toronto_v2/_search
{
  "query": {
    "match_all": {}
  }
}

# -----------------
# END of Chapter 2
# -----------------

# Lessons:
# - USE ALIASES INSTEAD OF INDEX NAMES
# - REINDEX WITH SCAN/SCROLL & BULK API
# - SCAN DOES NOT SORT THE RESULTS

## Zero Downtime (part 3 of 3)
# ----------------------------------------------------------------------------------------------------------------
# Elasticsearch-Toronto Meetup: Zero Downtime (part 3 of 3)
# ----------------------------------------------------------------------------------------------------------------
# My presentation at the second elasticsearch meetup in Toronto
# April 21, 2015
# http://www.meetup.com/ElasticSearch-toronto
# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/
# ----------------------------------------------------------------------------------------------------------------
# Demo with elasticsearch 1.5.1 and Marvel/Sense
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html
# ----------------------------------------------------------------------------------------------------------------

# ---------------------------------------------
# Chapter 3: REINDEXING CHALLENGES
# ---------------------------------------------

# before we continue
DELETE elasticsearch-toronto_v2/_query
{
  "query": {
    "match_all": {}
  }
}

# INCOMING DOCUMENTS

# you need to change your production index
# you use aliases
# you create a new version of the index
# and you reindex using scan/scroll & bulk api
# but...
# your system continues to index new documents

# CHALLENGE: SCAN TAKES A SNAPSHOT IN TIME

# call ONCE
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m
{
  "query": {
    "match_all": {}
  },
  "size":  1
}

# 1st document
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# index a new document
POST elasticsearch-toronto_v1/members/8968154
{
  "username": "Nick Van Weerdenburg",
  "location": "Toronto, ON",
  "member_since": "December 29, 2014",
  "introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.",
  "meetup_groups": [
    "Agile Experience Design Toronto",
    "AngularJS Toronto",
    "PhoneGap Toronto (#PhoneGapTO)",
    "(UXD / UX) User Experience Design Toronto",
    "#DevTO"
    ],
  "number_of_groups": 5,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154"
}

# 2nd document
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# 3rd document ?
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# NO... END OF SCROLL

# SOLUTION: SCROLL BY TIMESTAMPS

PUT elasticsearch-toronto_v1/members/_mapping
{
  "members" : {
    "_timestamp": { "enabled" : true }
  }
}

# reindex (to set timestamp)
POST elasticsearch-toronto_v1/members/182513481
{
  "username": "Amit",
  "location": "Toronto, ON",
  "member_since": "November 25, 2014",
  "introduction": "You know for search",
  "meetup_groups": [
    "(UXD / UX) User Experience Design Toronto",
    "AngularJS Toronto",
    "Big Data Developers in Toronto",
    "DevOps Toronto",
    "Full Stack Toronto Meetup",
    "Meteor Toronto",
    "PhoneGap Toronto (#PhoneGapTO)"
    ],
  "number_of_groups": 7,
  "organizer": true,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
}

# reindex (to set timestamp)
POST elasticsearch-toronto_v1/members/13589068
{
  "username": "Roman B.",
  "location": "East York, ON",
  "member_since": "November 29, 2014",
  "introduction": "Full stack Dev",
  "meetup_groups": [
    "Angularjs Toronto",
    "Business Connection Exchange Toronto",
    "Devops Toronto",
    "Full stack Toronto Meetup",
    "Docker Online Meetup"
    ],
  "number_of_groups": 5,
  "organizer": false,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068"
}

# Get last timestamp before scanning

GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "last_timestamp": {
      "max": {
        "field": "_timestamp"
      }
    }
  }
}

# LAST_TIMESTAMP = ___

# SCAN until timestamp

# Opens a scan-type scroll restricted to documents whose _timestamp is in (0, LAST_TIMESTAMP].
# LAST_TIMESTAMP is a placeholder, not valid JSON: replace it with the value returned by
# the 'last_timestamp' max aggregation above before running this request.
# Documents indexed after that point fall outside the range and are left for a later scan.
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m
{
  "query": {
    "range": {
      "_timestamp": {
        "gt": 0,
        "lte": LAST_TIMESTAMP
      }
    }
  },
  "size":  1
}

# 1st document
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# index a new document
POST elasticsearch-toronto_v1/members/8968154
{
  "username": "Nick Van Weerdenburg",
  "location": "Toronto, ON",
  "member_since": "December 29, 2014",
  "introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.",
  "meetup_groups": [
    "Agile Experience Design Toronto",
    "AngularJS Toronto",
    "PhoneGap Toronto (#PhoneGapTO)",
    "(UXD / UX) User Experience Design Toronto",
    "#DevTO"
    ],
  "number_of_groups": 5,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154"
}

# 2nd document
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# End of scroll

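# SCAN from previous timestamp: PREVIOUS_TIMESTAMP = the LAST_TIMESTAMP used in the scan above; re-run the max aggregation to get the new LAST_TIMESTAMP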

GET elasticsearch-toronto_v1/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "last_timestamp": {
      "max": {
        "field": "_timestamp"
      }
    }
  }
}

# Opens a scan-type scroll over the next window: _timestamp in (PREVIOUS_TIMESTAMP, LAST_TIMESTAMP].
# Both names are placeholders, not valid JSON. Before running, substitute:
#   PREVIOUS_TIMESTAMP -> the upper bound ("lte") used by the earlier scan
#   LAST_TIMESTAMP     -> the value just returned by the 'last_timestamp' max aggregation
# "gt" (exclusive) on the lower bound avoids re-reading documents already covered by the earlier scan.
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m
{
  "query": {
    "range": {
      "_timestamp": {
        "gt": PREVIOUS_TIMESTAMP,
        "lte": LAST_TIMESTAMP
      }
    }
  },
  "size":  1
}

# 3rd document
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID

# Final Notes:

# You will still need to stop incoming documents before the alias switch, otherwise there is always a chance of missing some documents

# If stopping incoming documents is not possible, you might need to do a final update (reindex) after the alias switch. The problem is that a document from the previous index might already have been updated in the new index and should not be overwritten. Checking document versions is one possible solution.

# Scan does NO sorting so you can't make any assumptions about reindexing failures. Either log all failures and fix them individually, or repeat the scan interval.

# Things can get more complicated when a new version of your software is released with a new version of your index. It's better to split the tasks if possible.

# -----------------
# END of Chapter 3
# -----------------
	# ----------------------------------------------------------------------------------------------------------------
	# Elasticsearch-Toronto Meetup: Zero Downtime (part 1 of 3)
	# ----------------------------------------------------------------------------------------------------------------
	# My presentation at the second elasticsearch meetup in Toronto
	# April 21, 2015
	# http://www.meetup.com/ElasticSearch-toronto
	# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/
	# ----------------------------------------------------------------------------------------------------------------
	# Demo with elasticsearch 1.5.1 and Marvel/Sense
	# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html
	# ----------------------------------------------------------------------------------------------------------------

	# ---------------------------------------------
	# Chapter 1: WHEN SCHEMA-LESS MET AGGREGATIONS
	# ... IN PRODUCTION
	# ---------------------------------------------

	# STORY: as a meetup organizer I want to FIND, SORT and AGGREGATE data about my group members in order to gain deeper insight about my group.

	# 'Naive' schema-less INSERT:

	POST elasticsearch-toronto_v1/members/182513481
	{
	"username": "Amit",
	"location": "Toronto, ON",
	"member_since": "November 25, 2014",
	"introduction": "You know for search",
	"meetup_groups": [
	"(UXD / UX) User Experience Design Toronto",
	"AngularJS Toronto",
	"Big Data Developers in Toronto",
	"DevOps Toronto",
	"Full Stack Toronto Meetup",
	"Meteor Toronto",
	"PhoneGap Toronto (#PhoneGapTO)"
	],
	"number_of_groups": 7,
	"organizer": true,
	"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
	}

	# Check: is it INDEXED?

	GET elasticsearch-toronto_v1/members/182513481

	# Can we SEARCH it?

	# match all

	GET elasticsearch-toronto_v1/_search
	{
	"query": {
	"match_all": {}
	}
	}

	# match by username

	GET elasticsearch-toronto_v1/_search
	{
	"query": {
	"match": {
	"username": "Amit"
	}
	}
	}

	# match by location

	# note: CASE INSENSITIVE

	GET elasticsearch-toronto_v1/_search
	{
	"query": {
	"match": {
	"location": "toronto"
	}
	}
	}

	# match by meetup groups

	# notes: BOOL query, PHRASE query

	GET elasticsearch-toronto_v1/_search
	{
	"query": {
	"bool": {
	"must": [
	{
	"match": {
	"meetup_groups": "angularjs"
	}
	},
	{
	"match_phrase": {
	"meetup_groups": "FULL STACK"
	}
	},
	{
	"match_phrase_prefix": {
	"meetup_groups": "Meteor TO"
	}
	}
	]
	}
	}
	}

	# So far so good !
	# but, What about FACETS and AGGREGATIONS?

	# aggregate by meetup groups

	GET elasticsearch-toronto_v1/_search
	{
	"size": 0,
	"query": {
	"match_all": {}
	},
	"aggs": {
	"groups": {
	"terms": {
	"field": "meetup_groups",
	"size": 20
	}
	}
	}
	}

	# 'SCHEMA-LESS' EQUALS 'DEFAULT MAPPING' !

	# Here is our DEFAULT MAPPING

	GET elasticsearch-toronto_v1/_mapping

	# And here is how DEFAULT STRING is ANALYZED

	GET elasticsearch-toronto_v1/_analyze
	{"(UXD / UX) User Experience Design Toronto"}

	# No problem, MAP AGGREGATION FIELDS AS NOT_ANALYZED

	# update mapping:
	PUT elasticsearch-toronto_v1/members/_mapping
	{
	"members": {
	"properties": {
	"meetup_groups": {
	"type": "string",
	"index": "not_analyzed"
	}
	}
	}
	}

	# And... ERROR

	# YOU CANNOT UPDATE THE MAPPING OF A MAPPED FIELD !

	# Ok, alternative: USE MULTI FIELDS

	PUT elasticsearch-toronto_v1/members/_mapping
	{
	"members": {
	"properties": {
	"meetup_groups": {
	"type": "string",
	"fields": {
	"not_analyzed": {
	"type": "string",
	"index": "not_analyzed"
	}
	}
	}
	}
	}
	}

	# Great, NO Error

	# aggregate by meetup groups

	GET elasticsearch-toronto_v1/_search
	{
	"size": 0,
	"query": {
	"match_all": {}
	},
	"aggs": {
	"groups": {
	"terms": {
	"field": "meetup_groups.not_analyzed",
	"size": 20
	}
	}
	}
	}

	# And... NO RESULTS

	# CHANGING MAPPING DOES NOT REINDEX DOCUMENTS !

	# In other words, we need to reindex

	POST elasticsearch-toronto_v1/members/182513481
	{
	"username": "Amit",
	"location": "Toronto, ON",
	"member_since": "November 25, 2014",
	"introduction": "You know for search",
	"meetup_groups": [
	"(UXD / UX) User Experience Design Toronto",
	"AngularJS Toronto",
	"Big Data Developers in Toronto",
	"DevOps Toronto",
	"Full Stack Toronto Meetup",
	"Meteor Toronto",
	"PhoneGap Toronto (#PhoneGapTO)"
	],
	"number_of_groups": 7,
	"organizer": true,
	"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
	}

	# try again

	GET elasticsearch-toronto_v1/_search
	{
	"size": 0,
	"query": {
	"match_all": {}
	},
	"aggs": {
	"groups": {
	"terms": {
	"field": "meetup_groups.not_analyzed",
	"size": 20
	}
	}
	}
	}

	# IT WORKS

	# but NOT ANALYZED is CASE SENSITIVE..

	# insert another member:
	POST elasticsearch-toronto_v1/members/13589068
	{
	"username": "Roman B.",
	"location": "East York, ON",
	"member_since": "November 29, 2014",
	"introduction": "Full stack Dev",
	"meetup_groups": [
	"Angularjs Toronto",
	"Business Connection Exchange Toronto",
	"Devops Toronto",
	"Full stack Toronto Meetup",
	"Docker Online Meetup"
	],
	"number_of_groups": 5,
	"organizer": false,
	"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068"
	}

	# check aggregation:
	GET elasticsearch-toronto_v1/_search
	{
	"size": 0,
	"query": {
	"match_all": {}
	},
	"aggs": {
	"groups": {
	"terms": {
	"field": "meetup_groups.not_analyzed",
	"size": 20
	}
	}
	}
	}

	# We want to build a CUSTOM ANALYZER:
	# keyword tokenizer + lowercase filter

	PUT elasticsearch-toronto_v1/_settings
	{
	"analysis" : {
	"analyzer":{
	"keyword_lowercase":{
	"type": "custom",
	"tokenizer": "keyword",
	"filter": ["lowercase"]
	}
	}
	}
	}

	# And... ERROR

	# INDEX MUST BE CLOSED TO UPDATE ANALYSIS SETTINGS

	# There could be many other changes.

	# For example: want a date histogram to see how members join over time? oops, 'member_since' is string and not a date field...

	GET elasticsearch-toronto_v1/_search
	{
	"size": 0,
	"query": {
	"match_all": {}
	},
	"aggs" : {
	"members_over_time" : {
	"date_histogram" : {
	"field" : "member_since",
	"interval" : "day"
	}
	}
	}
	}

	# SOONER OR LATER YOU WILL NEED TO MAKE CHANGES WHEN YOUR INDEX IS ALREADY IN PRODUCTION

	# -----------------
	# END of Chapter 1
	# -----------------

	# Lessons:
	# - YOU CANNOT UPDATE THE MAPPING OF A MAPPED FIELD
	# - YOU CANNOT UPDATE THE ANALYSIS SETTINGS OF AN OPEN INDEX
	# - CHANGING MAPPING/SETTINGS DOES NOT REINDEX DOCUMENTS
	# - PREPARE TO MAKE CHANGES TO YOUR INDEX IN PRODUCTION