Created
April 23, 2015 01:03
-
-
Save abronner/c603ee6e12137cefec37 to your computer and use it in GitHub Desktop.
Elasticsearch-Toronto Meetup: Zero Downtime (April 21, 2015)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------------------------------- | |
# Elasticsearch-Toronto Meetup: Zero Downtime (part 1 of 3) | |
# ---------------------------------------------------------------------------------------------------------------- | |
# My presentation at the second elasticsearch meetup in Toronto | |
# April 21, 2015 | |
# http://www.meetup.com/ElasticSearch-toronto | |
# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/ | |
# ---------------------------------------------------------------------------------------------------------------- | |
# Demo with elasticsearch 1.5.1 and Marvel/Sense | |
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html | |
# ---------------------------------------------------------------------------------------------------------------- | |
# --------------------------------------------- | |
# Chapter 1: WHEN SCHEMA-LESS MET AGGREGATIONS | |
# ... IN PRODUCTION | |
# --------------------------------------------- | |
# STORY: as a meetup organizer I want to FIND, SORT and AGGREGATE data about my group members in order to gain deeper insight about my group. | |
# 'Naive' schema-less INSERT: | |
POST elasticsearch-toronto_v1/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
# Check: is it INDEXED? | |
GET elasticsearch-toronto_v1/members/182513481 | |
# Can we SEARCH it? | |
# match all | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# match by username | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"query": { | |
"match": { | |
"username": "Amit" | |
} | |
} | |
} | |
# match by location | |
# note: CASE INSENSITIVE | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"query": { | |
"match": { | |
"location": "toronto" | |
} | |
} | |
} | |
# match by meetup groups | |
# notes: BOOL query, PHRASE query | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"meetup_groups": "angularjs" | |
} | |
}, | |
{ | |
"match_phrase": { | |
"meetup_groups": "FULL STACK" | |
} | |
}, | |
{ | |
"match_phrase_prefix": { | |
"meetup_groups": "Meteor TO" | |
} | |
} | |
] | |
} | |
} | |
} | |
# So far so good ! | |
# but, What about FACETS and AGGREGATIONS? | |
# aggregate by meetup groups | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# 'SCHEMA-LESS' EQUALS 'DEFAULT MAPPING' ! | |
# Here is our DEFAULT MAPPING | |
GET elasticsearch-toronto_v1/_mapping | |
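# And here is how a DEFAULT STRING is ANALYZED
# (in ES 1.x, _analyze treats the raw request body as the text to analyze - the body below is plain text, not a JSON document)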
GET elasticsearch-toronto_v1/_analyze | |
{"(UXD / UX) User Experience Design Toronto"} | |
# No problem, MAP AGGREGATION FIELDS AS NOT_ANALYZED | |
# update mapping: | |
PUT elasticsearch-toronto_v1/members/_mapping | |
{ | |
"members": { | |
"properties": { | |
"meetup_groups": { | |
"type": "string", | |
"index": "not_analyzed" | |
} | |
} | |
} | |
} | |
# And... ERROR | |
# YOU CANNOT UPDATE THE MAPPING OF A MAPPED FIELD ! | |
# Ok, alternative: USE MULTI FIELDS | |
PUT elasticsearch-toronto_v1/members/_mapping | |
{ | |
"members": { | |
"properties": { | |
"meetup_groups": { | |
"type": "string", | |
"fields": { | |
"not_analyzed": { | |
"type": "string", | |
"index": "not_analyzed" | |
} | |
} | |
} | |
} | |
} | |
} | |
# Great, NO Error | |
# aggregate by meetup groups | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.not_analyzed", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# And... NO RESULTS | |
# CHANGING MAPPING DOES NOT REINDEX DOCUMENTS ! | |
# In other words, we need to reindex | |
POST elasticsearch-toronto_v1/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
# try again | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.not_analyzed", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# IT WORKS | |
# but NOT ANALYZED is CASE SENSITIVE.. | |
# insert another member:
POST elasticsearch-toronto_v1/members/13589068 | |
{ | |
"username": "Roman B.", | |
"location": "East York, ON", | |
"member_since": "November 29, 2014", | |
"introduction": "Full stack Dev", | |
"meetup_groups": [ | |
"Angularjs Toronto", | |
"Business Connection Exchange Toronto", | |
"Devops Toronto", | |
"Full stack Toronto Meetup", | |
"Docker Online Meetup" | |
], | |
"number_of_groups": 5, | |
"organizer": false, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" | |
} | |
# check aggregation: | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.not_analyzed", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# We want to build a CUSTOM ANALYZER: | |
# keyword tokenizer + lowercase filter | |
PUT elasticsearch-toronto_v1/_settings | |
{ | |
"analysis" : { | |
"analyzer":{ | |
"keyword_lowercase":{ | |
"type": "custom", | |
"tokenizer": "keyword", | |
"filter": ["lowercase"] | |
} | |
} | |
} | |
} | |
# And... ERROR | |
# INDEX MUST BE CLOSED TO UPDATE NON-DYNAMIC SETTINGS (SUCH AS ANALYSIS)
# There could be many other changes. | |
# For example: want a date histogram to see how members join over time? Oops, 'member_since' is mapped as a string, not as a date field...
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs" : { | |
"members_over_time" : { | |
"date_histogram" : { | |
"field" : "member_since", | |
"interval" : "day" | |
} | |
} | |
} | |
} | |
# SOONER OR LATER YOU WILL NEED TO MAKE CHANGES WHEN YOUR INDEX IS ALREADY IN PRODUCTION | |
# ----------------- | |
# END of Chapter 1 | |
# ----------------- | |
# Lessons: | |
# - YOU CANNOT UPDATE THE MAPPING OF A MAPPED FIELD | |
# - YOU CANNOT UPDATE NON-DYNAMIC SETTINGS (E.G. ANALYSIS) OF AN OPEN INDEX
# - CHANGING MAPPING/SETTINGS DOES NOT REINDEX DOCUMENTS | |
# - PREPARE TO MAKE CHANGES TO YOUR INDEX IN PRODUCTION
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------------------------------- | |
# Elasticsearch-Toronto Meetup: Zero Downtime (part 2 of 3) | |
# ---------------------------------------------------------------------------------------------------------------- | |
# My presentation at the second elasticsearch meetup in Toronto | |
# April 21, 2015 | |
# http://www.meetup.com/ElasticSearch-toronto | |
# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/ | |
# ---------------------------------------------------------------------------------------------------------------- | |
# Demo with elasticsearch 1.5.1 and Marvel/Sense | |
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html | |
# ---------------------------------------------------------------------------------------------------------------- | |
# --------------------------------------------- | |
# Chapter 2: MAKING INDEX CHANGES IN PRODUCTION | |
# --------------------------------------------- | |
# Step 1: PREPARE NEW VERSION OF INDEX | |
# Create version 2 of the index with everything v1 could not be changed to in place:
#   - a custom 'keyword_lowercase' analyzer (keyword tokenizer + lowercase filter),
#     so each group name stays a single, case-insensitive term for aggregations
#   - 'meetup_groups' as a multi field: analyzed (search), 'not_analyzed' (exact terms)
#     and 'keyword_lowercase' (case-insensitive terms)
#   - 'member_since' as a real date; the format spells out the full month name and a
#     4-digit year so that it matches the documents exactly ("November 25, 2014")
# PUT is the documented verb for creating an index.
PUT elasticsearch-toronto_v2
{
  "settings": {
    "analysis": {
      "analyzer": {
        "keyword_lowercase": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "members": {
      "properties": {
        "meetup_groups": {
          "type": "string",
          "fields": {
            "not_analyzed": {
              "type": "string",
              "index": "not_analyzed"
            },
            "keyword_lowercase": {
              "type": "string",
              "index": "analyzed",
              "analyzer": "keyword_lowercase"
            }
          }
        },
        "member_since": {
          "type": "date",
          "format": "MMMM d, yyyy"
        }
      }
    }
  }
}
# check the mapping | |
GET elasticsearch-toronto_v2/_mapping | |
# Step 2: REINDEX DOCUMENTS | |
POST elasticsearch-toronto_v2/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
POST elasticsearch-toronto_v2/members/13589068 | |
{ | |
"username": "Roman B.", | |
"location": "East York, ON", | |
"member_since": "November 29, 2014", | |
"introduction": "Full stack Dev", | |
"meetup_groups": [ | |
"Angularjs Toronto", | |
"Business Connection Exchange Toronto", | |
"Devops Toronto", | |
"Full stack Toronto Meetup", | |
"Docker Online Meetup" | |
], | |
"number_of_groups": 5, | |
"organizer": false, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" | |
} | |
# check changes: aggregation | |
GET elasticsearch-toronto_v2/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.keyword_lowercase", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# check changes: date histogram | |
GET elasticsearch-toronto_v2/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs" : { | |
"members_over_time" : { | |
"date_histogram" : { | |
"field" : "member_since", | |
"interval" : "day" | |
} | |
} | |
} | |
} | |
# Step 3: SWITCH INDICES | |
# USE ALIASES INSTEAD OF INDEX NAMES ! | |
POST _aliases | |
{ | |
"actions": [ | |
{ "add": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }} | |
] | |
} | |
# check aggregation (using alias) | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.keyword_lowercase", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# check date histogram (using alias) | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs" : { | |
"members_over_time" : { | |
"date_histogram" : { | |
"field" : "member_since", | |
"interval" : "day" | |
} | |
} | |
} | |
} | |
# SWITCH WITH ZERO DOWNTIME ! | |
POST _aliases | |
{ | |
"actions": [ | |
{ "remove": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }}, | |
{ "add": { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }} | |
] | |
} | |
# check aggregation (using alias) | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.keyword_lowercase", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# check date histogram (using alias) | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs" : { | |
"members_over_time" : { | |
"date_histogram" : { | |
"field" : "member_since", | |
"interval" : "day" | |
} | |
} | |
} | |
} | |
# Be careful not to mess it up: | |
# ALIAS can point to multiple indices | |
POST _aliases | |
{ | |
"actions": [ | |
{ "add": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }}, | |
{ "add": { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }} | |
] | |
} | |
# check alias | |
GET _alias/elasticsearch-toronto | |
# ALIAS can point to zero indices | |
POST _aliases | |
{ | |
"actions": [ | |
{ "remove": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }}, | |
{ "remove": { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }} | |
] | |
} | |
# check alias | |
GET _alias/elasticsearch-toronto | |
# If necessary, you can always revert | |
POST _aliases | |
{ | |
"actions": [ | |
{ "add": { "index": "elasticsearch-toronto_v1", "alias": "elasticsearch-toronto" }}, | |
{ "remove": { "index": "elasticsearch-toronto_v2", "alias": "elasticsearch-toronto" }} | |
] | |
} | |
# check alias | |
GET _alias/elasticsearch-toronto | |
# before we continue | |
DELETE elasticsearch-toronto_v2/_query | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# ** REINDEXING METHODS: ** | |
# (1) From external source (e.g. database) | |
# (2) From current index version ('_source' field) | |
# REINDEX: SCAN & SCROLL + BULK API | |
# SCAN | |
# call ONCE | |
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m | |
{ | |
"query": { | |
"match_all": {} | |
}, | |
"size": 1 | |
} | |
# call until you get zero hits | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# Notes: | |
# scroll expiry time (1m) | |
# scroll size is per shard (each scroll call returns up to size x number of shards hits)
# SCAN DOES NO SORTING ! | |
# BULK | |
# use the '_source' from SCAN results | |
POST _bulk | |
{ "index" : { "_index" : "elasticsearch-toronto_v2", "_type" : "members", "_id" : "182513481" }} | |
{ "username": "Amit", "location": "Toronto, ON", "member_since": "November 25, 2014", "introduction": "You know for search", "meetup_groups": [ "(UXD / UX) User Experience Design Toronto", "AngularJS Toronto", "Big Data Developers in Toronto", "DevOps Toronto", "Full Stack Toronto Meetup", "Meteor Toronto", "PhoneGap Toronto (#PhoneGapTO)" ], "number_of_groups": 7, "organizer": true, "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" } | |
{ "index" : { "_index" : "elasticsearch-toronto_v2", "_type" : "members", "_id" : "13589068" }} | |
{ "username": "Roman B.", "location": "East York, ON", "member_since": "November 29, 2014", "introduction": "Full stack Dev", "meetup_groups": [ "Angularjs Toronto", "Business Connection Exchange Toronto", "Devops Toronto", "Full stack Toronto Meetup", "Docker Online Meetup" ], "number_of_groups": 5, "organizer": false, "link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" } | |
# check results | |
GET elasticsearch-toronto_v2/_search | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# before we continue | |
DELETE elasticsearch-toronto_v2/_query | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# EASY REINDEXING WITH PYTHON CLIENT | |
# $ pip install elasticsearch | |
# $ python | |
# >>> from elasticsearch import Elasticsearch | |
# >>> from elasticsearch import helpers | |
# >>> es = Elasticsearch() | |
# >>> helpers.reindex(es, "elasticsearch-toronto_v1", "elasticsearch-toronto_v2", {"query": {"match_all": {}}}) | |
# check results | |
GET elasticsearch-toronto_v2/_search | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# source code on github: https://github.com/elastic/elasticsearch-py/blob/master/elasticsearch/helpers/__init__.py | |
# Elegant implementation (using GENERATORS) | |
# EASY TO MODIFY DOCUMENTS DURING REINDEXING | |
# example: https://gist.githubusercontent.com/abronner/2c0e0dba0e998eb3a4b1/raw/ce17e56eb22069cacb305e0a7e642daeaa80c5ed/gistfile1.txt | |
# >>> reindex(es, "elasticsearch-toronto_v1", "elasticsearch-toronto_v2", {"query": {"match_all": {}}}) | |
# check results | |
GET elasticsearch-toronto_v2/_search | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# ----------------- | |
# END of Chapter 2 | |
# ----------------- | |
# Lessons: | |
# - USE ALIASES INSTEAD OF INDEX NAMES | |
# - REINDEX WITH SCAN/SCROLL & BULK API | |
# - SCAN DOES NOT SORT THE RESULTS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------------------------------- | |
# Elasticsearch-Toronto Meetup: Zero Downtime (part 3 of 3) | |
# ---------------------------------------------------------------------------------------------------------------- | |
# My presentation at the second elasticsearch meetup in Toronto | |
# April 21, 2015 | |
# http://www.meetup.com/ElasticSearch-toronto | |
# http://www.meetup.com/Elasticsearch-Toronto/events/220384588/ | |
# ---------------------------------------------------------------------------------------------------------------- | |
# Demo with elasticsearch 1.5.1 and Marvel/Sense | |
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html | |
# ---------------------------------------------------------------------------------------------------------------- | |
# --------------------------------------------- | |
# Chapter 3: REINDEXING CHALLENGES | |
# --------------------------------------------- | |
# before we continue | |
DELETE elasticsearch-toronto_v2/_query | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# INCOMING DOCUMENTS | |
# you need to change your production index | |
# you use aliases | |
# you create a new version of the index | |
# and you reindex using scan/scroll & bulk api | |
# but... | |
# your system continues to index new documents | |
# CHALLENGE: SCAN TAKES A SNAPSHOT IN TIME | |
# call ONCE | |
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m | |
{ | |
"query": { | |
"match_all": {} | |
}, | |
"size": 1 | |
} | |
# 1st document | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# index a new document | |
POST elasticsearch-toronto_v1/members/8968154 | |
{ | |
"username": "Nick Van Weerdenburg", | |
"location": "Toronto, ON", | |
"member_since": "December 29, 2014", | |
"introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.", | |
"meetup_groups": [ | |
"Agile Experience Design Toronto", | |
"AngularJS Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)", | |
"(UXD / UX) User Experience Design Toronto", | |
"#DevTO" | |
], | |
"number_of_groups": 5, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154" | |
} | |
# 2nd document | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# 3rd document ? | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# NO... END OF SCROLL | |
# SOLUTION: SCROLL BY TIMESTAMPS | |
PUT elasticsearch-toronto_v1/members/_mapping | |
{ | |
"members" : { | |
"_timestamp": { "enabled" : true } | |
} | |
} | |
# reindex (to set timestamp) | |
POST elasticsearch-toronto_v1/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
# reindex (to set timestamp) | |
POST elasticsearch-toronto_v1/members/13589068 | |
{ | |
"username": "Roman B.", | |
"location": "East York, ON", | |
"member_since": "November 29, 2014", | |
"introduction": "Full stack Dev", | |
"meetup_groups": [ | |
"Angularjs Toronto", | |
"Business Connection Exchange Toronto", | |
"Devops Toronto", | |
"Full stack Toronto Meetup", | |
"Docker Online Meetup" | |
], | |
"number_of_groups": 5, | |
"organizer": false, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" | |
} | |
# Get last timestamp before scanning | |
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"last_timestamp": { | |
"max": { | |
"field": "_timestamp" | |
} | |
} | |
} | |
} | |
# LAST_TIMESTAMP = ___ | |
# SCAN until timestamp | |
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m | |
{ | |
"query": { | |
"range": { | |
"_timestamp": { | |
"gt": 0, | |
"lte": LAST_TIMESTAMP | |
} | |
} | |
}, | |
"size": 1 | |
} | |
# 1st document | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# index a new document | |
POST elasticsearch-toronto_v1/members/8968154 | |
{ | |
"username": "Nick Van Weerdenburg", | |
"location": "Toronto, ON", | |
"member_since": "December 29, 2014", | |
"introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.", | |
"meetup_groups": [ | |
"Agile Experience Design Toronto", | |
"AngularJS Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)", | |
"(UXD / UX) User Experience Design Toronto", | |
"#DevTO" | |
], | |
"number_of_groups": 5, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154" | |
} | |
# 2nd document | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# End of scroll | |
# SCAN from previous timestamp: the old LAST_TIMESTAMP becomes PREVIOUS_TIMESTAMP, then fetch the new LAST_TIMESTAMP
GET elasticsearch-toronto_v1/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"last_timestamp": { | |
"max": { | |
"field": "_timestamp" | |
} | |
} | |
} | |
} | |
GET elasticsearch-toronto_v1/_search?search_type=scan&scroll=1m | |
{ | |
"query": { | |
"range": { | |
"_timestamp": { | |
"gt": PREVIOUS_TIMESTAMP, | |
"lte": LAST_TIMESTAMP | |
} | |
} | |
}, | |
"size": 1 | |
} | |
# 3rd document | |
GET _search/scroll?scroll=1m&scroll_id=SCROLL_ID | |
# Final Notes: | |
# You will still need to stop incoming documents before the alias switch, otherwise there is always a chance of missing some documents | |
# If stopping incoming documents is not possible, you might need to do a final update (reindex) after the alias switch. The problem is that a document from the previous index might already have been updated on the new index and should not be overwritten. Checking document versions is one possible solution.
# Scan does NO sorting so you can't make any assumptions about reindexing failures. Either log all failures and fix them individually, or repeat the scan interval. | |
# Things can get more complicated when a new version of your software is released with a new version of your index. It's better to split the tasks if possible. | |
# ----------------- | |
# END of Chapter 3 | |
# ----------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment