Forked from abronner/Chapter 1: WHEN SCHEMA-LESS MET AGGREGATIONS
Last active
August 29, 2015 14:16
-
-
Save snasirca/be89f883d68ad79865c6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------------------------------- | |
# Elasticsearch-Toronto Meetup: The Power of Mapping (part 1 of 3) | |
# ---------------------------------------------------------------------------------------------------------------- | |
# My presentation at the first elasticsearch meetup in Toronto | |
# January 19, 2015 | |
# http://www.meetup.com/ElasticSearch-toronto | |
# http://www.meetup.com/Elasticsearch-Toronto/events/218903340 | |
# ---------------------------------------------------------------------------------------------------------------- | |
# Demo with elasticsearch 1.4.2 and Marvel/Sense | |
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html | |
# ---------------------------------------------------------------------------------------------------------------- | |
# --------------------------------------------- | |
# Chapter 1: WHEN SCHEMA-LESS MET AGGREGATIONS | |
# --------------------------------------------- | |
# STORY: as a meetup organizer I want to FIND, SORT and AGGREGATE data about my group members in order to gain deeper insight about my group. | |
# 'Naive' schema-less INSERT: | |
# Index the first member document without defining any mapping beforehand,
# so every field falls back to the dynamically inferred (default) mapping.
POST elasticsearch-toronto/members/182513481
{
  "username": "Amit",
  "location": "Toronto, ON",
  "member_since": "November 25, 2014",
  "introduction": "You know for search",
  "meetup_groups": [
    "(UXD / UX) User Experience Design Toronto",
    "AngularJS Toronto",
    "Big Data Developers in Toronto",
    "DevOps Toronto",
    "Full Stack Toronto Meetup",
    "Meteor Toronto",
    "PhoneGap Toronto (#PhoneGapTO)"
  ],
  "number_of_groups": 7,
  "organizer": true,
  "link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481"
}
# Check: is it INDEXED? | |
GET elasticsearch-toronto/members/182513481 | |
# Quick note about 'HEAD' | |
HEAD elasticsearch-toronto/members/182513481 | |
# Can we SEARCH it? | |
# match all | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match_all": {} | |
} | |
} | |
# match by username | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match": { | |
"username": "Amit" | |
} | |
} | |
} | |
# match by location | |
# note: CASE INSENSITIVE | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match": { | |
"location": "toronto" | |
} | |
} | |
} | |
# match by meetup groups | |
# notes: BOOL query, PHRASE query | |
# All three clauses must match the same document:
#   - match:               a single analyzed term
#   - match_phrase:        the terms adjacent and in order
#   - match_phrase_prefix: a phrase whose last term is treated as a prefix
GET elasticsearch-toronto/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "meetup_groups": "angularjs"
          }
        },
        {
          "match_phrase": {
            "meetup_groups": "FULL STACK"
          }
        },
        {
          "match_phrase_prefix": {
            "meetup_groups": "Meteor TO"
          }
        }
      ]
    }
  }
}
# So far so good ! | |
# but, What about FACETS and AGGREGATIONS? | |
# aggregate by meetup groups | |
# Terms aggregation over the default-mapped "meetup_groups" field.
# "size": 0 at the top level suppresses the hits so only buckets are returned;
# the inner "size": 20 caps the number of buckets.
GET elasticsearch-toronto/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groups": {
      "terms": {
        "field": "meetup_groups",
        "size": 20
      }
    }
  }
}
# 'SCHEMA-LESS' EQUALS 'DEFAULT MAPPING' ! | |
# Here is our DEFAULT MAPPING | |
GET elasticsearch-toronto/_mapping | |
# And here is how DEFAULT STRING is ANALYZED | |
GET elasticsearch-toronto/_analyze | |
{"(UXD / UX) User Experience Design Toronto"} | |
# MAP AGGREGATION FIELDS AS NOT_ANALYZED ! | |
# update mapping: | |
# Add a separate string field that is indexed verbatim (not_analyzed), so each
# group name is kept as one term for aggregations.
PUT elasticsearch-toronto/members/_mapping
{
  "members": {
    "properties": {
      "meetup_groups_not_analyzed": {
        "type": "string",
        "index": "not_analyzed"
      }
    }
  }
}
# check mapping: | |
GET elasticsearch-toronto/_mapping | |
# update doc: | |
POST elasticsearch-toronto/members/182513481/_update | |
{ | |
"doc": { | |
"meetup_groups_not_analyzed": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
] | |
} | |
} | |
# check aggregation: | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups_not_analyzed", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# PROBLEM (#1): need to INDEX the SAME FIELD TWICE | |
GET elasticsearch-toronto/members/182513481 | |
# SOLUTION (#1): USE MULTI FIELDS ! | |
# Turn "meetup_groups" into a multi-field: the main field stays an analyzed
# string, and the "not_analyzed" sub-field (addressed as
# "meetup_groups.not_analyzed") indexes the same value verbatim.
PUT elasticsearch-toronto/members/_mapping
{
  "members": {
    "properties": {
      "meetup_groups": {
        "type": "string",
        "fields": {
          "not_analyzed": {
            "type": "string",
            "index": "not_analyzed"
          }
        }
      }
    }
  }
}
# check: | |
GET elasticsearch-toronto/_mapping | |
# re-index original doc: | |
POST elasticsearch-toronto/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
# check: | |
GET elasticsearch-toronto/members/182513481 | |
# check aggregation: | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.not_analyzed", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# PROBLEM (#2): NOT ANALYZED is CASE SENSITIVE | |
# Sometimes it's good, sometimes less so ...
# insert another member:
POST elasticsearch-toronto/members/13589068 | |
{ | |
"username": "Roman B.", | |
"location": "East York, ON", | |
"member_since": "November 29, 2014", | |
"introduction": "Full stack Dev", | |
"meetup_groups": [ | |
"Angularjs Toronto", | |
"Business Connection Exchange Toronto", | |
"Devops Toronto", | |
"Full stack Toronto Meetup", | |
"Docker Online Meetup" | |
], | |
"number_of_groups": 5, | |
"organizer": false, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" | |
} | |
# check aggregation: | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.not_analyzed", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# SOLUTION (#2): BUILD YOUR OWN ANALYZER | |
# check | |
GET elasticsearch-toronto/_settings | |
# must close index to update analysis settings | |
POST elasticsearch-toronto/_close | |
# CUSTOM ANALYZER: KEYWORD TOKENIZER + LOWERCASE FILTER | |
# Register the custom analyzer "keyword_lowercase": the keyword tokenizer emits
# the whole value as a single token and the lowercase filter normalizes case.
# Analysis settings can only be changed while the index is closed.
PUT elasticsearch-toronto/_settings
{
  "analysis": {
    "analyzer": {
      "keyword_lowercase": {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": ["lowercase"]
      }
    }
  }
}
# re-open index | |
POST elasticsearch-toronto/_open | |
# check settings | |
GET elasticsearch-toronto/_settings | |
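# check the custom analyzer
# NOTE(review): ES 1.x presumably analyzes the raw request body as text, so the {" and "} wrapper below may end up inside the single keyword token - verify, or pass the value through the 'text' query parameter instead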
GET elasticsearch-toronto/_analyze?analyzer=keyword_lowercase | |
{"(UXD / UX) User Experience Design Toronto"} | |
# update MULTI FIELD MAPPING | |
# Extend the "meetup_groups" multi-field with a second sub-field that uses the
# custom "keyword_lowercase" analyzer, alongside the existing verbatim one.
PUT elasticsearch-toronto/members/_mapping
{
  "members": {
    "properties": {
      "meetup_groups": {
        "type": "string",
        "fields": {
          "not_analyzed": {
            "type": "string",
            "index": "not_analyzed"
          },
          "keyword_lowercase": {
            "type": "string",
            "index": "analyzed",
            "analyzer": "keyword_lowercase"
          }
        }
      }
    }
  }
}
# re-index docs: | |
POST elasticsearch-toronto/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
POST elasticsearch-toronto/members/13589068 | |
{ | |
"username": "Roman B.", | |
"location": "East York, ON", | |
"member_since": "November 29, 2014", | |
"introduction": "Full stack Dev", | |
"meetup_groups": [ | |
"Angularjs Toronto", | |
"Business Connection Exchange Toronto", | |
"Devops Toronto", | |
"Full stack Toronto Meetup", | |
"Docker Online Meetup" | |
], | |
"number_of_groups": 5, | |
"organizer": false, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" | |
} | |
# and check aggregation: | |
GET elasticsearch-toronto/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups.keyword_lowercase", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# ----------------- | |
# END of Chapter 1 | |
# ----------------- | |
# Lessons: | |
# - SCHEMA-LESS EQUALS DEFAULT MAPPING | |
# - AGGREGATION FIELDS SHOULD NOT BE TOKENIZED | |
# - BUILD YOUR OWN ANALYZERS | |
# - USE MULTI-FIELDS | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------------------------------- | |
# Elasticsearch-Toronto Meetup: The Power of Mapping (part 2 of 3) | |
# ---------------------------------------------------------------------------------------------------------------- | |
# My presentation at the first elasticsearch meetup in Toronto | |
# January 19, 2015 | |
# http://www.meetup.com/ElasticSearch-toronto | |
# http://www.meetup.com/Elasticsearch-Toronto/events/218903340 | |
# ---------------------------------------------------------------------------------------------------------------- | |
# Demo with elasticsearch 1.4.2 and Marvel/Sense | |
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html | |
# ---------------------------------------------------------------------------------------------------------------- | |
# ---------------------------------------- | |
# Chapter 2: WHEN SCHEMA-LESS MET LANGUAGE | |
# ---------------------------------------- | |
# (reminder) STORY: as a meetup organizer I want to FIND, SORT and AGGREGATE data about my group members in order to gain deeper insight about my group. | |
# PROBLEM (#1): ACCENTED CHARACTERS (é, à, ç, ü, ô, ...) | |
# add a member: | |
POST elasticsearch-toronto/members/22348381 | |
{ | |
"username": "Diego Muñoz Escalante", | |
"location": "Toronto, ON", | |
"member_since": "November 29, 2014", | |
"meetup_groups": [ | |
"Web Design Network", | |
"AngularJS Toronto", | |
"Ember Hack Night, Toronto", | |
"Full Stack Toronto Meetup" | |
], | |
"number_of_groups": 4, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/22348381" | |
} | |
# check: | |
GET elasticsearch-toronto/members/22348381 | |
# search by username | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match": { | |
"username": "Munoz" | |
} | |
} | |
} | |
# Oops ... | |
# THIS PROBLEM APPLIES TO ALL ACCENTED CHARACTERS | |
# SOLUTION (#1): ASCII FOLDING FILTER
# check | |
GET elasticsearch-toronto/_settings | |
# must close index to update analysis settings | |
POST elasticsearch-toronto/_close | |
# CUSTOM ANALYZER: ASCII FOLDING FILTER | |
# Register the custom analyzer "ascii_string": standard tokenizer followed by
# the asciifolding filter. No lowercase filter is configured here.
# Analysis settings can only be changed while the index is closed.
PUT elasticsearch-toronto/_settings
{
  "analysis": {
    "analyzer": {
      "ascii_string": {
        "type": "custom",
        "tokenizer": "standard",
        "filter": ["asciifolding"]
      }
    }
  }
}
# re-open index | |
POST elasticsearch-toronto/_open | |
# check settings | |
GET elasticsearch-toronto/_settings | |
# check analyzer | |
GET elasticsearch-toronto/_analyze?analyzer=ascii_string | |
{"Diego Muñoz Escalante"} | |
# update mapping | |
# Add the sub-field "username.ascii", analyzed with the custom "ascii_string"
# analyzer; the main "username" field keeps its existing string mapping.
PUT elasticsearch-toronto/members/_mapping
{
  "members": {
    "properties": {
      "username": {
        "type": "string",
        "fields": {
          "ascii": {
            "type": "string",
            "index": "analyzed",
            "analyzer": "ascii_string"
          }
        }
      }
    }
  }
}
# re-index doc | |
POST elasticsearch-toronto/members/22348381 | |
{ | |
"username": "Diego Muñoz Escalante", | |
"location": "Toronto, ON", | |
"member_since": "November 29, 2014", | |
"meetup_groups": [ | |
"Web Design Network", | |
"AngularJS Toronto", | |
"Ember Hack Night, Toronto", | |
"Full Stack Toronto Meetup" | |
], | |
"number_of_groups": 4, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/22348381" | |
} | |
# search again | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match": { | |
"username.ascii": "Munoz" | |
} | |
} | |
} | |
# et voilà ! | |
# PROBLEM (#2): LANGUAGE SPECIFICS | |
# add a member | |
POST elasticsearch-toronto/members/8968154 | |
{ | |
"username": "Nick Van Weerdenburg", | |
"location": "Toronto, ON", | |
"member_since": "December 29, 2014", | |
"introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.", | |
"meetup_groups": [ | |
"Agile Experience Design Toronto", | |
"AngularJS Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)", | |
"(UXD / UX) User Experience Design Toronto", | |
"#DevTO" | |
], | |
"number_of_groups": 5, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154" | |
} | |
# check: | |
GET elasticsearch-toronto/members/8968154 | |
# search the introduction: | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match": { | |
"introduction": "founders for developing specialized angular apps" | |
} | |
} | |
} | |
# WHAT JUST HAPPENED ??? | |
# There is a list of language-related issues: | |
# - singular/plural: 'founder' vs 'founders' | |
# - verb conjugation: 'specialized' vs 'specializing' | |
# - verb/noun forms: 'developing' vs 'development' | |
# - 'compounds': 'angular' vs 'AngularJS'
# - stopwords: common words in the language that are usually irrelevant for free text search | |
# RESULT WAS CHOSEN ONLY BASED ON THE WORD 'FOR' | |
# LANGUAGE-DEPENDENT(!) SOLUTIONS (#2):
# - stopword lists | |
# - stemmers | |
# - word delimiter
# check | |
GET elasticsearch-toronto/_settings | |
# must close index to update analysis settings | |
POST elasticsearch-toronto/_close | |
# CUSTOM ENGLISH ANALYZER | |
# Register a custom analyzer named "english". Filters run in the listed order:
# word_delimiter, lowercase, stop, snowball.
# NOTE(review): "english" is also the name of a built-in language analyzer;
# this index-level definition presumably takes precedence - confirm.
# Analysis settings can only be changed while the index is closed.
PUT elasticsearch-toronto/_settings
{
  "analysis": {
    "analyzer": {
      "english": {
        "type": "custom",
        "tokenizer": "standard",
        "filter": ["word_delimiter", "lowercase", "stop", "snowball"]
      }
    }
  }
}
# re-open index | |
POST elasticsearch-toronto/_open | |
# check settings | |
GET elasticsearch-toronto/_settings | |
# check analyzer | |
GET elasticsearch-toronto/_analyze?analyzer=english | |
{"Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX."} | |
GET elasticsearch-toronto/_analyze?analyzer=english | |
{"founders for developing specialized angular apps"} | |
# update mapping | |
# Add the sub-field "introduction.english", analyzed with the custom "english"
# analyzer; the main "introduction" field keeps its existing string mapping.
PUT elasticsearch-toronto/members/_mapping
{
  "members": {
    "properties": {
      "introduction": {
        "type": "string",
        "fields": {
          "english": {
            "type": "string",
            "index": "analyzed",
            "analyzer": "english"
          }
        }
      }
    }
  }
}
# re-index doc | |
POST elasticsearch-toronto/members/8968154 | |
{ | |
"username": "Nick Van Weerdenburg", | |
"location": "Toronto, ON", | |
"member_since": "December 29, 2014", | |
"introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.", | |
"meetup_groups": [ | |
"Agile Experience Design Toronto", | |
"AngularJS Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)", | |
"(UXD / UX) User Experience Design Toronto", | |
"#DevTO" | |
], | |
"number_of_groups": 5, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154" | |
} | |
# search again: | |
GET elasticsearch-toronto/_search | |
{ | |
"query": { | |
"match": { | |
"introduction.english": "founders for developing specialized angular apps" | |
} | |
} | |
} | |
# And that's more like it! | |
# PROBLEM (#3): DATE FORMAT | |
# For example: | |
# How many members joined before December 2014? | |
# check mapping | |
GET elasticsearch-toronto/members/_mapping | |
# update mapping | |
# Add the sub-field "member_since.date", which parses the same source string
# as a date using the pattern "MMM dd, yyyy" (values in this demo look like
# "November 25, 2014"). The main field stays a plain string.
PUT elasticsearch-toronto/members/_mapping
{
  "members": {
    "properties": {
      "member_since": {
        "type": "string",
        "fields": {
          "date": {
            "type": "date",
            "format": "MMM dd, yyyy"
          }
        }
      }
    }
  }
}
# check mapping | |
GET elasticsearch-toronto/members/_mapping | |
# re-index doc | |
POST elasticsearch-toronto/members/8968154 | |
{ | |
"username": "Nick Van Weerdenburg", | |
"location": "Toronto, ON", | |
"member_since": "December 29, 2014", | |
"introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.", | |
"meetup_groups": [ | |
"Agile Experience Design Toronto", | |
"AngularJS Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)", | |
"(UXD / UX) User Experience Design Toronto", | |
"#DevTO" | |
], | |
"number_of_groups": 5, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154" | |
} | |
# re-index doc | |
POST elasticsearch-toronto/members/22348381 | |
{ | |
"username": "Diego Muñoz Escalante", | |
"location": "Toronto, ON", | |
"member_since": "November 29, 2014", | |
"meetup_groups": [ | |
"Web Design Network", | |
"AngularJS Toronto", | |
"Ember Hack Night, Toronto", | |
"Full Stack Toronto Meetup" | |
], | |
"number_of_groups": 4, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/22348381" | |
} | |
# re-index doc | |
POST elasticsearch-toronto/members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
# date range aggregation: | |
# Split members into two buckets around December 2014 using the date sub-field.
# "format" describes how the "to"/"from" bounds below are written (month-year);
# it uses a four-letter year pattern to match the four-digit years in the bounds.
GET elasticsearch-toronto/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "range": {
      "date_range": {
        "field": "member_since.date",
        "format": "MM-yyyy",
        "ranges": [
          { "to": "12-2014" },
          { "from": "12-2014" }
        ]
      }
    }
  }
}
# ----------------- | |
# END of Chapter 2 | |
# ----------------- | |
# Lessons: | |
# - DESIGN YOUR INDEX FOR YOUR TARGET LANGUAGES | |
# - LANGUAGE SPECIFIC SOLUTIONS ARE NOT TRIVIAL | |
# - THIS IS JUST THE TIP OF THE ICEBERG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------------------------------- | |
# Elasticsearch-Toronto Meetup: The Power of Mapping (part 3 of 3) | |
# ---------------------------------------------------------------------------------------------------------------- | |
# My presentation at the first elasticsearch meetup in Toronto | |
# January 19, 2015 | |
# http://www.meetup.com/ElasticSearch-toronto | |
# http://www.meetup.com/Elasticsearch-Toronto/events/218903340 | |
# ---------------------------------------------------------------------------------------------------------------- | |
# Demo with elasticsearch 1.4.2 and Marvel/Sense | |
# Installation: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_installing_elasticsearch.html | |
# ---------------------------------------------------------------------------------------------------------------- | |
# ---------------------------------- | |
# Chapter 3: PUTTING IT ALL TOGETHER | |
# ---------------------------------- | |
# IN REAL-WORLD APPLICATIONS YOU MAY HAVE TO INDEX A LARGE NUMBER OF FIELDS THAT REQUIRE AGGREGATION, LANGUAGE-SPECIFIC SEARCH, SORTING AND OTHER TYPES OF CUSTOM ANALYSIS | |
# THIS RESULTS IN VERY LONG MAPPING DEFINITIONS THAT ARE HARD TO MAINTAIN | |
# DYNAMIC TEMPLATES MAY PROVIDE ELEGANT SOLUTIONS FOR SUCH CASES | |
# check | |
GET elasticsearch-toronto/_settings | |
# must close index to update analysis settings | |
POST elasticsearch-toronto/_close | |
# CUSTOM ANALYZERS | |
# Register the analyzers used by the dynamic templates in this chapter:
#   - keyword_lowercase:          whole value as one lowercased token
#   - keyword_lowercase_truncate: same, then cut by the "custom_truncate" filter
#   - english:                    standard tokenizer + asciifolding,
#                                 word_delimiter, lowercase, stop, snowball
# "custom_truncate" is a truncate token filter with length 10 (given as a JSON
# number rather than a string).
# Analysis settings can only be changed while the index is closed.
PUT elasticsearch-toronto/_settings
{
  "analysis": {
    "analyzer": {
      "keyword_lowercase": {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": ["lowercase"]
      },
      "keyword_lowercase_truncate": {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": [
          "lowercase",
          "custom_truncate"
        ]
      },
      "english": {
        "type": "custom",
        "tokenizer": "standard",
        "filter": [
          "asciifolding",
          "word_delimiter",
          "lowercase",
          "stop",
          "snowball"
        ]
      }
    },
    "filter": {
      "custom_truncate": {
        "type": "truncate",
        "length": 10
      }
    }
  }
}
# check | |
GET elasticsearch-toronto/_settings | |
# re-open index | |
POST elasticsearch-toronto/_open | |
# DYNAMIC TEMPLATES | |
# update mapping | |
# Dynamic templates on the _default_ mapping.
# The last template maps every new string field with the "english" analyzer and
# copies its value into three companion fields named "<field>_not_analyzed",
# "<field>_keyword_lowercase" and "<field>_sort". The first three templates
# match those companion names by regex and give each its own mapping.
# NOTE(review): the suffix templates are listed before the catch-all "*"
# template, presumably because templates are tried in order - confirm.
PUT elasticsearch-toronto/_default_/_mapping
{
  "_default_": {
    "dynamic_templates": [
      {
        "copy_to_not_analyzed": {
          "path_match": ".+_not_analyzed$",
          "match_pattern": "regex",
          "mapping": {
            "type": "string",
            "index": "not_analyzed",
            "ignore_above": 200
          }
        }
      },
      {
        "copy_to_keyword_lowercase": {
          "path_match": ".+_keyword_lowercase$",
          "match_pattern": "regex",
          "mapping": {
            "type": "string",
            "analyzer": "keyword_lowercase",
            "ignore_above": 200
          }
        }
      },
      {
        "copy_to_sort": {
          "path_match": ".+_sort$",
          "match_pattern": "regex",
          "mapping": {
            "type": "string",
            "analyzer": "keyword_lowercase_truncate"
          }
        }
      },
      {
        "default_english_string": {
          "match": "*",
          "match_mapping_type": "string",
          "mapping": {
            "type": "string",
            "analyzer": "english",
            "copy_to": [
              "{name}_not_analyzed",
              "{name}_keyword_lowercase",
              "{name}_sort"
            ]
          }
        }
      }
    ]
  }
}
# check mapping | |
GET elasticsearch-toronto/_mapping | |
# index doc | |
POST elasticsearch-toronto/template_members/8968154 | |
{ | |
"username": "Nick Van Weerdenburg", | |
"location": "Toronto, ON", | |
"member_since": "December 29, 2014", | |
"introduction": "Founder of http://rangle.io, a next-generation web and UX development firm specializing in AngularJS, Node, modern JS, and Lean UX.", | |
"meetup_groups": [ | |
"Agile Experience Design Toronto", | |
"AngularJS Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)", | |
"(UXD / UX) User Experience Design Toronto", | |
"#DevTO" | |
], | |
"number_of_groups": 5, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/8968154" | |
} | |
# index doc | |
POST elasticsearch-toronto/template_members/22348381 | |
{ | |
"username": "Diego Muñoz Escalante", | |
"location": "Toronto, ON", | |
"member_since": "November 29, 2014", | |
"meetup_groups": [ | |
"Web Design Network", | |
"AngularJS Toronto", | |
"Ember Hack Night, Toronto", | |
"Full Stack Toronto Meetup" | |
], | |
"number_of_groups": 4, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/22348381" | |
} | |
# index doc | |
POST elasticsearch-toronto/template_members/182513481 | |
{ | |
"username": "Amit", | |
"location": "Toronto, ON", | |
"member_since": "November 25, 2014", | |
"introduction": "You know for search", | |
"meetup_groups": [ | |
"(UXD / UX) User Experience Design Toronto", | |
"AngularJS Toronto", | |
"Big Data Developers in Toronto", | |
"DevOps Toronto", | |
"Full Stack Toronto Meetup", | |
"Meteor Toronto", | |
"PhoneGap Toronto (#PhoneGapTO)" | |
], | |
"number_of_groups": 7, | |
"organizer": true, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/182513481" | |
} | |
# index doc | |
POST elasticsearch-toronto/template_members/13589068 | |
{ | |
"username": "Roman B.", | |
"location": "East York, ON", | |
"member_since": "November 29, 2014", | |
"introduction": "Full stack Dev", | |
"meetup_groups": [ | |
"Angularjs Toronto", | |
"Business Connection Exchange Toronto", | |
"Devops Toronto", | |
"Full stack Toronto Meetup", | |
"Docker Online Meetup" | |
], | |
"number_of_groups": 5, | |
"organizer": false, | |
"link": "http://www.meetup.com/Elasticsearch-Toronto/members/13589068" | |
} | |
# check mapping | |
GET elasticsearch-toronto/template_members/_mapping | |
# REVISIT PREVIOUS EXAMPLES | |
# aggregation of meetup group: | |
GET elasticsearch-toronto/template_members/_search | |
{ | |
"size": 0, | |
"query": { | |
"match_all": {} | |
}, | |
"aggs": { | |
"groups": { | |
"terms": { | |
"field": "meetup_groups_keyword_lowercase", | |
"size": 20 | |
} | |
} | |
} | |
} | |
# language specific issues: | |
GET elasticsearch-toronto/template_members/_search | |
{ | |
"query": { | |
"match": { | |
"introduction": "founders for developing specialized angular apps" | |
} | |
} | |
} | |
# accented characters: | |
GET elasticsearch-toronto/template_members/_search | |
{ | |
"query": { | |
"match": { | |
"username": "Munoz" | |
} | |
} | |
} | |
# final example, sort all | |
# Return every document sorted ascending on the "username_sort" field, and
# include only the "username" field in each hit.
# The "fields" key is quoted so the body is valid JSON.
GET elasticsearch-toronto/template_members/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "username_sort": {
        "order": "asc"
      }
    }
  ],
  "fields": [
    "username"
  ]
}
# ----------------- | |
# END of Chapter 3 | |
# ----------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment