spinscale/training-examples.json

## training-examples.json
# ---- Dictionary decompounder: splitting compound words into their parts ----
# Drop any index left over from a previous run.
DELETE decompound

# Two custom analyzers share one dictionary_decompounder filter; the second
# analyzer adds the "unique" filter at the end of its chain. The "name" field
# uses the first analyzer, its "name.unique" sub-field uses the second.
PUT decompound
{
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "my_decompound_analyzer",
          "fields": {
            "unique": {
              "type": "text",
              "analyzer": "my_decompound_analyzer_unique"
            }
          }
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "filter": {
        "my_decompound_filter": {
          "type": "dictionary_decompounder",
          "word_list": [
            "topf",
            "schiff"
          ]
        }
      },
      "analyzer": {
        "my_decompound_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "my_decompound_filter"
          ]
        },
        "my_decompound_analyzer_unique": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "my_decompound_filter",
            "unique"
          ]
        }
      }
    }
  }
}

# Inspect the settings the index was created with.
GET decompound/_settings

# Analyze without naming an analyzer in the request.
POST decompound/_analyze
{
  "text": "blumentopf"
}

# Ad-hoc chain: tokenizer and filters are spelled out in the request itself
# (note the mixed-case input).
POST decompound/_analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    "my_decompound_filter"
  ],
  "text": "blumentOPF"
}

# The same chain, referenced through the named analyzer.
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer",
  "text": "blumentopf"
}

# A compound containing the second dictionary word.
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer",
  "text": "dampfschiff"
}

# Repeated input, first without and then with the "unique" filter.
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer",
  "text": "blumentopf blumentopf blumentopf"
}

POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer_unique",
  "text": "blumentopf blumentopf blumentopf"
}


# Index one document and search for a word part on both fields.
PUT decompound/doc/1
{
  "name": "ein wunderschöner blumentopf"
}

GET decompound/doc/_search
{
  "query": {
    "match": {
      "name": "topf"
    }
  }
}

GET decompound/doc/_search
{
  "query": {
    "match": {
      "name.unique": "topf"
    }
  }
}

# See also: https://github.com/jprante/elasticsearch-analysis-decompound


# ---- Suffix-based searching with the reverse token filter ----
# Drop any index left over from a previous run.
DELETE reverse

# "name" keeps its default analysis; the "name.reverse" sub-field is analyzed
# with a custom analyzer that lowercases and then reverses every token.
PUT reverse
{
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "reverse": {
              "type": "text",
              "analyzer": "reverse"
            }
          }
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "reverse": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "reverse"
          ]
        }
      }
    }
  }
}

PUT reverse/doc/1
{
  "name": "blumentopf"
}

# Variant 1: leading wildcard on the plain field.
# Inefficient, inverted index traversal is tricky.
GET reverse/_search
{
  "query": {
    "wildcard": {
      "name": {
        "value": "*topf"
      }
    }
  }
}

# Variant 2: prefix query on the reversed sub-field, using the reversed
# suffix ("topf" -> "fpot").
GET reverse/_search
{
  "query": {
    "prefix": {
      "name.reverse": {
        "value": "fpot"
      }
    }
  }
}

# ---- Highlighting with search-time synonyms ----
# Drop any index left over from a previous run.
DELETE synonyms

# Synonyms are applied at search time only: "name" is indexed with the standard
# analyzer and queried with an analyzer that adds a synonym_graph filter.
# The multi-word synonym is spelled "bayerische" so that it lines up with the
# text of document 2 below.
PUT synonyms
{
  "settings": {
    "analysis": {
      "analyzer": {
        "query_analyzer_with_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "my_synonyms"
          ]
        }
      },
      "filter": {
        "my_synonyms": {
          "type": "synonym_graph",
          "synonyms": [
            "bmw, bayerische motorenwerke",
            "vw, volkswagen"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "standard",
          "search_analyzer": "query_analyzer_with_synonyms"
        }
      }
    }
  }
}

PUT synonyms/doc/1
{
  "name" : "Volkswagen Golf"
}

# Query with the abbreviation; the document only contains the long form.
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "vw"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

# This document contains both the abbreviation and the multi-word long form.
PUT synonyms/doc/2
{
  "name" : "This is a BMW 320i, which is a nice car. BMW stands for Bayerische Motorenwerke"
}

GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bmw"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

# Same multi-word query twice, to compare the "plain" and "unified" highlighters.
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bayerische motorenwerke"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "type" : "plain"
      }
    }
  }
}

GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bayerische motorenwerke"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "type" : "unified"
      }
    }
  }
}


# italian job
# ---- Parent/child relations with the join field ----
# Drop any index left over from a previous run.
DELETE join

# One relation: "company" documents are parents of "preferred_company" documents.
PUT join
{
  "mappings": {
    "doc": {
      "properties": {
        "my_join_field": {
          "type": "join",
          "relations": {
            "company": "preferred_company"
          }
        }
      }
    }
  }
}

# Two parent documents.
PUT join/doc/1
{
  "text": "Amazon Logistics GmbH",
  "my_join_field": {
    "name": "company"
  }
}

PUT join/doc/2
{
  "text": "Amazon fulfilment A/S",
  "my_join_field": {
    "name": "company"
  }
}

GET join/_search
{
  "query": {
    "match": {
      "text": "amazon"
    }
  }
}

# alex prefers the logistics gmbh
# Children are routed with the id of their parent (routing=<parent id>).
PUT join/doc/3?routing=1
{
  "type" : "preferred_company",
  "company": "Amazon Logistics GmbH",
  "user" : "alex",
  "my_join_field": {
    "name": "preferred_company",
    "parent" : "1"
  }
}

# paul prefers the fulfilment A/S
# NOTE: unlike document 3, this child keeps the company name under "text",
# so it can itself match the "text" queries in this section.
PUT join/doc/4?routing=2
{
  "type" : "preferred_company",
  "text": "Amazon fulfilment A/S",
  "user" : "paul",
  "my_join_field": {
    "name": "preferred_company",
    "parent" : "2"
  }
}

# Search companies, preferring the ones alex has marked. The filter restricts
# hits to parent documents; the relation name is indexed under the join field's
# own name ("my_join_field"), not under a "name" sub-field.
GET join/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "text": "amazon"
          }
        }
      ],
      "filter": {
        "term": {
          "my_join_field": "company"
        }
      },
      "should": [
        {
          "has_child": {
            "type": "preferred_company",
            "query": {
              "match": {
                "user": "alex"
              }
            }
          }
        }
      ]
    }
  }
}

# Same idea for paul, without the relation filter and with inner_hits
# requested on the has_child clause.
GET join/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "text": "amazon"
          }
        }
      ],
      "should": [
        {
          "has_child": {
            "type": "preferred_company",
            "query": {
              "match": {
                "user": "paul"
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}


# ---- Phonetic matching ----
# Prerequisite: bin/elasticsearch-plugin install analysis-phonetic
# Drop any index left over from a previous run.
DELETE phonetic_sample

# "name" is analyzed with a metaphone encoder, "name.koelner" with the
# koelnerphonetik encoder. Both filters set "replace": false.
# The "standard" token filter is left out of both chains: it is a no-op that
# is deprecated in 6.5 and removed in 7.0.
PUT phonetic_sample
{
  "mappings": {
    "person" : {
      "properties": {
        "name" : {
          "type": "text",
          "analyzer": "my_analyzer",
          "fields": {
            "koelner" : {
              "type": "text",
              "analyzer": "koelner_analyzer"
            }
          }
        }
      }
    }
  },
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "my_analyzer": {
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "my_metaphone"
            ]
          },
          "koelner_analyzer": {
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "koelner_metaphone"
            ]
          }
        },
        "filter": {
          "my_metaphone": {
            "type": "phonetic",
            "encoder": "metaphone",
            "replace": false
          },
          "koelner_metaphone": {
            "type": "phonetic",
            "encoder": "koelnerphonetik",
            "replace": false
          }
        }
      }
    }
  }
}

# Inspect the tokens each analyzer emits.
POST phonetic_sample/_analyze
{
  "analyzer": "my_analyzer",
  "text": "Joe Blocks"
}

POST phonetic_sample/_analyze
{
  "analyzer": "koelner_analyzer",
  "text": "Aleksander"
}


# Four spellings of the same surname.
PUT phonetic_sample/person/1
{
  "name" : "Peter Meyer"
}

PUT phonetic_sample/person/2
{
  "name" : "Peter Meier"
}

PUT phonetic_sample/person/3
{
  "name" : "Peter Maier"
}

PUT phonetic_sample/person/4
{
  "name" : "Peter Mayer"
}

# Search one spelling on each field to compare the two encoders.
GET phonetic_sample/_search
{
  "query": {
    "match": {
      "name": "Maier"
    }
  }
}

GET phonetic_sample/_search
{
  "query": {
    "match": {
      "name.koelner": "Maier"
    }
  }
}
	# ---- Dictionary decompounder: splitting compound words into their parts ----
	# Drop any index left over from a previous run.
	DELETE decompound

	# Two custom analyzers share one dictionary_decompounder filter; the second
	# analyzer adds the "unique" filter at the end of its chain. The "name" field
	# uses the first analyzer, its "name.unique" sub-field uses the second.
	PUT decompound
	{
	  "mappings": {
	    "doc": {
	      "properties": {
	        "name": {
	          "type": "text",
	          "analyzer": "my_decompound_analyzer",
	          "fields": {
	            "unique": {
	              "type": "text",
	              "analyzer": "my_decompound_analyzer_unique"
	            }
	          }
	        }
	      }
	    }
	  },
	  "settings": {
	    "analysis": {
	      "filter": {
	        "my_decompound_filter": {
	          "type": "dictionary_decompounder",
	          "word_list": [
	            "topf",
	            "schiff"
	          ]
	        }
	      },
	      "analyzer": {
	        "my_decompound_analyzer": {
	          "type": "custom",
	          "tokenizer": "standard",
	          "filter": [
	            "lowercase",
	            "my_decompound_filter"
	          ]
	        },
	        "my_decompound_analyzer_unique": {
	          "type": "custom",
	          "tokenizer": "standard",
	          "filter": [
	            "lowercase",
	            "my_decompound_filter",
	            "unique"
	          ]
	        }
	      }
	    }
	  }
	}

	# Inspect the settings the index was created with.
	GET decompound/_settings

	# Analyze without naming an analyzer in the request.
	POST decompound/_analyze
	{
	  "text": "blumentopf"
	}

	# Ad-hoc chain: tokenizer and filters are spelled out in the request itself
	# (note the mixed-case input).
	POST decompound/_analyze
	{
	  "tokenizer": "standard",
	  "filter": [
	    "lowercase",
	    "my_decompound_filter"
	  ],
	  "text": "blumentOPF"
	}

	# The same chain, referenced through the named analyzer.
	POST decompound/_analyze
	{
	  "analyzer": "my_decompound_analyzer",
	  "text": "blumentopf"
	}

	# A compound containing the second dictionary word.
	POST decompound/_analyze
	{
	  "analyzer": "my_decompound_analyzer",
	  "text": "dampfschiff"
	}

	# Repeated input, first without and then with the "unique" filter.
	POST decompound/_analyze
	{
	  "analyzer": "my_decompound_analyzer",
	  "text": "blumentopf blumentopf blumentopf"
	}

	POST decompound/_analyze
	{
	  "analyzer": "my_decompound_analyzer_unique",
	  "text": "blumentopf blumentopf blumentopf"
	}


	# Index one document and search for a word part on both fields.
	PUT decompound/doc/1
	{
	  "name": "ein wunderschöner blumentopf"
	}

	GET decompound/doc/_search
	{
	  "query": {
	    "match": {
	      "name": "topf"
	    }
	  }
	}

	GET decompound/doc/_search
	{
	  "query": {
	    "match": {
	      "name.unique": "topf"
	    }
	  }
	}

	# See also: https://github.com/jprante/elasticsearch-analysis-decompound


	# ---- Suffix-based searching with the reverse token filter ----
	# Drop any index left over from a previous run.
	DELETE reverse

	# "name" keeps its default analysis; the "name.reverse" sub-field is analyzed
	# with a custom analyzer that lowercases and then reverses every token.
	PUT reverse
	{
	  "mappings": {
	    "doc": {
	      "properties": {
	        "name": {
	          "type": "text",
	          "fields": {
	            "reverse": {
	              "type": "text",
	              "analyzer": "reverse"
	            }
	          }
	        }
	      }
	    }
	  },
	  "settings": {
	    "analysis": {
	      "analyzer": {
	        "reverse": {
	          "type": "custom",
	          "tokenizer": "standard",
	          "filter": [
	            "lowercase",
	            "reverse"
	          ]
	        }
	      }
	    }
	  }
	}

	PUT reverse/doc/1
	{
	  "name": "blumentopf"
	}

	# Variant 1: leading wildcard on the plain field.
	# Inefficient, inverted index traversal is tricky.
	GET reverse/_search
	{
	  "query": {
	    "wildcard": {
	      "name": {
	        "value": "*topf"
	      }
	    }
	  }
	}

	# Variant 2: prefix query on the reversed sub-field, using the reversed
	# suffix ("topf" -> "fpot").
	GET reverse/_search
	{
	  "query": {
	    "prefix": {
	      "name.reverse": {
	        "value": "fpot"
	      }
	    }
	  }
	}

	# ---- Highlighting with search-time synonyms ----
	# Drop any index left over from a previous run.
	DELETE synonyms

	# Synonyms are applied at search time only: "name" is indexed with the standard
	# analyzer and queried with an analyzer that adds a synonym_graph filter.
	# The multi-word synonym is spelled "bayerische" so that it lines up with the
	# text of document 2 below.
	PUT synonyms
	{
	  "settings": {
	    "analysis": {
	      "analyzer": {
	        "query_analyzer_with_synonyms": {
	          "type": "custom",
	          "tokenizer": "standard",
	          "filter": [
	            "lowercase",
	            "my_synonyms"
	          ]
	        }
	      },
	      "filter": {
	        "my_synonyms": {
	          "type": "synonym_graph",
	          "synonyms": [
	            "bmw, bayerische motorenwerke",
	            "vw, volkswagen"
	          ]
	        }
	      }
	    }
	  },
	  "mappings": {
	    "doc": {
	      "properties": {
	        "name": {
	          "type": "text",
	          "analyzer": "standard",
	          "search_analyzer": "query_analyzer_with_synonyms"
	        }
	      }
	    }
	  }
	}

	PUT synonyms/doc/1
	{
	  "name" : "Volkswagen Golf"
	}

	# Query with the abbreviation; the document only contains the long form.
	GET synonyms/_search
	{
	  "query": {
	    "match": {
	      "name": "vw"
	    }
	  },
	  "highlight": {
	    "fields": {
	      "name": {}
	    }
	  }
	}

	# This document contains both the abbreviation and the multi-word long form.
	PUT synonyms/doc/2
	{
	  "name" : "This is a BMW 320i, which is a nice car. BMW stands for Bayerische Motorenwerke"
	}

	GET synonyms/_search
	{
	  "query": {
	    "match": {
	      "name": "bmw"
	    }
	  },
	  "highlight": {
	    "fields": {
	      "name": {}
	    }
	  }
	}

	# Same multi-word query twice, to compare the "plain" and "unified" highlighters.
	GET synonyms/_search
	{
	  "query": {
	    "match": {
	      "name": "bayerische motorenwerke"
	    }
	  },
	  "highlight": {
	    "fields": {
	      "name": {
	        "type" : "plain"
	      }
	    }
	  }
	}

	GET synonyms/_search
	{
	  "query": {
	    "match": {
	      "name": "bayerische motorenwerke"
	    }
	  },
	  "highlight": {
	    "fields": {
	      "name": {
	        "type" : "unified"
	      }
	    }
	  }
	}



	# italian job
	# ---- Parent/child relations with the join field ----
	# Drop any index left over from a previous run.
	DELETE join

	# One relation: "company" documents are parents of "preferred_company" documents.
	PUT join
	{
	  "mappings": {
	    "doc": {
	      "properties": {
	        "my_join_field": {
	          "type": "join",
	          "relations": {
	            "company": "preferred_company"
	          }
	        }
	      }
	    }
	  }
	}

	# Two parent documents.
	PUT join/doc/1
	{
	  "text": "Amazon Logistics GmbH",
	  "my_join_field": {
	    "name": "company"
	  }
	}

	PUT join/doc/2
	{
	  "text": "Amazon fulfilment A/S",
	  "my_join_field": {
	    "name": "company"
	  }
	}

	GET join/_search
	{
	  "query": {
	    "match": {
	      "text": "amazon"
	    }
	  }
	}

	# alex prefers the logistics gmbh
	# Children are routed with the id of their parent (routing=<parent id>).
	PUT join/doc/3?routing=1
	{
	  "type" : "preferred_company",
	  "company": "Amazon Logistics GmbH",
	  "user" : "alex",
	  "my_join_field": {
	    "name": "preferred_company",
	    "parent" : "1"
	  }
	}

	# paul prefers the fulfilment A/S
	# NOTE: unlike document 3, this child keeps the company name under "text",
	# so it can itself match the "text" queries in this section.
	PUT join/doc/4?routing=2
	{
	  "type" : "preferred_company",
	  "text": "Amazon fulfilment A/S",
	  "user" : "paul",
	  "my_join_field": {
	    "name": "preferred_company",
	    "parent" : "2"
	  }
	}

	# Search companies, preferring the ones alex has marked. The filter restricts
	# hits to parent documents; the relation name is indexed under the join field's
	# own name ("my_join_field"), not under a "name" sub-field.
	GET join/_search
	{
	  "query": {
	    "bool": {
	      "must": [
	        {
	          "match": {
	            "text": "amazon"
	          }
	        }
	      ],
	      "filter": {
	        "term": {
	          "my_join_field": "company"
	        }
	      },
	      "should": [
	        {
	          "has_child": {
	            "type": "preferred_company",
	            "query": {
	              "match": {
	                "user": "alex"
	              }
	            }
	          }
	        }
	      ]
	    }
	  }
	}

	# Same idea for paul, without the relation filter and with inner_hits
	# requested on the has_child clause.
	GET join/_search
	{
	  "query": {
	    "bool": {
	      "must": [
	        {
	          "match": {
	            "text": "amazon"
	          }
	        }
	      ],
	      "should": [
	        {
	          "has_child": {
	            "type": "preferred_company",
	            "query": {
	              "match": {
	                "user": "paul"
	              }
	            },
	            "inner_hits": {}
	          }
	        }
	      ]
	    }
	  }
	}





	# ---- Phonetic matching ----
	# Prerequisite: bin/elasticsearch-plugin install analysis-phonetic
	# Drop any index left over from a previous run.
	DELETE phonetic_sample

	# "name" is analyzed with a metaphone encoder, "name.koelner" with the
	# koelnerphonetik encoder. Both filters set "replace": false.
	# The "standard" token filter is left out of both chains: it is a no-op that
	# is deprecated in 6.5 and removed in 7.0.
	PUT phonetic_sample
	{
	  "mappings": {
	    "person" : {
	      "properties": {
	        "name" : {
	          "type": "text",
	          "analyzer": "my_analyzer",
	          "fields": {
	            "koelner" : {
	              "type": "text",
	              "analyzer": "koelner_analyzer"
	            }
	          }
	        }
	      }
	    }
	  },
	  "settings": {
	    "index": {
	      "analysis": {
	        "analyzer": {
	          "my_analyzer": {
	            "tokenizer": "standard",
	            "filter": [
	              "lowercase",
	              "my_metaphone"
	            ]
	          },
	          "koelner_analyzer": {
	            "tokenizer": "standard",
	            "filter": [
	              "lowercase",
	              "koelner_metaphone"
	            ]
	          }
	        },
	        "filter": {
	          "my_metaphone": {
	            "type": "phonetic",
	            "encoder": "metaphone",
	            "replace": false
	          },
	          "koelner_metaphone": {
	            "type": "phonetic",
	            "encoder": "koelnerphonetik",
	            "replace": false
	          }
	        }
	      }
	    }
	  }
	}

	# Inspect the tokens each analyzer emits.
	POST phonetic_sample/_analyze
	{
	  "analyzer": "my_analyzer",
	  "text": "Joe Blocks"
	}

	POST phonetic_sample/_analyze
	{
	  "analyzer": "koelner_analyzer",
	  "text": "Aleksander"
	}


	# Four spellings of the same surname.
	PUT phonetic_sample/person/1
	{
	  "name" : "Peter Meyer"
	}

	PUT phonetic_sample/person/2
	{
	  "name" : "Peter Meier"
	}

	PUT phonetic_sample/person/3
	{
	  "name" : "Peter Maier"
	}

	PUT phonetic_sample/person/4
	{
	  "name" : "Peter Mayer"
	}

	# Search one spelling on each field to compare the two encoders.
	GET phonetic_sample/_search
	{
	  "query": {
	    "match": {
	      "name": "Maier"
	    }
	  }
	}

	GET phonetic_sample/_search
	{
	  "query": {
	    "match": {
	      "name.koelner": "Maier"
	    }
	  }
	}