Skip to content

Instantly share code, notes, and snippets.

@tridungduong16
Last active October 30, 2018 04:33
Show Gist options
  • Save tridungduong16/0f8acb01612bd56e35739b76b97a37c0 to your computer and use it in GitHub Desktop.
Save tridungduong16/0f8acb01612bd56e35739b76b97a37c0 to your computer and use it in GitHub Desktop.

Types of query

Match query

Là truy vấn chuẩn để thực hiện full text query. Bao gồm truy vấn kết hợp và truy vấn cụm từ hoặc gần đúng. Match query chấp nhận văn bản, số, ngày tháng. Match query trả về các document chứa ít nhất 1 trong các từ trong truy vấn.

{
    "query" : {
        "bool" : {
            "must" : [
                { "match" : { "content" : "ruby python" } },
                { "match" : { "title" : "books for ruby" } }
            ]
        }
    }
}
    # Search body with a bool/should of two multi_match clauses. Both clauses
    # search the same text (term_query, defined outside this snippet) over the
    # same field list; they differ only in the multi_match "type".
    query = {
        "query" : {
            "bool": {
                "should": [
                {
                    # Clause 1: multi_match of type "phrase".
                    "multi_match" : {
                        "query" : term_query,
                        "type": "phrase",
                        # "^1.5" / "^10" suffixes are per-field boost factors.
                        "fields": ["title", "title_vie","title_origin", "actors^1.5", "extracted_tags_genre^10"],
                    }
                },
                {
                    # Clause 2: multi_match of type "cross_fields", same fields and boosts.
                    "multi_match" : {
                        "query" : term_query,
                        "type": "cross_fields",
                        "fields": ["title", "title_vie","title_origin", "actors^1.5", "extracted_tags_genre^10"],
                    }
                }
                ]
            }
        }
    }
    

Match phrase query

Match prefix query

String query

Range query

{
    "range" : {
        "age" : {
            "gte" : 10,
            "lte" : 20
        }
    }
}

Multi match query

{
    "query" : {
        "multi_match" : {
            "query" : "ruby",
            "fields" : ["title", "content"]
        }
    }
}

Multi layer

Bool query

POST _search
{
  "query": {
    "bool" : {
      "must" : {
        "term" : { "user" : "doankun" }
      },
      "filter": {
        "term" : { "tag" : "dep" }
      },
      "must_not" : {
        "range" : {
          "age" : { "gte" : 20, "lte" : 30 }
        }
      },
      "should" : [
        { "term" : { "tag" : "hi" } },
        { "term" : { "tag" : "elasticsearch" } }
      ],
      "minimum_should_match" : 1,
      "boost" : 1.0
    }
  }
}

Field value factor

GET /_search
{
    "query": {
        "function_score": {
            "field_value_factor": {
                "field": "likes",
                "factor": 1.2,
                "modifier": "sqrt",
                "missing": 1
            }
        }
    }
}
"query": {
    "function_score": {
        "query": {
            "multi_match": {
                "query": "Lambda Expressions",
                "fields": [ "title", "tags^0.8", "speakers.name^0.6", "description^0.3" ]
            }
        },
        "field_value_factor": {
            "field":    "counters.views",
            "modifier": "log1p",
            "factor":   2
        }
    }
}

sqrt(1.2 * doc['likes'].value)

Scripting score

# function_score search body: a multi_match for "search engine" over
# title/summary, combined (boost_mode "sum") with a field_value_factor
# function on num_reviews. Only the top 2 hits are requested.
query6 = {
    "query": {
        "function_score": {
            # Base text query.
            "query": {
                "multi_match": {
                    "query": "search engine",
                    "fields": ["title", "summary"],
                },
            },
            # Scoring function driven by the num_reviews field.
            "functions": [
                {"field_value_factor": {"field": "num_reviews"}},
            ],
            "boost_mode": "sum",
        },
    },
    "size": 2,
}
# function_score search body: a multi_match for "conan" over
# title/description, re-scored by an inline script_score.
# NOTE(review): the script source only references params.a; params.b is
# passed but not used by it.
# NOTE(review): Math.log(doc['count'].value) has no offset, unlike the
# viewCount term (2 + ...) -- confirm count can never be 0.
query = {
    "query": {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": "conan",
                    "fields": ["title", "description"],
                },
            },
            "script_score": {
                "script": {
                    "params": {"a": 5, "b": 1.2},
                    "source": "_score + Math.log(2 + doc['viewCount'].value) + Math.log(doc['count'].value)*params.a",
                },
            },
        },
    },
}

IF-ELSE condition

# Script with an if/else branch: below 1000 views the score is multiplied by
# the view count, otherwise a constant 1000 is returned.
script = "if (doc['viewCount'].value < 1000) { return _score * doc['viewCount'].value } return 1000;"

# function_score search body: multi_match for "conan" over title/description,
# re-scored by the script above.
# NOTE(review): params a and b are passed but the script does not reference them.
query = {
    "query": {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": "conan",
                    "fields": ["title", "description"],
                },
            },
            "script_score": {
                "script": {
                    "params": {"a": 5, "b": 1.2},
                    "source": script,
                },
            },
        },
    },
}

Fuzzy search

# Fuzzy multi_match over title/description for the text "mat danh".
# Elasticsearch only accepts an edit distance of 0, 1 or 2 (or "AUTO", which
# picks the distance from the term length); the previous value "5" is outside
# that range, so "AUTO" is used instead.
query = {
    "query": {
        "multi_match": {
            "fields": ["title", "description"],
            "query": "mat danh",
            "fuzziness": "AUTO",
        },
    },
}

# Run the search against the 'fplay' index. The two statements were fused on
# one line (a syntax error); the bare `result` is the notebook-style display
# of the response and is kept on its own line.
result = es.search(index='fplay', body=query)
result

new_score = old_score * log(1 + factor * number_of_votes)

boost_mode = "multiply", "sum", "avg", "min", "max"

ES Python

Create data with pandas

import json
import pandas as pd
import requests

# df is a dataframe or dataframe chunk coming from your reading logic
df = pd.read_csv("/home/duongtridung/Downloads/search_meta_data(2).csv")

# Serialize to one JSON document per line.
df_as_json = df.to_json(orient='records', lines=True)

# Build the _bulk payload: for every document, an action line carrying its
# _id followed by the document itself (without the _id field).
bulk_lines = []
for json_document in df_as_json.split('\n'):
    # The serialized text may end with a newline, which leaves an empty
    # trailing piece after the split; json.loads('') would raise on it.
    if not json_document.strip():
        continue
    jdict = json.loads(json_document)
    doc_id = jdict.pop('_id')
    bulk_lines.append(json.dumps({'index': {'_id': doc_id}}))
    bulk_lines.append(json.dumps(jdict))
# Every line, including the last, is newline-terminated.
final_json_string = ''.join(line + '\n' for line in bulk_lines)

headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
# For a remote cluster, swap in its host, e.g.
# http://elasticsearch.host:9200/new_doc/docs/_bulk
r = requests.post('http://localhost:9200/fplay/docs/_bulk', data=final_json_string, headers=headers, timeout=60)
# Surface HTTP-level failures instead of silently ignoring the response.
r.raise_for_status()

Term suggestion

POST /fplay/_search?pretty
{
"suggest": 
    {"my-suggestion": 
      { "text": "conann", 
        "term": { "field":"title"}
}
}
}

Create completion field

PUT /fplay_view_count/docs/_mapping
{
        "properties" : {
            "description" : { "type" : "completion",
                          "analyzer" : "simple",
                          "search_analyzer" : "simple"
            }
         }
}

Update Data

# Send an update for document id 4 (index 'books', doc_type "articles") whose
# "doc" body carries num_reviews = 40.
resp = es.update(
    index='books',
    doc_type="articles",
    id=4,
    body={"doc": {"num_reviews": 40}},
)

References

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment