Skip to content

Instantly share code, notes, and snippets.

@mihaichris
Last active October 1, 2022 13:15
Show Gist options
  • Save mihaichris/2a9c85a6697bb8e935046661840b2f3f to your computer and use it in GitHub Desktop.
Save mihaichris/2a9c85a6697bb8e935046661840b2f3f to your computer and use it in GitHub Desktop.
This file is designed to capture a few useful commands to do actions with the OpenSearch engine.
from opensearchpy import OpenSearch
host = 'localhost'
port = 9200
auth = ('admin', 'admin') # For testing only. Don't store credentials in code.
# Create the client with SSL/TLS enabled, but hostname and certificate verification disabled.
client = OpenSearch(
hosts=[{'host': host, 'port': port}],
http_compress=True, # enables gzip compression for request bodies
http_auth=auth,
# client_cert = client_cert_path,
# client_key = client_key_path,
use_ssl=True,
verify_certs=False,
ssl_assert_hostname=False,
ssl_show_warn=False,
)
# Do a few checks before we start indexing:
print(client.cat.health())
print(client.cat.indices())
# Create an index with non-default settings.
index_name = 'searchml_revisited'
index_body = {
'settings': {
'index': {
'query': {
'default_field': "body"
}
}
}
}
client.indices.create(index_name, body=index_body)
# Add sample document to the index.
docs = [
{
"id": "doc_a",
"title": "Fox and Hounds",
"body": "The quick red fox jumped over the lazy brown dogs.",
"price": "5.99",
"in_stock": True,
"category": "childrens"},
{
"id": "doc_b",
"title": "Fox wins championship",
"body": "Wearing all red, the Fox jumped out to a lead in the race over the Dog.",
"price": "15.13",
"in_stock": True,
"category": "sports"},
{
"id": "doc_c",
"title": "Lead Paint Removal",
"body": "All lead must be removed from the brown and red paint.",
"price": "150.21",
"in_stock": False,
"category": "instructional"},
{
"id": "doc_d",
"title": "The Three Little Pigs Revisted",
"price": "3.51",
"in_stock": True,
"body": "The big, bad wolf huffed and puffed and blew the house down. The end.",
"category": "childrens"}
]
for doc in docs:
doc_id = doc["id"]
print("Indexing {}".format(doc_id))
client.index(
index=index_name,
body=doc,
id=doc_id,
refresh=True
)
# Verify they are in:
print(client.cat.count(index_name, params={"v": "true"}))
# Get the index mappings
print(client.indices.get_mapping(index_name))
# Create a new index, this time with different mappings
index_name = 'searchml_revisited_custom_mappings'
index_body = {
'settings': {
'index': {
'query': {
'default_field': "body"
}
}
},
"mappings": {
"properties": {
"title": {"type": "text", "analyzer": "english"},
"body": {"type": "text", "analyzer": "english"},
"in_stock": {"type": "boolean"},
"category": {"type": "keyword", "ignore_above": "256"},
"price": {"type": "float"}
}
}
}
client.indices.create(index_name, body=index_body)
for doc in docs:
doc_id = doc["id"]
print("Indexing {}".format(doc_id))
response = client.index(
index=index_name,
body=doc,
id=doc_id,
refresh=True
)
print('\n\tResponse:')
print(response)
# Do some searches
q = 'dogs'
query = {
'size': 5,
'query': {
'multi_match': {
'query': q,
'fields': ['title^2', 'body']
}
}
}
client.search(
body=query,
index=index_name
)
# try a phrase query
q = 'fox dog'
query = {
'size': 5,
'query': {
'match_phrase': {
'body': {"query": q}
}
}
}
client.search(
body=query,
index=index_name
)
# try a phrase query with slop
q = 'fox dog'
query = {
'size': 5,
'query': {
'match_phrase': {
'body': {"query": q, "slop": 10}
}
}
}
client.search(
body=query,
index=index_name
)
# try a match all query with a filter and a price factor
query = {
'size': 5,
'query': {
"function_score": {
"query": {
"bool": {
"must": [
{"match_all": {}}
],
"filter": [
{"term": {"category": "childrens"}}
]
}
},
"field_value_factor": {
"field": "price",
"missing": 1
}
}
}
}
client.search(
body=query,
index=index_name
)
###################
# Aggregations
query = {
'size': 0,
'query': {
"match_all": {}
},
'aggs': {
"category": {
"terms": {
"field": "category",
"size": 10,
"missing": "N/A",
"min_doc_count": 0
}
}
}
}
client.search(
body=query,
index=index_name
)
# Terms on price
query = {
'size': 0,
'query': {
"match_all": {}
},
'aggs': {
"price": {
"terms": {
"field": "price",
"size": 10,
"min_doc_count": 0
}
}
}
}
client.search(
body=query,
index=index_name
)
# Range aggregation
query = {
'size': 0,
'query': {
"match_all": {}
},
'aggs': {
"price": {
"range": {
"field": "price",
"ranges": [
{
"to": 5
},
{
"from": 5,
"to": 20
},
{
"from": 20,
}
]
}
}
}
}
client.search(
body = query,
index = index_name
)
######################################
#####
##### DANGER!!!!!!!!!!!
#####
######################################
# if you want to delete the documents, but keep the index, run the following:
for doc in docs:
doc_id = doc["id"]
print("Indexing {}".format(doc_id))
response = client.delete(
index = index_name,
id = doc_id,
)
print('\n\tResponse:')
print(response)
# If at any time you want to start over, run this command to delete the index and then you can start from the top
# Delete the index.
response = client.indices.delete(
index = index_name
)
print('\nDeleting index:')
print(response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment