Skip to content

Instantly share code, notes, and snippets.

@glallen01
Last active September 4, 2019 11:35
Show Gist options
  • Save glallen01/ebd4ee2f6d92c0a4ec6f0c4e93d66e43 to your computer and use it in GitHub Desktop.
Save glallen01/ebd4ee2f6d92c0a4ec6f0c4e93d66e43 to your computer and use it in GitHub Desktop.
delete by query test
# docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.3.1
# Substrings of the "message" field; documents matching any of them are
# the ones this script deletes.
strings_to_delete=(
  "pktdatEnc="
  "Interface statistics report"
  "crying"
)
# Split words on newlines only (the default is IFS=$' \t\n').
# The loop below no longer needs this because it quotes the array expansion,
# but the setting is kept since other unquoted expansions in this script may
# still rely on it.
IFS=$'\n'
# TODO - wrap into one query
# Build one bool/should query that matches any of the strings. Clauses are
# collected in an array and joined with commas afterwards, so an empty list
# yields a valid empty "should":[] instead of a mangled prefix.
match_clauses=()
for string in "${strings_to_delete[@]}"; do
  # Escape backslashes first, then double quotes, so the value stays valid JSON.
  json_string=${string//\\/\\\\}
  json_string=${json_string//\"/\\\"}
  match_clauses+=("{\"match\": { \"message\": \"${json_string}\" }}")
done
delete_query_body='{"query":{"bool":{"must":[{"bool":{"should":['
# "${array[*]}" joins on the first character of IFS; the subshell keeps the
# temporary IFS=, from leaking into the rest of the script.
delete_query_body+=$(
  IFS=,
  printf '%s' "${match_clauses[*]}"
)
# The bool query parameter is minimum_should_match; the older
# minimum_number_should_match spelling is not accepted by the 7.x release
# this script targets.
delete_query_body+='],"minimum_should_match": 1}}]}}}'
# Quoted so the brackets in the JSON are never treated as a glob pattern.
printf '%s\n' "${delete_query_body}"
# Index name and base URL used by every request below.
index='twitter'
url='http://localhost:9200'
# Request the root endpoint, then pause before continuing.
curl --request GET "${url}/"
sleep 5
# TODO - for testing only
# Remove the index so each run starts from scratch.
curl --request DELETE "${url}/${index}"
# POST one sample document to ${url}/${index}/tweet/.
# Globals:   url, index (read)
# Arguments: $1 - user, $2 - post_date, $3 - message, $4 - number
# Outputs:   curl's response (pretty-printed via ?pretty) on stdout
post_sample_doc() {
  local payload
  # printf -v keeps the leading and trailing newline of the request body.
  printf -v payload \
    '\n{\n"user" : "%s",\n"post_date" : "%s",\n"message" : "%s",\n"number" : "%s"\n}\n' \
    "$1" "$2" "$3" "$4"
  curl -X POST "${url}/${index}/tweet/?pretty" \
    -H 'Content-Type: application/json' \
    -d "${payload}"
}

# put sample data:
# Two documents per number, 00 through 99: one whose message starts with
# "trying" and one whose message starts with "crying".
for ((n = 0; n < 100; n++)); do
  printf -v number '%02d' "${n}"
  post_sample_doc 'kimchy' '2009-11-15T14:12:12' 'trying out Elasticsearch' "${number}"
  post_sample_doc 'kimchi' '2009-11-15T14:12:13' 'crying out Elasticsearch' "${number}"
done
sleep 5
# Print the hit total for the index, then the result of a sample query.
curl -XGET -ks "${url}/${index}/_search" | jq .hits.total
curl -XGET -ks "${url}/${index}/_search?q=number:44"
sleep 5
echo "segments: "
curl -XGET -ks "${url}/_cat/segments/${index}"
sleep 5
# shard_count is the sum of the "pri" and "rep" columns reported by
# _cat/indices for the index. It is computed with shell arithmetic, so
# neither bc nor dc needs to be installed.
# NOTE(review): pri + rep is what the original computed; confirm whether the
# slice count should instead be based on the number of primary shards only.
shard_info=$(curl -XGET -ks "${url}/_cat/indices/${index}?h=pri,rep")
# IFS is overridden for this read only: the script-wide IFS is newline-only,
# which would leave both columns in a single field.
IFS=$' \t\n' read -r pri_shards rep_shards _ <<< "${shard_info}"
if [[ "${pri_shards}" =~ ^[0-9]+$ && "${rep_shards}" =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  shard_count=$((10#${pri_shards} + 10#${rep_shards}))
else
  # The request failed or returned something other than two numbers; fall
  # back to a single slice rather than passing an empty or invalid value on.
  printf 'warning: could not read shard counts for %s; using 1\n' "${index}" >&2
  shard_count=1
fi
echo "shard_count: ${shard_count}"
sleep 1
# For each string: run a delete-by-query on the "message" field, list the
# segments, run a force merge with only_expunge_deletes=true, and list the
# segments again.
# The array is expanded as "${...[@]}" so every element stays one word
# regardless of the current IFS and is never glob-expanded.
for string in "${strings_to_delete[@]}"; do
  # Escape backslashes first, then double quotes, so the value stays valid JSON.
  json_string=${string//\\/\\\\}
  json_string=${json_string//\"/\\\"}
  echo "doing the delete"
  sleep 5
  # TODO: try this with ${delete_query_body}
  # slices falls back to 1 when shard_count is empty or unset, so the request
  # never carries an empty slices= parameter.
  curl -XPOST -ks -H 'Content-Type: application/json' \
    "${url}/${index}/_delete_by_query?refresh&slices=${shard_count:-1}&conflicts=proceed&wait_for_completion=true" \
    -d "{\"query\":{\"match\":{\"message\":\"${json_string}\"}}}"
  sleep 2
  echo "segments: "
  curl -XGET -ks "${url}/_cat/segments/${index}"
  echo "doing the merge"
  sleep 5
  curl -XPOST -ks "${url}/${index}/_forcemerge?only_expunge_deletes=true"
  sleep 2
  echo "segments: "
  curl -XGET -ks "${url}/_cat/segments/${index}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment