Last active
September 4, 2019 11:35
-
-
Save glallen01/ebd4ee2f6d92c0a4ec6f0c4e93d66e43 to your computer and use it in GitHub Desktop.
delete by query test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test target (single-node Elasticsearch 7.3.1):
# docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.3.1

# Strings whose matching documents should be deleted (matched against the
# "message" field).
strings_to_delete=(
  "pktdatEnc="
  "Interface statistics report"
  "crying"
)

# Split unquoted expansions on newlines only (default IFS=$' \t\n').
# The loop below no longer needs this because it quotes the array expansion,
# but the setting is kept since it is global and later unquoted expansions of
# strings_to_delete may still depend on it.
IFS=$'\n'

#######################################
# Escape a string for embedding inside a JSON string literal.
# Only backslashes and double quotes are escaped; control characters are
# not handled.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "${text}"
}

# TODO - wrap into one query
# Build a single bool query whose "should" list holds one match clause per
# string. Clauses are joined with commas as they are added, so an empty
# strings_to_delete still yields well-formed JSON (an empty "should" list)
# rather than having the opening bracket stripped.
match_clauses=""
for string in "${strings_to_delete[@]}"; do
  match_clauses+="${match_clauses:+,}{\"match\": { \"message\": \"$(json_escape "${string}")\" }}"
done

delete_query_body='{"query":{"bool":{"must":[{"bool":{"should":['
delete_query_body+="${match_clauses}"
# "minimum_should_match" is the parameter name documented for the bool query.
delete_query_body+='],"minimum_should_match": 1}}]}}}'

# Quoted printf: the body contains [ ] characters that an unquoted expansion
# would treat as a glob pattern.
printf '%s\n' "${delete_query_body}"
# Base URL of the Elasticsearch node and the index used for the experiment.
url="http://localhost:9200"
index=twitter

#######################################
# Index one sample document into ${index} (mapping type "tweet").
# Globals:   url (read), index (read)
# Arguments: $1 - user, $2 - post_date, $3 - message, $4 - number
# Outputs:   curl's response (pretty-printed by the ?pretty flag) on stdout
#######################################
post_sample_tweet() {
  local user=$1 post_date=$2 message=$3 number=$4
  curl -X POST "${url}/${index}/tweet/?pretty" -H 'Content-Type: application/json' -d"
{
  \"user\" : \"${user}\",
  \"post_date\" : \"${post_date}\",
  \"message\" : \"${message}\",
  \"number\" : \"${number}\"
}
"
}

# Request the root endpoint, then pause before continuing.
curl -XGET "${url}/"
sleep 5

# TODO - for testing only
# Drop the index so every run starts from the same state.
curl -XDELETE "${url}/${index}"

# put sample data:
# For each zero-padded number 00..99, post two documents: one whose message
# contains "trying" and one whose message contains "crying". "crying" is one
# of the strings in strings_to_delete.
for ((i = 0; i < 100; i++)); do
  # Builtin zero-padding; avoids depending on the external seq command.
  printf -v number '%02d' "${i}"
  post_sample_tweet "kimchy" "2009-11-15T14:12:12" "trying out Elasticsearch" "${number}"
  post_sample_tweet "kimchi" "2009-11-15T14:12:13" "crying out Elasticsearch" "${number}"
done
# Pause, then print the total hit count and a sample lookup (number 44).
sleep 5
curl -XGET -ks "${url}/${index}/_search" | jq .hits.total
curl -XGET -ks "${url}/${index}/_search?q=number:44"
sleep 5

# Print the segment listing before any deletes are issued.
echo "segments: "
curl -XGET -ks "${url}/_cat/segments/${index}"
sleep 5

# Compute shard_count as the sum of the "pri" and "rep" columns reported by
# _cat/indices for this index. Bash arithmetic is used, so bc/dc are not
# required.
# NOTE(review): pri + rep is what the script has always summed; confirm it is
# the intended value for the delete-by-query "slices" parameter.
pri_rep=$(curl -XGET -ks "${url}/_cat/indices/${index}?h=pri,rep")
# Explicit IFS for this read only: the script-wide IFS may be newline-only,
# which would leave both columns in the first variable.
IFS=' ' read -r primary_shards replica_count <<< "${pri_rep}"
if [[ "${primary_shards}" =~ ^[0-9]+$ && "${replica_count}" =~ ^[0-9]+$ ]]; then
  shard_count=$(( primary_shards + replica_count ))
else
  # Stop here rather than sending an empty or garbage slices= value later.
  printf 'could not read pri/rep for index %s, got: %s\n' "${index}" "${pri_rep}" >&2
  exit 1
fi
echo "shard_count: ${shard_count}"
sleep 1
# For each string: delete the documents whose "message" matches it, show the
# segment listing, force-merge with only_expunge_deletes, and show the segment
# listing again so the effect of the merge can be compared.
# The array expansion is quoted so multi-word entries stay intact regardless
# of the current IFS.
for string in "${strings_to_delete[@]}"; do
  echo "doing the delete"
  sleep 5
  # Escape backslashes and double quotes so the string cannot break the JSON
  # request body.
  escaped_string=${string//\\/\\\\}
  escaped_string=${escaped_string//\"/\\\"}
  # TODO: try this with ${delete_query_body}
  curl -XPOST -ks -H 'Content-Type: application/json' \
    "${url}/${index}/_delete_by_query?refresh&slices=${shard_count}&conflicts=proceed&wait_for_completion=true" \
    -d"{\"query\":{\"match\":{\"message\":\"${escaped_string}\"}}}"
  sleep 2
  echo "segments: "
  curl -XGET -ks "${url}/_cat/segments/${index}"
  echo "doing the merge"
  sleep 5
  curl -XPOST -ks "${url}/${index}/_forcemerge?only_expunge_deletes=true"
  sleep 2
  echo "segments: "
  curl -XGET -ks "${url}/_cat/segments/${index}"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment