Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
delete documents by query result for Amazon CloudSearch.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import urllib
import urllib2
import json
# You need to set your domain endpoints before running this script.
# SEARCH_ENDPOINT is the host that searchDocuments() sends the search request to;
# DOCUMENT_ENDPOINT is the host that sendSDF() posts the delete batch to.
# Both are host names only: the "http://" scheme and the path are added by
# the functions that use them. The "XXXXX" values are placeholders.
SEARCH_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com"
DOCUMENT_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com"
# API version string, inserted as a path segment in both request URLs.
API_VERSION = "2013-01-01"
def searchDocuments(queryParams):
    """Send a search request to SEARCH_ENDPOINT and return the raw response body.

    queryParams -- dict of search parameters. It is URL-encoded and passed as
                   the request data, which makes urllib2 issue a POST.

    Returns the response body when the response code is 200.
    Raises Exception for any other response code that reaches this function.
    (urllib2 itself raises urllib2.HTTPError for non-2xx statuses before the
    check below is evaluated.)
    """
    query = urllib.urlencode(queryParams)
    url = "http://" + SEARCH_ENDPOINT + "/" + API_VERSION + "/search"
    # send query
    result = urllib2.urlopen(url, query)
    try:
        if result.code != 200:
            # result.code is an int; convert it explicitly, otherwise building
            # the message raises TypeError and hides the real problem.
            raise Exception(
                "Error occured while sending search query. Response Code:"
                + str(result.code))
        return result.read()
    finally:
        # Always release the underlying connection, on success or failure.
        result.close()
def parseIdListFromBody(data):
    """Extract the document ids from a search response body.

    data -- JSON text holding a "hits" object whose "hit" entry is a list of
            objects, each carrying an "id" key.

    Returns the ids as a list, in the order they appear in the response.
    Raises ValueError if data is not valid JSON, and KeyError if "hits",
    "hit" or an "id" is missing.
    """
    jsondata = json.loads(data)
    return [doc["id"] for doc in jsondata["hits"]["hit"]]
def createSDFforDelete(idList):
    """Build the JSON batch that deletes every document in idList.

    idList -- iterable of document ids.

    Returns a JSON string encoding a list with one
    {"type": "delete", "id": <id>} object per id, in input order.
    An empty idList yields "[]".
    """
    return json.dumps([{'type': 'delete', 'id': docId} for docId in idList])
def sendSDF(sdf):
    """POST a JSON batch to the documents/batch resource of DOCUMENT_ENDPOINT.

    sdf -- the batch as a JSON string; it is sent as the request body with
           Content-Type "application/json".

    Prints the response body. Returns None.
    Errors raised by urllib2.urlopen are not caught here.
    """
    url = "http://" + DOCUMENT_ENDPOINT + "/" + API_VERSION + "/documents/batch"
    request = urllib2.Request(url)
    request.add_header("Content-Type", "application/json")
    request.add_data(sdf)
    # send query
    response = urllib2.urlopen(request)
    try:
        print(response.read())
    finally:
        # Release the connection even if reading or printing fails.
        response.close()
if __name__ == '__main__':
    # Search parameters: match all documents ("*:*") using the lucene query
    # parser, with "size" set to "1000" and "title" as the returned field.
    queryParams = {
        "q": "*:*",
        "q.parser": "lucene",
        "size": "1000",
        "return": "title",
    }
    # Pipeline: search -> collect the hit ids -> build the delete batch -> upload it.
    responseBody = searchDocuments(queryParams)
    hitIds = parseIdListFromBody(responseBody)
    sendSDF(createSDFforDelete(hitIds))
@yoshi0309

This comment has been minimized.

Copy link
Owner Author

commented May 27, 2014

You need to set "DOCUMENT_ENDPOINT", "SEARCH_ENDPOINT" and the search parameters before you run the script.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.