Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
delete documents by query result for Amazon CloudSearch.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import urllib
import urllib2
import json
# you need to set your domain endpoints.
SEARCH_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com"
DOCUMENT_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com"
API_VERSION = "2013-01-01"
def searchDocuments(queryParams):
    """Send a search request to the search endpoint and return the raw body.

    Args:
        queryParams: mapping of search parameter names to values. It is
            URL-encoded and sent as the request body.

    Returns:
        The response body, exactly as read from the connection.

    Raises:
        Exception: if the response code is anything other than 200.
    """
    query = urllib.urlencode(queryParams)
    url = "http://" + SEARCH_ENDPOINT + "/" + API_VERSION + "/search"
    # Supplying a data argument makes urllib2 issue a POST instead of a GET.
    result = urllib2.urlopen(url, query)
    try:
        if result.code != 200:
            # result.code is an integer, so it must be converted before it is
            # appended to the message; otherwise the concatenation itself
            # fails with TypeError and hides the real status.
            raise Exception("Error occured while sending search query. Response Code:" + str(result.code))
        return result.read()
    finally:
        # Release the connection on both the success and the error path.
        result.close()
def parseIdListFromBody(data):
    """Extract the document ids from a JSON search response.

    Args:
        data: JSON text containing a ``hits`` object with a ``hit`` list.

    Returns:
        A list with the ``id`` value of every hit, in response order.
    """
    hits = json.loads(data)["hits"]["hit"]
    return [hit["id"] for hit in hits]
def createSDFforDelete(idList):
    """Build the JSON batch that deletes every document in ``idList``.

    Args:
        idList: iterable of document ids.

    Returns:
        A JSON string holding a list with one ``{"type": "delete", "id": ...}``
        entry per id, in the same order as ``idList``.
    """
    operations = [{'type': 'delete', 'id': docId} for docId in idList]
    return json.dumps(operations)
def sendSDF(sdf):
    """Upload a batch to the document endpoint and print the response body.

    Args:
        sdf: the batch payload, sent as-is with a JSON content type.
    """
    endpoint = "http://" + DOCUMENT_ENDPOINT + "/" + API_VERSION + "/documents/batch"
    # Payload and header go in through the constructor; a request that
    # carries data is sent as a POST.
    batchRequest = urllib2.Request(endpoint, sdf, {"Content-Type": "application/json"})
    response = urllib2.urlopen(batchRequest)
    print(response.read())
if __name__ == '__main__':
    # Build the search query. Adjust these values to select the documents
    # that should be deleted.
    query = "*:*"
    queryParser = "lucene"
    # NOTE(review): size presumably caps the hits returned by one search, so
    # a single run would delete at most this many documents -- confirm
    # against the CloudSearch search API.
    size = "1000"
    returnFieldName = "title"
    queryParams = {"q" : query, "q.parser" : queryParser, "size" : size, "return" : returnFieldName}

    # Search, collect the matching ids, then upload one delete per id.
    data = searchDocuments(queryParams)
    idList = parseIdListFromBody(data)
    if idList:
        sdf = createSDFforDelete(idList)
        sendSDF(sdf)
    else:
        # No hits: skip the upload instead of posting an empty batch.
        print("No documents matched the query; nothing to delete.")
Owner

yoshi0309 commented May 27, 2014

You need to set "DOCUMENT_ENDPOINT", "SEARCH_ENDPOINT", and the search parameters before you run the script.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment