Created October 31, 2014 14:37
-
-
Save nanvel/4f7696174ac3a9b3554c to your computer and use it in GitHub Desktop.
Spike project with Amazon CloudSearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Search bebop series. | |
""" | |
import arrow | |
import json | |
from tornado import options | |
from tornado.httpclient import HTTPError, HTTPClient, HTTPRequest | |
from tornado_botocore import Botocore | |
from tvs import TVS | |
DOMAIN_NAME = 'test-bebop-domain' | |
API_VERSION = '2013-01-01' | |
if __name__ == '__main__': | |
options.parse_command_line() | |
# create domain | |
cs_create_domain = Botocore( | |
service='cloudsearch', operation='CreateDomain', | |
region_name='us-west-2') | |
session = cs_create_domain.session | |
""" | |
try: | |
# create domain, domain will be reused if already exists | |
print cs_create_domain.call(domain_name=DOMAIN_NAME) | |
# { | |
# "DomainStatus":{ | |
# "DomainId":"240020657974/test-bebop-domain", | |
# "Created":true, | |
# "SearchService":{}, | |
# "SearchInstanceCount":0, | |
# "DomainName":"test-bebop-domain", | |
# "DocService":{}, | |
# "Deleted":false, | |
# "Processing":false, | |
# "RequiresIndexDocuments":false, | |
# "ARN":"arn:aws:cloudsearch:us-west-2:240020657974:domain/test-bebop-domain", | |
# "SearchPartitionCount":0 | |
# }, | |
# "ResponseMetadata":{ | |
# "RequestId":"38b0cba7-60f2-11e4-980e-6d6976ea3108" | |
# } | |
# } | |
except HTTPError as e: | |
print e.response.body | |
# configure fields | |
cs_define_index_field = Botocore( | |
service='cloudsearch', operation='DefineIndexField', | |
region_name='us-west-2', session=session) | |
# Fields: | |
# - title - text + show in result | |
# - airdate - uint | |
# - genre - literal + facet enabled (or literal-array?) | |
# - content - text | |
FIELDS = [{ | |
'DomainName': DOMAIN_NAME, | |
'IndexField': { | |
'IndexFieldName': 'title', | |
'IndexFieldType': 'text', | |
'TextOptions': { | |
'HighlightEnabled': False, | |
'DefaultValue': 'untitled', | |
'ReturnEnabled': True, | |
} | |
} | |
}, { | |
'DomainName': DOMAIN_NAME, | |
'IndexField': { | |
'IndexFieldName': 'content', | |
'IndexFieldType': 'text', | |
'TextOptions': { | |
'HighlightEnabled': False, | |
'DefaultValue': '', | |
'ReturnEnabled': False, | |
} | |
} | |
}, { | |
'DomainName': DOMAIN_NAME, | |
'IndexField': { | |
'IndexFieldName': 'airdate', | |
'IndexFieldType': 'int', | |
'IntOptions': { | |
'DefaultValue': 946684800, | |
} | |
} | |
}, { | |
'DomainName': DOMAIN_NAME, | |
'IndexField': { | |
'IndexFieldName': 'genre', | |
'IndexFieldType': 'literal-array', | |
'LiteralArrayOptions': { | |
'DefaultValue': '', | |
'FacetEnabled': True, | |
'ReturnEnabled': False, | |
'SearchEnabled': True, | |
} | |
} | |
}] | |
try: | |
for params in FIELDS: | |
print cs_define_index_field.call(**params) | |
except HTTPError as e: | |
print e.response.body | |
# add data | |
""" | |
batch = [] | |
for tv in TVS: | |
batch.append({ | |
'type': 'add', 'id': tv['number'], | |
'fields': { | |
'title': tv['title'], | |
'content': tv['content'], | |
'airdate': arrow.get(tv['airdate'], ['YYYY-MM-DD', 'MMMM D, YYYY']).timestamp, | |
'genre': tv['genre'], | |
} | |
}) | |
# get document and search endpoints | |
cs_describe_domains = Botocore( | |
service='cloudsearch', operation='DescribeDomains', | |
region_name='us-west-2', session=session) | |
response = cs_describe_domains.call(domain_names=[DOMAIN_NAME]) | |
# { | |
# "DomainStatusList":[ | |
# { | |
# "DomainId":"240020657974/test-bebop-domain", | |
# "Created":true, | |
# "SearchService":{ | |
# "Endpoint":"search-test-bebop-domain-kmvxd5zzot4opij6zvb6okvrma.us-west-2.cloudsearch.amazonaws.com" | |
# }, | |
# "SearchInstanceCount":1, | |
# "DomainName":"test-bebop-domain", | |
# "DocService":{ | |
# "Endpoint":"doc-test-bebop-domain-kmvxd5zzot4opij6zvb6okvrma.us-west-2.cloudsearch.amazonaws.com" | |
# }, | |
# "SearchInstanceType":"search.m1.small", | |
# "Deleted":false, | |
# "Processing":false, | |
# "RequiresIndexDocuments":true, | |
# "ARN":"arn:aws:cloudsearch:us-west-2:240020657974:domain/test-bebop-domain", | |
# "SearchPartitionCount":1 | |
# } | |
# ], | |
# "ResponseMetadata":{ | |
# "RequestId":"7993ac9b-6101-11e4-8510-8ffcccb94f21" | |
# } | |
# } | |
search_endpoint = response['DomainStatusList'][0]['SearchService']['Endpoint'] | |
document_endpoint = response['DomainStatusList'][0]['DocService']['Endpoint'] | |
httpclient = HTTPClient() | |
# reindex | |
""" | |
cs_index_documents = Botocore( | |
service='cloudsearch', operation='IndexDocuments', | |
region_name='us-west-2', session=session) | |
print cs_index_documents.call(domain_name=DOMAIN_NAME) | |
# add documents | |
url = 'http://{document_endpoint}/{api_version}/documents/batch'.format( | |
document_endpoint=document_endpoint, | |
api_version=API_VERSION) | |
try: | |
request = HTTPRequest( | |
url=url, body=json.dumps(batch), | |
headers={'Content-Type': 'application/json'}, method='POST') | |
request.params = None | |
cs_describe_domains.endpoint.auth.add_auth(request=request) | |
response = httpclient.fetch(request=request) | |
print response.body | |
except HTTPError as e: | |
print e.response.body | |
""" | |
# search | |
url = 'http://{search_endpoint}/{api_version}/search?q=bebop'.format( | |
search_endpoint=search_endpoint, api_version=API_VERSION) | |
request = HTTPRequest( | |
url=url, headers={'Content-Type': 'application/json'}, | |
method='GET') | |
request.params = None | |
cs_describe_domains.endpoint.auth.add_auth(request=request) | |
response = httpclient.fetch(request=request) | |
print response.body | |
# { | |
# "status":{ | |
# "rid":"st/UtJYpAAoghec=", | |
# "time-ms":82 | |
# }, | |
# "hits":{ | |
# "found":12, | |
# "start":0, | |
# "hit":[ | |
# { | |
# "id":"3", | |
# "fields":{ | |
# "airdate":"910396800", | |
# "title":"Honky Tonk Women" | |
# } | |
# }, | |
# { | |
# "id":"18", | |
# "fields":{ | |
# "airdate":"920073600", | |
# "title":"Speak Like a Child" | |
# } | |
# }, | |
# ... | |
# ] | |
# } | |
# } |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.