Skip to content

Instantly share code, notes, and snippets.

@thanakijwanavit
Created April 27, 2020 04:55
Show Gist options
  • Save thanakijwanavit/a22c55fe2ca9bcee2f77adb97c5761f2 to your computer and use it in GitHub Desktop.
Save thanakijwanavit/a22c55fe2ca9bcee2f77adb97c5761f2 to your computer and use it in GitHub Desktop.
# from avail.aws.cloudsearch import get_cloudsearch_endpoint_from_settings
import time
import json
def splitList(l, n):
    """Split ``l`` into consecutive chunks of at most ``n`` items.

    Args:
        l: Sliceable sequence (for example a list) to split.
        n: Maximum chunk length; must be a positive integer.

    Returns:
        A list of slices of ``l`` in their original order. Every chunk has
        ``n`` items except possibly the last one. An empty ``l`` yields ``[]``.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    # A negative step makes range() empty, which would silently drop every
    # item; fail loudly instead (a zero step already raised ValueError).
    if n <= 0:
        raise ValueError("chunk size n must be a positive integer")
    return [l[start:start + n] for start in range(0, len(l), n)]
# get all items from dynamodb
import boto3
from boto3.dynamodb.conditions import Key, Attr
# Connect to DynamoDB in ap-southeast-1 using explicitly supplied credentials.
# MY_ACCESS_KEY_ID, MY_SECRET_ACCESS_KEY and TABLE_NAME are not defined in the
# visible part of this file and must be provided before this runs.
dynamodb = boto3.resource(
    'dynamodb',
    aws_access_key_id=MY_ACCESS_KEY_ID,
    aws_secret_access_key=MY_SECRET_ACCESS_KEY,
    region_name='ap-southeast-1',
)
table = dynamodb.Table(TABLE_NAME)

# Scan the whole table into `items`. Each response carries one page of results;
# while it also carries 'LastEvaluatedKey', resume the scan from that key and
# append the next page.
response = table.scan()
items = response['Items']
while 'LastEvaluatedKey' in response:
    print(response['LastEvaluatedKey'])
    response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
    items.extend(response['Items'])
# settings = {
# "aws.cloudsearch.doc":
# "<doc-ep>",
# "aws.cloudsearch.search":
# "<search-ep>"
# }
# doc = get_cloudsearch_endpoint_from_settings("doc", settings)
# search = get_cloudsearch_endpoint_from_settings("search", settings)
def uploadCloudsearch(datas):
    """Upload a batch of records to CloudSearch as "add" operations.

    Each record becomes one document whose ``id`` is the record's
    ``pr_code`` value (``None`` when the key is absent) and whose ``fields``
    is the record itself. The batch is serialized to a single JSON array,
    its in-memory size is printed, and it is sent in one
    ``upload_documents`` call.

    Args:
        datas: Iterable of dict-like records to index.

    Returns:
        Whatever ``docs.upload_documents`` returns for the batch.
    """
    # Function-scope import: the file never imports ``sys``, so the size
    # print below previously failed with NameError.
    import sys

    queries = [
        {"id": data.get('pr_code'), "type": "add", 'fields': data}
        for data in datas
    ]
    # NOTE(review): json.dumps raises TypeError on values it cannot encode;
    # presumably records coming from DynamoDB may contain Decimal numbers --
    # confirm and convert before upload if so.
    doclist = json.dumps(queries)
    # Size of the serialized string object, as a rough gauge of payload size.
    print(sys.getsizeof(doclist))
    # NOTE(review): `docs` is not defined in the visible part of this file;
    # it is presumably a CloudSearch document-endpoint client -- confirm it is
    # created at module level before this function is called.
    return docs.upload_documents(documents=doclist, contentType="application/json")
# Send the scanned items to CloudSearch in batches of up to 10000 records.
for itemBatch in splitList(items, 10000):
    print(len(itemBatch))
    # Pause 11 seconds before every upload, including the first one.
    time.sleep(11)
    uploadResult = uploadCloudsearch(itemBatch)
    print(uploadResult)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment