Skip to content

Instantly share code, notes, and snippets.

@okiriza
Last active December 30, 2016 23:19
Show Gist options
  • Save okiriza/858fbcfaba8b154fb7c12a9fad45a262 to your computer and use it in GitHub Desktop.
Save okiriza/858fbcfaba8b154fb7c12a9fad45a262 to your computer and use it in GitHub Desktop.
from __future__ import print_function
import os
import time
import boto3
from boto3 import dynamodb
import cv2
import numpy as np
import scipy.misc
# boto3 service resource for DynamoDB in the ap-southeast-1 region.
# NOTE: created at import time, so importing this module builds the resource.
DB = boto3.resource('dynamodb', region_name='ap-southeast-1')
# Handle to the 'cctv-jak' table; download_images() runs its queries on it.
TABLE = DB.Table('cctv-jak')
# Condition builders for the 'loc' and 'timestamp' attributes.  Both are used
# inside KeyConditionExpression, so they are presumably the table's partition
# key and sort key respectively -- confirm against the table schema.
LOC_COL = dynamodb.conditions.Key('loc')
TIMESTAMP_COL = dynamodb.conditions.Key('timestamp')
def download_images(loc, min_ts=None, max_ts=None, item_limit=None,
                    out_path=None, sleep=None, verbose=0, raise_img_error=False):
    """Query the table for the images of one location and optionally save them.

    Args:
        loc: Value of the 'loc' key to query.
        min_ts: Lower timestamp bound (inclusive), or None for no lower bound.
            The special value 'checkpoint' resumes after the newest image
            already saved under ``out_path/loc`` (requires ``out_path``).
        max_ts: Upper timestamp bound (inclusive), or None for no upper bound.
        item_limit: Soft limit on the number of items.  It is checked after
            each page, so the result may hold more than ``item_limit`` items.
        out_path: If set, every item is also written to disk via _save_img().
        sleep: Seconds to pause between two page requests, or None/0.
        verbose: 0 is silent, 1 prints start/end messages, 2 also prints one
            line per downloaded page.
        raise_img_error: Forwarded to _save_img().

    Returns:
        List of the item dicts returned by the query ('img_bytes' and
        'timestamp'), each with an added 'loc' entry.

    Raises:
        ValueError: If ``min_ts == 'checkpoint'`` but ``out_path`` is empty.
    """
    if verbose >= 1:
        # No newline here: _get_time_cond() prints the rest of this line.
        print('Downloading images for loc {}'.format(loc), end='')

    if min_ts == 'checkpoint':
        # Explicit check instead of `assert`, which is stripped under -O.
        if not out_path:
            raise ValueError("min_ts='checkpoint' requires out_path to be set")
        min_ts = _get_latest_image_time(os.path.join(out_path, loc))
        if min_ts:
            min_ts += ' '  # add space to sort key to avoid re-downloading last item

    loc_cond = LOC_COL.eq(loc)
    time_cond = _get_time_cond(min_ts, max_ts, verbose=verbose)
    # The key condition does not change between pages, so build it once.
    key_cond = dynamodb.conditions.And(loc_cond, time_cond) if time_cond else loc_cond

    query_kwargs = {
        # '#time' is an alias for the 'timestamp' attribute name, which is a
        # DynamoDB reserved word and cannot be used directly in expressions.
        'ProjectionExpression': 'img_bytes, #time',
        'ExpressionAttributeNames': {'#time': 'timestamp'},
        'KeyConditionExpression': key_cond,
    }

    results = []
    while True:
        response = TABLE.query(**query_kwargs)
        items = response['Items']
        results.extend(items)

        if (verbose >= 2) and items:
            # Report the last item actually received; LastEvaluatedKey is
            # absent on the final page.
            print(' Downloaded {} images, ending at {}'.format(len(items), items[-1]['timestamp']))

        for item in items:
            # 'loc' is not part of the projection, so add it back for callers.
            item['loc'] = loc
            if out_path:
                _save_img(item['img_bytes'], loc, item['timestamp'], out_path,
                          raise_img_error=raise_img_error)

        if item_limit and (len(results) >= item_limit):
            if verbose >= 1:
                print('Exceeded downloaded images limit of {}, exiting...'.format(item_limit))
            break

        last_key = response.get('LastEvaluatedKey')
        if not last_key:
            # No more pages.
            break
        # Resume exactly after the last evaluated item on the next request.
        query_kwargs['ExclusiveStartKey'] = last_key

        if sleep:
            time.sleep(sleep)

    if verbose >= 1:
        print('Finished downloading, total images: {}'.format(len(results)))
    return results
def _get_latest_image_time(dir_path):
    """Return the newest timestamp encoded in the .jpg file names of a directory.

    File names are expected to look like ``<loc>_<timestamp>.jpg`` with every
    ':' replaced by '.', where ``<loc>`` is the directory's own name (this is
    what _save_img() writes).  The location prefix is stripped by name, so
    locations containing '_' are handled correctly.

    Args:
        dir_path: Directory holding the saved images of one location.

    Returns:
        The lexicographically greatest timestamp with '.' turned back into
        ':', or None if the directory does not exist or holds no usable
        .jpg file.
    """
    if not os.path.isdir(dir_path):
        return None

    suffix = '.jpg'
    # Same transformation that is applied to the file name when saving.
    dir_name = os.path.basename(os.path.normpath(dir_path))
    prefix = dir_name.replace(':', '.') + '_'

    stamps = []
    for name in os.listdir(dir_path):
        if not name.endswith(suffix):
            continue
        stem = name[:-len(suffix)]
        if stem.startswith(prefix):
            stamps.append(stem[len(prefix):])
        elif '_' in stem:
            # File not named after this directory: fall back to the text
            # after the last underscore.
            stamps.append(stem.rsplit('_', 1)[1])
        # Files without any underscore carry no timestamp and are skipped.

    if not stamps:
        return None
    # NOTE(review): every '.' becomes ':'; this assumes the timestamps
    # contain no literal '.' (e.g. fractional seconds) -- confirm.
    return max(stamps).replace('.', ':')
def _get_time_cond(min_ts, max_ts, verbose=0):
    """Build the key condition on the 'timestamp' attribute.

    Args:
        min_ts: Lower bound (inclusive), or a falsy value for no lower bound.
        max_ts: Upper bound (inclusive), or a falsy value for no upper bound.
        verbose: If >= 1, print a description of the range.  The text has no
            leading label because it completes a line that the caller started
            with ``end=''``; with no bounds only the newline is printed.

    Returns:
        A condition built from TIMESTAMP_COL (between / gte / lte), or None
        when neither bound is given.
    """
    if min_ts and max_ts:
        time_cond = TIMESTAMP_COL.between(min_ts, max_ts)
        description = ' from {} to {}'.format(min_ts, max_ts)
    elif min_ts:
        time_cond = TIMESTAMP_COL.gte(min_ts)
        description = ' from {}'.format(min_ts)
    elif max_ts:
        time_cond = TIMESTAMP_COL.lte(max_ts)
        description = ' up to {}'.format(max_ts)
    else:
        time_cond = None
        description = ''

    # All output is gated on `verbose`, including the bare newline for the
    # unbounded case, so a silent caller never gets a stray blank line.
    if verbose >= 1:
        print(description)
    return time_cond
def _save_img(img_bytes, loc, timestamp, out_path, raise_img_error=False, verbose=1):
    """Decode one image and write it to ``out_path/loc/<loc>_<timestamp>.jpg``.

    Every ':' in the file name is replaced by '.'.  The location directory is
    created if it does not exist.

    Args:
        img_bytes: Image data as a text string; it is encoded with ISO-8859-1
            to recover the raw bytes.  (Presumably the stored value is an
            encoded JPEG -- confirm against the table contents.)
        loc: Location name; used for the directory and the file name prefix.
        timestamp: Timestamp of the image; used in the file name.
        out_path: Root output directory.
        raise_img_error: If true, re-raise a ValueError that occurs while
            decoding or saving; otherwise the image is skipped.
        verbose: If >= 1 (the default), print a message when an image is
            skipped because of such an error.
    """
    loc_path = os.path.join(out_path, loc)
    if not os.path.isdir(loc_path):
        os.makedirs(loc_path)

    fname = '{}_{}.jpg'.format(loc, timestamp).replace(':', '.')
    fpath = os.path.join(loc_path, fname)

    try:
        raw = img_bytes.encode('ISO-8859-1')
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        img_array = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.CV_LOAD_IMAGE_COLOR)
        # NOTE(review): cv2.imdecode yields BGR channel order, while
        # scipy.misc.imsave presumably treats the array as RGB, so the saved
        # colors may be swapped -- confirm before changing.
        scipy.misc.imsave(fpath, img_array)
    except ValueError:
        # Something wrong when handling the image
        if raise_img_error:
            raise
        if verbose >= 1:
            print('Failed to save image for {} {}'.format(loc, timestamp))
if __name__ == '__main__':
    # Script entry point: fetch the images of one camera location into
    # data/cctv, resuming after the newest image already on disk.
    loc = 'Pondok Indah 1 S'
    out_path = 'data/cctv'

    # 'checkpoint' makes download_images() derive the lower bound from the
    # files already saved; there is no upper bound.
    min_ts, max_ts = 'checkpoint', None

    # Pause one second between page requests and print per-page progress.
    sleep, verbose = 1, 2

    download_images(
        loc,
        min_ts=min_ts,
        max_ts=max_ts,
        out_path=out_path,
        sleep=sleep,
        verbose=verbose,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment