Last active
March 19, 2019 02:20
-
-
Save yokawasa/42c227f2e32a5b4d1ceabd01887a7bb5 to your computer and use it in GitHub Desktop.
Private: RNN input data generation script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import http.client, urllib.parse, base64, json | |
import requests | |
import argparse | |
import configparser | |
import logging | |
import os,sys | |
import gensim.downloader as api | |
from gensim.models import Word2Vec | |
import numpy as np | |
import azure.cosmos.cosmos_client as cosmos_client | |
# Fallback level/format used as the default arguments of init_logger().
_DEFAULT_AS_LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'
_DEFAULT_AS_LOG_LEVEL = 'INFO'

# Fallbacks used by the asConfig.log_file / asConfig.log_level properties.
_DEFAULT_AS_CONFIG_LOG_LEVEL = 'INFO'
_DEFAULT_AS_CONFIG_LOG_FILE = '~/autoss.log'

# Logger shared by the AS_LOG() / AS_ERR() helpers.
logger = logging.getLogger('autoss')

# Tell NumPy to stay silent on divide-by-zero and invalid floating-point
# operations instead of emitting warnings.
np.seterr(invalid='ignore', divide='ignore')
def AS_LOG(s):
    """Echo *s* to stdout (newline-terminated) and log it at INFO level."""
    line = f"{s}\n"
    sys.stdout.write(line)
    logger.info(s)
def AS_ERR(s):
    """Echo *s* to stderr (newline-terminated) and log it at ERROR level."""
    line = f"{s}\n"
    sys.stderr.write(line)
    logger.error(s)
def init_logger(name, log_file,
                log_level=_DEFAULT_AS_LOG_LEVEL,
                log_format=_DEFAULT_AS_LOG_FORMAT):
    """Attach a file handler to the logger called *name* and set its level.

    Args:
        name: Name of the logger to configure.
        log_file: Path of the log file; a leading ``~`` is expanded.
        log_level: Case-insensitive level name (CRITICAL, ERROR, WARNING,
            INFO or DEBUG). Any other name falls back to INFO.
        log_format: Format string handed to ``logging.Formatter``.
    """
    level_by_name = {
        'CRITICAL': logging.CRITICAL,
        'ERROR': logging.ERROR,
        'WARNING': logging.WARNING,
        'INFO': logging.INFO,
        'DEBUG': logging.DEBUG,
    }
    target = logging.getLogger(name)

    file_handler = logging.FileHandler(os.path.expanduser(log_file))
    file_handler.setFormatter(logging.Formatter(log_format))
    target.addHandler(file_handler)

    requested_level = level_by_name.get(log_level.upper(), logging.INFO)
    target.setLevel(requested_level)
class asConfig(object):
    """Read-only accessor for the INI-style configuration file.

    When no config file is given, every lookup returns the caller-supplied
    default.
    """

    def __init__(self, config_file=None):
        """Parse *config_file* if one is given.

        Args:
            config_file: Path of the INI file, or None/empty to use defaults
                only. A path that does not exist is silently ignored by
                ``ConfigParser.read``.
        """
        self._config_file = config_file
        # SafeConfigParser was a deprecated alias of ConfigParser and no
        # longer exists in Python 3.12+; ConfigParser is the drop-in
        # replacement.
        self._config_parser = configparser.ConfigParser()
        if self._config_file:
            self._config_parser.read(config_file)

    def get_string(self, section, key, default_val=None):
        """Return the option *key* of *section*, or *default_val*.

        The default is returned when no config file was configured, or when
        either the section or the option is missing (for example because the
        config file could not be read).
        """
        if not self._config_file:
            return default_val
        try:
            return self._config_parser.get(section, key)
        except (configparser.NoSectionError, configparser.NoOptionError):
            return default_val

    @property
    def compvision_subkey(self):
        """Value of ``azure-compvision-subkey`` in section ``autoss``, or None."""
        return self.get_string('autoss', 'azure-compvision-subkey')

    @property
    def log_file(self):
        """Configured ``log_file``, falling back to the module default."""
        return self.get_string('autoss', 'log_file', _DEFAULT_AS_CONFIG_LOG_FILE)

    @property
    def log_level(self):
        """Configured ``log_level``, falling back to the module default."""
        return self.get_string('autoss', 'log_level', _DEFAULT_AS_CONFIG_LOG_LEVEL)
class asCosmosClient(object):
    """Small wrapper around one Cosmos DB collection.

    The *config* mapping must provide the keys ``ENDPOINT``, ``PRIMARYKEY``,
    ``DATABASE`` and ``CONTAINER``.
    """

    def __init__(self, config):
        """Create the Cosmos client and remember the collection link."""
        self._config = config
        self._client = cosmos_client.CosmosClient(
            url_connection=config['ENDPOINT'],
            auth={'masterKey': config['PRIMARYKEY']})
        self._collection_link = "/dbs/{}/colls/{}".format(
            self._config['DATABASE'], self._config['CONTAINER'])

    def store_document(self, document):
        """Create *document* in the collection.

        Returns:
            The value returned by ``CreateItem``, or None if the call raised
            (the error is reported through AS_ERR, not propagated).
        """
        try:
            return self._client.CreateItem(self._collection_link, document)
        except Exception as e:
            AS_ERR("Cosmos DB store_document error:{}".format(e))
            return None

    def get_document(self, video_name, camera_no, frame_no):
        """Return the first document matching video, camera and frame number.

        All three values are compared as strings, matching how the documents
        are stored by this script.

        Returns:
            The first matching document, or None when nothing matches or the
            query fails (the error is reported through AS_ERR).
        """
        options = {
            'enableCrossPartitionQuery': True,
            'maxItemCount': 1,
        }
        # Bind the values as query parameters instead of formatting them into
        # the SQL text, so quotes in a value can neither break nor alter the
        # query.
        query = {
            'query': ("SELECT * FROM s WHERE s.video=@video "
                      "AND s.camera_no=@camera_no AND s.frame_no=@frame_no"),
            'parameters': [
                {'name': '@video', 'value': str(video_name)},
                {'name': '@camera_no', 'value': str(camera_no)},
                {'name': '@frame_no', 'value': str(frame_no)},
            ],
        }
        try:
            results = self._client.QueryItems(
                self._collection_link, query, options, None)
            # Take only the first hit; the previous code drained the whole
            # result set and then asked the iterable for another block.
            return next(iter(results), None)
        except Exception as e:
            AS_ERR("Cosmos DB get_document error ({} {} {}):{}".format(
                video_name, camera_no, frame_no, e))
            return None
import glob | |
def glob_image_files(mypath):
    """Return the paths of the ``*.jpg`` files directly inside *mypath*."""
    jpg_pattern = f'{mypath}/*.jpg'
    return glob.glob(jpg_pattern)
def get_tags(subscription_key, image_file,
             api_url="https://westus.api.cognitive.microsoft.com/vision/v1.0/analyze",
             timeout=30):
    """Ask the Computer Vision "analyze" API for the tags of one image.

    Args:
        subscription_key: Value sent as ``Ocp-Apim-Subscription-Key``.
        image_file: Path of the image; its bytes are posted as the body.
        api_url: Analyze endpoint (defaults to the westus v1.0 endpoint).
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The ``tags`` list of the JSON response, i.e. dicts shaped like
        ``{"name": "tagname", "confidence": 0.9999}``. An empty list is
        returned when the file cannot be read, the request fails, or the
        response carries no ``tags`` entry; the error is reported via AS_ERR.
    """
    headers = {
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': subscription_key,
    }
    params = {
        'visualFeatures': 'Tags',
        'language': 'en',
    }
    try:
        with open(image_file, 'rb') as f:
            img_data = f.read()
        # Without a timeout a stalled connection would block the whole run.
        r = requests.post(api_url, params=params, headers=headers,
                          data=img_data, timeout=timeout)
        # Surface HTTP errors (bad key, throttling, ...) with their status
        # instead of an opaque KeyError on the missing 'tags' entry.
        r.raise_for_status()
        raw_parsed = r.json()
    except Exception as e:
        AS_ERR("Error:{}".format(e))
        return []
    if 'tags' not in raw_parsed:
        AS_ERR("Error: no 'tags' in response for {}: {}".format(
            image_file, raw_parsed))
        return []
    return raw_parsed['tags']
def _parse_frame_no(image_file, video_name, camera_no):
    """Return the frame number encoded in *image_file*, or None to skip it.

    File names must look like ``<NAME>_<CAMERANO>_<FRAMENO>.<ext>`` and match
    *video_name* and the integer *camera_no*; anything else is reported via
    AS_ERR and skipped.
    """
    stem = os.path.splitext(os.path.basename(image_file))[0]
    parts = stem.split('_')
    if len(parts) != 3:
        AS_ERR(f"Invalid file name format: {image_file}! "
               f"Image name should be <NAME>_<CAMERANO>_<FRAMENO>.<ext>")
        return None
    try:
        file_camera_no = int(parts[1])
        frame_no = int(parts[2])
    except ValueError:
        # A non-numeric camera/frame part used to abort the whole run.
        AS_ERR(f"Invalid file name format: {image_file}! "
               f"Image name should be <NAME>_<CAMERANO>_<FRAMENO>.<ext>")
        return None
    if parts[0] != video_name or file_camera_no != camera_no:
        AS_ERR(f"Invalid file name format: {image_file}!")
        return None
    return frame_no


def _unit_vector(vector):
    """Return *vector* scaled to unit length, or zeros if its norm is 0/NaN."""
    length = np.linalg.norm(vector)
    if length == 0 or not np.isfinite(length):
        return np.zeros(len(vector))
    return vector / length


def main():
    """Generate RNN input documents from a directory of video frame images.

    Steps:
      1. Collect the frame images of the requested video/camera.
      2. For each frame, fetch its tags from the Computer Vision API, look up
         a Word2Vec vector per tag, sum them and store the result in the
         "vectors" Cosmos DB collection.
      3. For each frame that has a successor, read both documents back,
         compute the difference of their sum vectors and its unit vector, and
         store the result in the "rnninput" Cosmos DB collection.
    """
    parser = argparse.ArgumentParser(description='This script is ...')
    parser.add_argument('--config', help='config file (default autoss.conf)')
    parser.add_argument('-n', '--name', help='video name')
    parser.add_argument('-c', '--camerano', help='camera number')
    parser.add_argument('-d', '--framedir', help='video frame images directory path')
    args = parser.parse_args()
    if not args.name or not args.camerano or not args.framedir:
        # parse_args(['-h']) would exit with status 0 before reaching the
        # intended error exit, so print the help explicitly.
        parser.print_help()
        sys.exit(1)

    video_name = args.name
    camera_no = args.camerano
    frame_dir = args.framedir
    config = asConfig(args.config or "autoss.conf")
    init_logger('autoss', config.log_file, config.log_level)

    try:
        camera_no_int = int(camera_no)
    except ValueError:
        AS_ERR("Invalid camera number: {}".format(camera_no))
        sys.exit(1)

    subscription_key = config.compvision_subkey
    word2vec_model_file = "{}/{}".format(os.getcwd(), 'text8.model')
    AS_LOG("Reading word2vec_model_file:{}".format(word2vec_model_file))
    model = Word2Vec.load(word2vec_model_file)
    # Take the dimensionality from the model instead of assuming 100.
    vector_size = model.wv.vector_size
    # Never echo the secret itself to stdout or the log file.
    AS_LOG("Reading subscription_key:{}".format(
        "(set)" if subscription_key else "(missing)"))

    cosmos_config_vectors = {
        'ENDPOINT': config.get_string("autoss", "cosmos_endpoint_vectors"),
        'PRIMARYKEY': config.get_string("autoss", "cosmos_primarykey_vectors"),
        'DATABASE': config.get_string("autoss", "cosmos_db_vectors"),
        'CONTAINER': config.get_string("autoss", "cosmos_col_vectors"),
    }
    cosmos_config_rnninput = {
        'ENDPOINT': config.get_string("autoss", "cosmos_endpoint_rnninput"),
        'PRIMARYKEY': config.get_string("autoss", "cosmos_primarykey_rnninput"),
        'DATABASE': config.get_string("autoss", "cosmos_db_rnninput"),
        'CONTAINER': config.get_string("autoss", "cosmos_col_rnninput"),
    }
    cosmos_vectors_client = asCosmosClient(cosmos_config_vectors)
    cosmos_rnninput_client = asCosmosClient(cosmos_config_rnninput)

    ## Populate verified frames (frame number -> image path)
    frames = {}
    for image_file in glob_image_files(frame_dir):
        frame_no = _parse_frame_no(image_file, video_name, camera_no_int)
        if frame_no is not None:
            frames[frame_no] = image_file
    # Sorting the dict keys also drops duplicate frame numbers.
    frame_no_list = sorted(frames)
    AS_LOG("frame count={}".format(len(frame_no_list)))

    ## Iterate frames and do the following:
    ## - Get tags for each frame via Computer Vision API
    ## - Get vector for each tag using Word2Vec
    ## - Get sum of tag vectors
    for frame_no in frame_no_list:
        image_file = frames[frame_no]
        AS_LOG("Processing frame: {}".format(image_file))
        tagsets = get_tags(subscription_key, image_file)

        vectors = {}
        sumvector = np.zeros(vector_size)
        output_tagsets = []
        for tagset in tagsets:
            tag = tagset.get('name')
            if not tag:
                continue
            try:
                # Word vectors live on model.wv; indexing the model object
                # itself is not supported by gensim 4.
                tagvector = np.asarray(model.wv[tag])
            except KeyError:
                # Use the script's own logger: logging.error() would set up
                # the root logger and duplicate later messages on stderr.
                AS_ERR("Skip:{} is not the model".format(tag))
                continue
            sumvector = sumvector + tagvector
            vectors[tag] = tagvector.tolist()
            output_tagsets.append(tagset)

        ## Store in CosmosDB (db:vectors)
        cosmos_vectors_client.store_document({
            'id': "{}_{}_{}".format(video_name, camera_no, frame_no),
            'video': video_name,
            'camera_no': camera_no,
            'frame_no': str(frame_no),
            'vectors': vectors,
            'sumvector': sumvector.tolist(),
            'tags': output_tagsets,
        })

    ## Iterate frames again and derive the RNN input from consecutive frames
    last_frame_no = frame_no_list[-1] if frame_no_list else None
    for frame_no in frame_no_list:
        AS_LOG("Processing vector computations for frame: {}".format(frame_no))
        next_frame_no = frame_no + 1
        # Stop once we run past the highest frame number. Comparing with the
        # frame *count* ended the loop early whenever numbering did not
        # start at 1.
        if next_frame_no > last_frame_no:
            break

        ## Get current frame and next frame document from Cosmos DB
        cur_doc = cosmos_vectors_client.get_document(
            video_name, camera_no, str(frame_no))
        next_doc = cosmos_vectors_client.get_document(
            video_name, camera_no, str(next_frame_no))
        if not cur_doc:
            AS_ERR("Error missing frame: {} {} {}! So skip".format(
                video_name, camera_no, frame_no))
            continue
        if not next_doc:
            AS_ERR("Error missing frame: {} {} {}! So skip".format(
                video_name, camera_no, next_frame_no))
            continue
        cur_sumvector = cur_doc["sumvector"]
        next_sumvector = next_doc["sumvector"]

        ## REQ2: Get diff vector
        ## diffvector(T) = Xpgm,C1,C2(T+1) - Xpgm,C1,C2(T)
        try:
            diffvector = (np.asarray(next_sumvector, dtype=float)
                          - np.asarray(cur_sumvector, dtype=float))
        except (TypeError, ValueError) as e:
            AS_ERR("Errors in diff vector computation: {}".format(e))
            diffvector = np.zeros(vector_size)

        ## REQ3: Get norm vector of diff vector
        ## normvector(T) = norm (diff_vector(T))
        # The previous code appended .all(), which collapsed the normalized
        # vector into a single boolean.
        normvector = _unit_vector(diffvector)

        ## REQ4: Counter
        ## NOT IMPLEMENTED AT THIS POINT

        ## Store in CosmosDB (db:rnninput)
        cosmos_rnninput_client.store_document({
            'id': "{}_{}_{}".format(video_name, camera_no, frame_no),
            'video': video_name,
            'camera_no': camera_no,
            'frame_no': str(frame_no),
            'sumvector': cur_sumvector,
            'diffvector': diffvector.tolist(),
            'normvector': normvector.tolist(),
        })
    AS_LOG("Done!")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment