@yokawasa
Last active March 19, 2019 02:20
Private: RNN input data generation script
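This script walks a directory of video frame images, tags each frame with the Azure Computer Vision API, converts the tags to Word2Vec vectors, and stores per-frame sum/diff/normalized vectors in Azure Cosmos DB as RNN input data. Settings are read from an INI-style autoss.conf. The following is a minimal sketch that writes a skeleton config containing the keys the script looks up; every value is a placeholder, and the endpoint format is an assumption based on typical Cosmos DB accounts.

# Sketch: generate a skeleton autoss.conf (all values are placeholders to be replaced).
import configparser

cfg = configparser.ConfigParser()
cfg['autoss'] = {
    'azure-compvision-subkey': '<Computer Vision subscription key>',
    'log_file': '~/autoss.log',
    'log_level': 'INFO',
    # Cosmos DB settings for the per-frame tag vector documents
    'cosmos_endpoint_vectors': 'https://<account>.documents.azure.com:443/',
    'cosmos_primarykey_vectors': '<primary key>',
    'cosmos_db_vectors': '<database id>',
    'cosmos_col_vectors': '<container id>',
    # Cosmos DB settings for the generated RNN input documents
    'cosmos_endpoint_rnninput': 'https://<account>.documents.azure.com:443/',
    'cosmos_primarykey_rnninput': '<primary key>',
    'cosmos_db_rnninput': '<database id>',
    'cosmos_col_rnninput': '<container id>',
}
with open('autoss.conf', 'w') as f:
    cfg.write(f)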
#!/usr/bin/env python3
import http.client, urllib.parse, base64, json
import requests
import argparse
import configparser
import logging
import os,sys
import gensim.downloader as api
from gensim.models import Word2Vec
import numpy as np
import azure.cosmos.cosmos_client as cosmos_client
_DEFAULT_AS_LOG_LEVEL = 'INFO'
_DEFAULT_AS_LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'
_DEFAULT_AS_CONFIG_LOG_FILE = '~/autoss.log'
_DEFAULT_AS_CONFIG_LOG_LEVEL = 'INFO'
logger = logging.getLogger('autoss')
np.seterr(divide='ignore', invalid='ignore')
def AS_LOG(s):
    sys.stdout.write("{}\n".format(s))
    logger.info(s)

def AS_ERR(s):
    sys.stderr.write("{}\n".format(s))
    logger.error(s)
def init_logger(name, log_file,
                log_level=_DEFAULT_AS_LOG_LEVEL,
                log_format=_DEFAULT_AS_LOG_FORMAT):
    logger = logging.getLogger(name)
    handler = logging.FileHandler(os.path.expanduser(log_file))
    formatter = logging.Formatter(log_format)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    switcher = {
        'CRITICAL': logging.CRITICAL,
        'ERROR': logging.ERROR,
        'WARNING': logging.WARNING,
        'INFO': logging.INFO,
        'DEBUG': logging.DEBUG
    }
    logger.setLevel(switcher.get(log_level.upper(), logging.INFO))
class asConfig(object):
    def __init__(self, config_file=None):
        self._config_file = config_file
        self._config_parser = configparser.ConfigParser()
        if self._config_file:
            self._config_parser.read(config_file)

    def get_string(self, section, key, default_val=None):
        v = ''
        if not self._config_file:
            return default_val
        try:
            v = self._config_parser.get(section, key)
        except (configparser.NoSectionError, configparser.NoOptionError):
            v = default_val
        return v

    @property
    def compvision_subkey(self):
        return self.get_string('autoss', 'azure-compvision-subkey')

    @property
    def log_file(self):
        return self.get_string('autoss', 'log_file', _DEFAULT_AS_CONFIG_LOG_FILE)

    @property
    def log_level(self):
        return self.get_string('autoss', 'log_level', _DEFAULT_AS_CONFIG_LOG_LEVEL)
class asCosmosClient(object):
    def __init__(self, config):
        self._config = config
        # Initialize the Cosmos client
        self._client = cosmos_client.CosmosClient(
            url_connection=config['ENDPOINT'],
            auth={'masterKey': config['PRIMARYKEY']})
        #self._options = {
        #    'offerThroughput': 400
        #}
        #self._container_definition = {
        #    'id': self._config['CONTAINER']
        #}
        self._collection_link = "/dbs/{}/colls/{}".format(
            self._config['DATABASE'], self._config['CONTAINER'])

    def store_document(self, document):
        _newdoc = None
        try:
            _newdoc = self._client.CreateItem(self._collection_link, document)
        except Exception as e:
            AS_ERR("Cosmos DB store_document error: {}".format(e))
        return _newdoc

    def get_document(self, video_name, camera_no, frame_no):
        options = {}
        options['enableCrossPartitionQuery'] = True
        options['maxItemCount'] = 1
        partition_key = None
        query = {'query': "SELECT * FROM s WHERE s.video='{}' and s.camera_no='{}' and s.frame_no='{}'".format(
            video_name, camera_no, frame_no)}
        try:
            docs_iterable = self._client.QueryItems(self._collection_link, query, options, partition_key)
            # Materialize the query result once; calling list() and then fetch_next_block()
            # on the same iterable can return an empty block.
            docs = list(docs_iterable)
            if len(docs) > 0:
                return docs[0]
        except Exception as e:
            AS_ERR("Cosmos DB get_document error ({} {} {}): {}".format(video_name, camera_no, frame_no, e))
        return None
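
# Shape of the documents this client stores/fetches for the "vectors" container
# (built in main() below; the values shown here are illustrative only):
# {
#   "id": "<video>_<camera_no>_<frame_no>",
#   "video": "<video name>", "camera_no": "<camera no>", "frame_no": "<frame no>",
#   "tags": [{"name": "person", "confidence": 0.998}, ...],
#   "vectors": {"person": [<100 floats>], ...},
#   "sumvector": [<100 floats>]
# }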
import glob

def glob_image_files(mypath):
    return glob.glob('{}/*.jpg'.format(mypath))
def get_tags(subscription_key, image_file):
    # Request headers
    headers = {
        'Content-Type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': subscription_key,
    }
    # Request parameters
    params = urllib.parse.urlencode({
        # All of them are optional
        'visualFeatures': 'Tags',
        'language': 'en',
    })
    tagsets = []
    try:
        with open(image_file, 'rb') as f:
            img_data = f.read()
        api_url = "https://westus.api.cognitive.microsoft.com/vision/v1.0/analyze?%s" % params
        #AS_LOG("API URL:{}".format(api_url))
        r = requests.post(api_url, headers=headers, data=img_data)
        raw_parsed = r.json()
        #tags = [d.get('name') for d in tagsets]
        ## [Data Structure]
        ## raw_parsed['tags'] = [
        ##   {"name": "tagname1", "confidence": 0.9999},
        ##   {"name": "tagname2", "confidence": 0.9999},
        ##   ...]
        tagsets = raw_parsed['tags']
    except Exception as e:
        AS_ERR("Error:{}".format(e))
    return tagsets
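
# Usage sketch for get_tags() (hypothetical key and frame file; the return value
# follows the [Data Structure] comment above):
#   tagsets = get_tags('<subscription key>', './frames/video1_1_10.jpg')
#   # -> [{'name': 'person', 'confidence': 0.998}, {'name': 'indoor', 'confidence': 0.95}, ...]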
def main():
    parser = argparse.ArgumentParser(description='Generate RNN input data from video frame images')
    parser.add_argument('--config', help='config file (default autoss.conf)')
    parser.add_argument('-n', '--name', help='video name')
    parser.add_argument('-c', '--camerano', help='camera number')
    parser.add_argument('-d', '--framedir', help='video frame images directory path')
    args = parser.parse_args()
    if not args.name or not args.camerano or not args.framedir:
        parser.print_help()
        sys.exit(1)
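    # Example invocation (hypothetical script and directory names; frame images must be
    # named <NAME>_<CAMERANO>_<FRAMENO>.jpg, see the check below):
    #   python3 gen_rnn_input.py --config autoss.conf -n video1 -c 1 -d ./frames/video1_cam1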
    video_name = args.name
    camera_no = args.camerano
    frame_dir = args.framedir
    config_file = "autoss.conf"
    if args.config:
        config_file = args.config
    config = asConfig(config_file)
    init_logger('autoss', config.log_file, config.log_level)
    subscription_key = config.compvision_subkey
    word2vec_model_file = "{}/{}".format(os.getcwd(), 'text8.model')
    AS_LOG("Reading word2vec_model_file:{}".format(word2vec_model_file))
    model = Word2Vec.load(word2vec_model_file)
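    # Assumption: text8.model is a local Word2Vec model with 100-dimensional vectors
    # (the accumulators below use np.zeros(100)). A minimal sketch of how such a model
    # could be built with gensim (not part of this script):
    #   import gensim.downloader as api
    #   corpus = api.load('text8')
    #   m = Word2Vec(corpus, size=100)  # gensim 3.x; use vector_size=100 on gensim 4.x
    #   m.save('text8.model')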
AS_LOG("Reading subscription_key:{}".format(subscription_key))
cosmos_config_vectors = {
'ENDPOINT': config.get_string("autoss", "cosmos_endpoint_vectors"),
'PRIMARYKEY': config.get_string("autoss","cosmos_primarykey_vectors"),
'DATABASE': config.get_string("autoss","cosmos_db_vectors"),
'CONTAINER': config.get_string("autoss","cosmos_col_vectors")
}
cosmos_config_rnninput = {
'ENDPOINT': config.get_string("autoss","cosmos_endpoint_rnninput"),
'PRIMARYKEY': config.get_string("autoss","cosmos_primarykey_rnninput"),
'DATABASE': config.get_string("autoss","cosmos_db_rnninput"),
'CONTAINER': config.get_string("autoss","cosmos_col_rnninput")
}
cosmos_vectors_client = asCosmosClient(cosmos_config_vectors)
cosmos_rnninput_client = asCosmosClient(cosmos_config_rnninput)
    ## Iterate image files
    image_files = glob_image_files(frame_dir)
    frames = {}
    frame_no_list = []
    ## Populate verified frames
    for image_file in image_files:
        t = image_file.split('/')
        imagefile_no_ext = t[-1].replace('.jpg', '')
        c = imagefile_no_ext.split('_')
        if len(c) != 3:
            AS_ERR(f"Invalid file name format: {image_file}! "
                   f"Image name should be <NAME>_<CAMERANO>_<FRAMENO>.<ext>")
            continue
        _video_name = c[0]
        _camera_no = int(c[1])
        _frame_no = int(c[2])
        if _video_name != video_name or _camera_no != int(camera_no):
            AS_ERR(f"Frame {image_file} does not match video '{video_name}' / camera {camera_no}! Skipping")
            continue
        frames[_frame_no] = image_file
        frame_no_list.append(_frame_no)
    AS_LOG("frame count={}".format(len(frame_no_list)))
    ## Sort frame_no_list
    frame_no_list.sort()
    ## Iterate frames and do the following:
    ## - Get tags for each frame via Computer Vision API
    ## - Get vector for each tag using Word2Vec
    ## - Get sum of tag vectors
    for frame_no in frame_no_list:
        # continue  # Only for debug
        image_file = frames[frame_no]
        AS_LOG("Processing frame: {}".format(image_file))
        ## Get tags via Computer Vision service
        tagsets = get_tags(subscription_key, image_file)
        vectors_outdata = {}
        vectors_outdata['id'] = "{}_{}_{}".format(video_name, camera_no, frame_no)
        vectors_outdata['video'] = video_name
        vectors_outdata['camera_no'] = camera_no
        vectors_outdata['frame_no'] = str(frame_no)
        ## Get vectors for each tag
        vectors = {}
        sumvector = np.zeros(100)
        output_tagsets = []
        for tagset in tagsets:
            tag = tagset.get('name')
            # Check if the tag word exists in the model
            try:
                v = model.wv[tag]
            except KeyError:
                AS_ERR("Skip: {} is not in the model".format(tag))
                continue
            tagvector = np.array(list(v))
            sumvector = sumvector + tagvector
            vectors[tag] = tagvector.tolist()
            output_tagsets.append(tagset)
            # logging.info("tag->{}".format(tag))
            # logging.info(vectors[tag])
        vectors_outdata['vectors'] = vectors
        vectors_outdata['sumvector'] = sumvector.tolist()
        vectors_outdata['tags'] = output_tagsets
        #print(json.dumps(vectors_outdata))
        ## Store in CosmosDB (db:vectors)
        cosmos_vectors_client.store_document(vectors_outdata)
    ## Iterate frames and do the following:
    ## - Get the diff vector between the current frame and the next frame (REQ2)
    ## - Get the normalized diff vector (REQ3)
    ## - Store the result in CosmosDB (db:rnninput)
    for frame_no in frame_no_list:
        AS_LOG("Processing vector computations for frame: {}".format(frame_no))
        # Check if there is a next scene frame
        next_frame_no = frame_no + 1
        if next_frame_no not in frames:
            break
        ## Get current frame and next frame documents from Cosmos DB
        cur_doc = cosmos_vectors_client.get_document(
            video_name, camera_no, str(frame_no))
        next_doc = cosmos_vectors_client.get_document(
            video_name, camera_no, str(next_frame_no))
        if not cur_doc:
            AS_ERR("Missing frame: {} {} {}! Skipping".format(video_name, camera_no, frame_no))
            continue
        if not next_doc:
            AS_ERR("Missing frame: {} {} {}! Skipping".format(video_name, camera_no, next_frame_no))
            continue
        cur_sumvector = cur_doc["sumvector"]
        next_sumvector = next_doc["sumvector"]
        ## REQ2: Get diff vector
        ## diffvector(T) = Xpgm,C1,C2(T+1) - Xpgm,C1,C2(T)
        diffvector = np.zeros(100)
        try:
            diffvector = np.array(next_sumvector) - np.array(cur_sumvector)
        except Exception as e:
            AS_ERR("Errors in diff vector computation: {}".format(e))
        ## REQ3: Get norm vector of diff vector
        ## normvector(T) = diffvector(T) / ||diffvector(T)||
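        ## e.g. with 3-dim toy vectors (illustration only; the real sumvectors are 100-dim):
        ##   X(T) = [1, 0, 0], X(T+1) = [1, 2, 2]
        ##   diffvector(T) = [0, 2, 2]
        ##   normvector(T) = [0, 2, 2] / sqrt(0^2 + 2^2 + 2^2) ≈ [0, 0.707, 0.707]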
        normvector = np.zeros(100)
        try:
            if not (diffvector == np.zeros(100)).all():
                normvector = diffvector / np.linalg.norm(diffvector)
        except Exception as e:
            AS_ERR("Errors in norm vector computation: {}".format(e))
        ## REQ4: Counter
        ## NOT IMPLEMENTED AT THIS POINT
        rnninput_outdata = {}
        rnninput_outdata['id'] = "{}_{}_{}".format(video_name, camera_no, frame_no)
        rnninput_outdata['video'] = video_name
        rnninput_outdata['camera_no'] = camera_no
        rnninput_outdata['frame_no'] = str(frame_no)
        rnninput_outdata['sumvector'] = cur_sumvector
        rnninput_outdata['diffvector'] = diffvector.tolist()
        rnninput_outdata['normvector'] = normvector.tolist()
        ## Store in CosmosDB (db:rnninput)
        cosmos_rnninput_client.store_document(rnninput_outdata)
    AS_LOG("Done!")
if __name__ == '__main__':
    main()