Skip to content

Instantly share code, notes, and snippets.

@InnerPeace-Wu
Last active October 18, 2017 06:48
Show Gist options
  • Save InnerPeace-Wu/49567f6008d4b9a841d816c6cb2f5a1c to your computer and use it in GitHub Desktop.
Save InnerPeace-Wu/49567f6008d4b9a841d816c6cb2f5a1c to your computer and use it in GitHub Desktop.
Works around not having enough memory to load the whole region description JSON file of the Visual Genome dataset: the file is parsed as a stream and split into one small JSON file per image.
# ----------------------------------------------
# DenseCap
# Written by InnerPeace
# ----------------------------------------------
"""read large region description json file"""
import ijson
import json
import sys
import os
# Visual Genome release to read; also the sub-directory name under VG_PATH.
VG_VERSION = '1.2'
# Root directory of the local Visual Genome data (placeholder, edit before use).
VG_PATH = '/path/to/visual/genome/data'
# The full region description file, which is read as a stream.
VG_REGION_PATH = '/'.join((VG_PATH, VG_VERSION, 'region_descriptions.json'))
# Output directory that receives one JSON file per image.
REGION_JSON = '/'.join((VG_PATH, VG_VERSION, 'regions_disc'))
def read_regions():
    """Split the region description file into one JSON file per image.

    Streams ``VG_REGION_PATH`` with ``ijson`` so the whole document never has
    to be held in memory. The input is expected to be a top-level JSON array
    of image objects, each holding an ``id`` and a ``regions`` list of flat
    objects; the ijson prefixes used below rely on that layout.

    For every image object whose ``id`` is not null, writes
    ``REGION_JSON/<id>.json`` containing ``{"regions": [...], "id": <id>}``.
    ``REGION_JSON`` is created if it does not exist. A running count of
    written images is shown on stdout.

    Only scalar members of a region are copied; nested containers inside a
    region are skipped.

    NOTE(review): ijson reports non-integer numbers as ``decimal.Decimal``,
    which ``json.dump`` cannot serialize. The region fields are presumably
    all integers and strings; confirm against the data.
    """
    if not os.path.exists(REGION_JSON):
        os.makedirs(REGION_JSON)

    region_prefix = 'item.regions.item'
    scalar_events = ('string', 'number', 'boolean', 'null')

    image_id = None   # id of the image object currently being parsed
    regions = []      # regions collected for the current image
    region = {}       # region object currently being parsed
    key = None        # most recent key seen directly inside the current region
    count = 0         # number of per-image files written so far

    sys.stdout.write('>>> %d \r' % count)
    sys.stdout.flush()

    # Binary mode is what ijson expects, and the context manager guarantees
    # the (very large) input file is closed even if parsing fails.
    with open(VG_REGION_PATH, 'rb') as source:
        for prefix, event, value in ijson.parse(source):
            if prefix == 'item':
                if event == 'start_map':
                    # A new image object begins: drop the previous state.
                    image_id = None
                    regions = []
                elif event == 'end_map' and image_id is not None:
                    # Writing at the end of the image object makes the result
                    # independent of whether "id" or "regions" comes first.
                    count += 1
                    with open(REGION_JSON + '/%s.json' % image_id, 'w') as f:
                        json.dump({'regions': regions, 'id': image_id}, f)
                    # Progress is refreshed once per image, not per event.
                    sys.stdout.write('>>> %d \r' % count)
                    sys.stdout.flush()
            elif prefix == 'item.id':
                # Matching on the prefix keeps "id" keys at other depths, and
                # string values that merely look like keys, from being
                # mistaken for the image id.
                image_id = value
            elif prefix == region_prefix:
                if event == 'start_map':
                    region = {}
                elif event == 'map_key':
                    key = value
                elif event == 'end_map':
                    regions.append(region)
            elif (key is not None and event in scalar_events
                    and prefix == region_prefix + '.' + key):
                # Scalar member that sits directly inside the current region.
                region[key] = value
# Run the split only when executed as a script, not when imported.
if __name__ == "__main__":
    read_regions()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment