Skip to content

Instantly share code, notes, and snippets.

@chaonan99
Created October 21, 2016 06:33
Show Gist options
  • Save chaonan99/bdf0c0bfcfc517727a5b3f52b8cd3568 to your computer and use it in GitHub Desktop.
Save chaonan99/bdf0c0bfcfc517727a5b3f52b8cd3568 to your computer and use it in GitHub Desktop.
Organize a list of strings in a structured way (dictionary, list structure)
"""
Organize a list of strings in a structured way (very slow)
:Author: chaonan99
:Date: 2016/10/21
"""
import re
import numpy as np
class StructureDir(object):
    """Organize a flat list of strings into nested dictionaries and lists.

    Every string is matched against ``pattern``; its capture groups, read
    from left to right, become the nesting levels of the result (group 1 is
    the outermost level).

    :type pattern: compiled regular expression (returned by ``re.compile``)
    :param pattern: a regular expression object to match each string in the list
    :type format_list: `list` of `string`
    :value format_list: "list" or "dict"
    :param format_list: one indicator per capture group. ``"dict"`` keys the
        level by the value of that group; ``"list"`` collects the children of
        that level in a list and discards the value of that group.
    :raises AssertionError: if the number of capture groups in ``pattern``
        differs from ``len(format_list)``
    """

    _INDICATORS = ('list', 'dict')

    def __init__(self, pattern, format_list):
        super(StructureDir, self).__init__()
        self.pattern = pattern
        self.format_list = format_list
        # Raised explicitly instead of through ``assert`` so the check is not
        # stripped by ``python -O``; the exception type is unchanged.
        if pattern.groups != len(format_list):
            raise AssertionError(
                "length of pattern groups and format list do not match!")

    def structured(self, list_of_dir):
        """Return ``list_of_dir`` organized according to ``format_list``.

        Strings sharing the same leading group values end up in the same
        branch. Branches and list items keep the order in which they first
        appear in ``list_of_dir``. Strings whose groups are all identical are
        all kept when the innermost level is a ``"list"``; for a ``"dict"``
        level the last string with a given key wins.

        A top-level ``"dict"`` is keyed by 1-tuples ``(value,)``, whereas
        nested dictionaries are keyed by the plain group value.

        :param list_of_dir: iterable of strings, each containing a match
            of ``pattern``
        :returns: a ``dict`` or ``list`` as selected by ``format_list[0]``
            (empty for empty input)
        :raises IndexError: if a string contains no match of ``pattern``
        :raises ValueError: if an entry of ``format_list`` is neither
            ``"list"`` nor ``"dict"``
        """
        pairs = [(self._groups_of(item), item) for item in list_of_dir]
        if not pairs:
            # Nothing to organize: return an empty top-level container.
            if not self.format_list:
                return {}
            return [] if self._indicator(0) == 'list' else {}
        return self.__merge_one_layer(pairs)

    def _groups_of(self, text):
        """Return the groups of the first match in ``text`` as a tuple."""
        found = self.pattern.findall(text)
        if not found:
            raise IndexError(
                "string does not match the pattern: {!r}".format(text))
        # ``findall`` only yields tuples for patterns with 2+ groups; with one
        # group it yields the group string, with none the whole match.
        group_count = self.pattern.groups
        if group_count == 0:
            return ()
        if group_count == 1:
            return (found[0],)
        return found[0]

    def _indicator(self, n):
        """Return the validated format indicator of level ``n``."""
        indicator = self.format_list[n]
        if indicator not in self._INDICATORS:
            raise ValueError(
                "Unexpected format indicator: {}".format(indicator))
        return indicator

    def __merge_one_layer(self, current_merge):
        """Fold the innermost remaining level and recurse towards the top.

        :param current_merge: non-empty list of ``(key, value)`` pairs whose
            keys are tuples of equal length; the last key element is the
            level folded by this call
        """
        n = len(current_merge[0][0]) - 1
        if n == -1:
            # Keys are empty tuples (pattern without groups): nothing to fold.
            return dict(current_merge)

        as_list = self._indicator(n) == 'list'
        if n == 0:
            if as_list:
                return [value for _, value in current_merge]
            return dict(current_merge)

        # Group by the key prefix in one pass; dicts keep insertion order, so
        # branches appear in first-seen order.
        grouped = {}
        for key, value in current_merge:
            prefix = key[:n]
            if as_list:
                grouped.setdefault(prefix, []).append(value)
            else:
                grouped.setdefault(prefix, {})[key[n]] = value
        return self.__merge_one_layer(list(grouped.items()))
if __name__ == '__main__':
    # Example: group MARS bounding-box file names by the four numeric fields
    # captured by the pattern (the digits before "C" and those following
    # "C", "T" and "F").
    pattern = re.compile(r'/data/datasets/MARS/bbox_test/\d+/(\d+)C(\d+)T(\d+)F(\d+)')
    files = [
        "/data/datasets/MARS/bbox_test/0001/0001C2T001F0001",
        "/data/datasets/MARS/bbox_test/0001/0001C2T001F0002",
        "/data/datasets/MARS/bbox_test/0001/0001C2T001F0003",
        "/data/datasets/MARS/bbox_test/0001/0001C2T002F0001",
        "/data/datasets/MARS/bbox_test/0001/0001C2T002F0002",
        "/data/datasets/MARS/bbox_test/0001/0001C2T002F0003",
        "/data/datasets/MARS/bbox_test/0001/0001C4T003F0001",
        "/data/datasets/MARS/bbox_test/0001/0001C4T003F0002",
        "/data/datasets/MARS/bbox_test/0001/0002C1T001F0001",
        "/data/datasets/MARS/bbox_test/0001/0002C1T001F0002",
        "/data/datasets/MARS/bbox_test/0001/0002C1T002F0001",
        "/data/datasets/MARS/bbox_test/0001/0002C2T003F0001",
        "/data/datasets/MARS/bbox_test/0001/0002C2T003F0002",
    ]
    stc = StructureDir(pattern, ["dict", "dict", "list", "list"])
    res = stc.structured(files)
    # Inspect ``res`` interactively when IPython is installed; otherwise
    # print it so the example still runs with the standard library alone.
    try:
        from IPython import embed
    except ImportError:
        from pprint import pprint
        pprint(res)
    else:
        embed()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment