Created
March 7, 2019 08:06
-
-
Save vashineyu/7f57fb63eadb39216ff877c0dc001491 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
import shutil | |
def initalize_folders(cfg):
    """Create a fresh, empty class-folder tree under ``cfg.ROOT_PATH``.

    For every entry of ``cfg.SUB_FOLDERS`` and every class name found in
    ``cfg.TARGET_CLASS`` the directory
    ``<ROOT_PATH>/<sub_folder>/<class_name>`` is created. A directory that
    already exists is removed together with its contents and re-created,
    so nothing from a previous run survives.

    Args:
        cfg (object): configuration object exposing ``ROOT_PATH``,
            ``SUB_FOLDERS`` (iterable of folder names) and ``TARGET_CLASS``
            (anything ``dict()`` accepts; only its keys are used here).
    """
    class_folder = dict(cfg.TARGET_CLASS)
    for subdir_lv1 in cfg.SUB_FOLDERS:
        for subdir_lv2 in class_folder:
            d = os.path.join(cfg.ROOT_PATH, subdir_lv1, subdir_lv2)
            try:
                os.makedirs(d)
            except FileExistsError:
                # Only "already exists" justifies wiping the folder; any
                # other failure (permissions, Ctrl-C, ...) must propagate
                # instead of triggering a recursive delete.
                shutil.rmtree(d)
                os.makedirs(d)
    print("Initalize folders Done")
def search_raw_files(root_path, extension, include_subfolder=False):
    """Collect files with a given extension below ``root_path``.

    Args:
        root_path (str): directory to search.
        extension (str): file extension without the leading dot; it is
            appended to ``"*."`` to form the glob pattern.
        include_subfolder (bool): when True, every directory reached by
            ``os.walk(root_path)`` is searched as well; otherwise only
            ``root_path`` itself.

    Return:
        files (list): matching paths, in the order ``glob`` yields them.
    """
    pattern = "*." + extension
    print("Searching raw files in %s" % root_path, end="")
    if include_subfolder:
        files = []
        for this_dir, _, _ in os.walk(root_path):
            # Escape the directory part so names containing glob
            # metacharacters ("[", "*", "?") are matched literally.
            files.extend(glob.glob(os.path.join(glob.escape(this_dir), pattern)))
    else:
        files = glob.glob(os.path.join(glob.escape(root_path), pattern))
    print(", found %i files" % (len(files)))
    return files
class Link_origin_to_soft():
    """Assign raw files to classes by filename and symlink them into place.

    Each path in ``path_list`` is matched against the substring patterns
    of every class; the first class with a matching pattern wins. The link
    is created at ``<root_path>/<to_subfolder>/<class>/<basename>``.
    """

    def __init__(self, path_list, assign_pattern_dict, to_subfolder, root_path="data"):
        """
        Args:
            path_list (list): full paths of the raw files.
            assign_pattern_dict: anything ``dict()`` accepts, mapping a
                class name to an iterable of substrings to look for.
            to_subfolder (str): sub folder (below ``root_path``) receiving
                the links.
            root_path (str): root of the organized folder tree; defaults to
                ``"data"``, the value that used to be hard-coded.
        """
        self.path_list = path_list
        self.to_subfolder = to_subfolder
        self.assign_pattern = dict(assign_pattern_dict)
        self.root_path = root_path
        # basename -> original full path, filled by build_link()
        self.record_dict = dict()

    def __len__(self):
        """Return the number of raw paths handled by this instance."""
        return len(self.path_list)

    def _scan_pattern(self, path):
        """Decide which class a file belongs to from its basename.

        Args:
            path (str): full path
        Return:
            class_key (str): first class (in insertion order) with a
                pattern that is a substring of the basename, or
                ``"neither"`` when nothing matches.
        """
        spath = os.path.basename(path)
        for key, patterns in self.assign_pattern.items():
            if any(pattern in spath for pattern in patterns):
                return key
        print("File %s cannot be assign into any class" % (path))
        return "neither"

    def build_link(self, assign_softlink=True):
        """Record every file and optionally create its symlink.

        Args:
            assign_softlink (bool): when False, only ``record_dict`` is
                updated and the file system is left untouched.

        Raises:
            FileNotFoundError: if the destination directory is missing.
        """
        for path in self.path_list:
            file_name = os.path.basename(path)
            to_go_class = self._scan_pattern(path)
            self.record_dict[file_name] = path
            if not assign_softlink:
                continue
            target_file = os.path.join(
                self.root_path, self.to_subfolder, to_go_class, file_name
            )
            try:
                os.symlink(src=path, dst=target_file)
            except FileExistsError:
                # Replace a stale entry left by a previous run. Other
                # errors (e.g. missing destination folder) propagate.
                os.remove(target_file)
                os.symlink(src=path, dst=target_file)

    @property
    def fetch_dictionary(self):
        """Mapping of basename -> original path recorded by build_link()."""
        return self.record_dict
def merge_dicts(*dict_args):
    """Combine several dicts into one brand-new dict.

    The inputs are shallow-copied and left untouched. When a key appears
    in more than one input, the value from the right-most one is kept.
    """
    merged = dict()
    for current in dict_args:
        merged.update(current)
    return merged
if __name__ == "__main__":
    # Example of cfg
    from yacs.config import CfgNode as CN

    _C = CN()

    # Layout of the organized folder tree.
    _C.ROOT_PATH = "data"  # folder to create
    _C.SUB_FOLDERS = ["train", "test", "other", "neither"]

    # (class name, filename substrings assigned to that class) pairs.
    _C.TARGET_CLASS = (
        ("class_00", ["BN"]),
        ("class_01", ["NKC", "KC", "NPC"]),
    )

    # Directories searched for raw files, one list per sub folder.
    _C.RAW_SEARCH_TRAIN = [
        "/mnt/nas/CGMH_NPC/",
        "/mnt/nas/CGMH_NPC/20180430 BN/",
        "/mnt/nas/CGMH_NPC/20180430 CA/",
    ]
    _C.RAW_SEARCH_TEST = [
        "/mnt/nas/CGMH_NPC/20180703 BN_testing/",
        "/mnt/nas/CGMH_NPC/20180703 CA_testing/",
    ]
    _C.RAW_SEARCH_OTHER = ["/mnt/nas/CGMH_NPC/20190223/"]
    _C.RAW_EXTENSION = "ndpi"

    # Output switches.
    _C.OUTPUT = CN()
    _C.OUTPUT.BUILD_SOFTLINK = True
    _C.OUTPUT.WRITE_JSON = True

    # NOTE(review): nesting of this helper inside the __main__ guard is
    # assumed (original indentation unavailable) -- confirm.
    def get_cfg_defaults():
        """Return a clone of the example configuration above."""
        return _C.clone()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment