Created
July 1, 2023 10:27
-
-
Save simonamdev/8ff41692d1939ee08481803b6e94b96e to your computer and use it in GitHub Desktop.
Python script to create symlinks from text files with file paths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import random
import sys
from time import sleep

from tqdm import tqdm
RANDOM_SEED = 42
random.seed(RANDOM_SEED)


def label_txt_path(file_path, is_synthia=False, is_pfd=False):
    """Return the text-label path that corresponds to a label image path.

    The rewrite rule is picked in this order: a path containing 'camvid',
    then the ``is_synthia`` flag, then the ``is_pfd`` flag. In every case
    the '.png' extension is replaced by '.txt'.

    Raises:
        ValueError: if none of the three rules applies, so an unknown label
            layout is reported instead of silently reusing a stale path.
    """
    if 'camvid' in file_path:
        return file_path.replace('train_labels', 'train_labels_txt').replace('.png', '.txt')
    if is_synthia:
        return file_path.replace('GT', 'GT_yolo').replace('.png', '.txt')
    if is_pfd:
        return file_path.replace('labels', 'labels_txt').replace('.png', '.txt')
    raise ValueError(f'Do not know how to map label path: {file_path}')


def link_name_for(source_file_path):
    """Return the symlink file name for a source path.

    The name is the basename of the source with every '_L' removed
    (presumably so label files share a stem with their image - confirm).
    Only the basename is rewritten, so a dataset directory whose name
    happens to contain '_L' is left untouched.
    """
    return os.path.basename(source_file_path).replace('_L', '')


def main():
    """Create one folder of symlinks per dataset listing in ./datasets.

    Each entry of ./datasets is read as a text file with one file path per
    line. Paths containing 'GT' or 'labels' are treated as labels and are
    rewritten with ``label_txt_path``; all other paths are used as-is.
    Every resulting path must exist, otherwise the script prints the
    missing path and exits with status 1. Datasets whose folder under
    ./dataset_links already exists are skipped.
    """
    for dataset in sorted(os.listdir('./datasets')):
        print(f'Symlinking for dataset: {dataset}')
        lowered_name = dataset.lower()
        is_synthia = 'synthia' in lowered_name
        is_pfd = 'pfd' in lowered_name
        if is_synthia and is_pfd:
            # Raised explicitly (not asserted) so the check survives `python -O`.
            raise ValueError(f'Dataset {dataset} matches both synthia and pfd')

        dataset_file_path = os.path.join('.', 'datasets', dataset)
        with open(dataset_file_path, 'r') as f:
            stripped_lines = (line.strip() for line in f)
            # Blank lines carry no path; skip them instead of failing validation.
            file_paths = [line for line in stripped_lines if line]

        fixed_paths = []
        for file_path in file_paths:
            is_label = 'GT' in file_path or 'labels' in file_path
            if is_label:
                # Add the label
                new_path = label_txt_path(file_path, is_synthia, is_pfd)
                print(file_path, new_path)
                fixed_paths.append(new_path)
            else:
                # Add the image
                print(file_path)
                fixed_paths.append(file_path)

        # Ensure every file exists
        print('Validating files exist...')
        for file_path in fixed_paths:
            if not os.path.isfile(file_path):
                print(f'File {file_path} does not exist')
                sys.exit(1)

        # print('Shuffling file paths...')
        # random.shuffle(fixed_paths)

        # Create the folder for the dataset; an existing folder means the
        # dataset is skipped entirely.
        target_dataset_dir = f"./dataset_links/{dataset}"
        if os.path.exists(target_dataset_dir):
            continue
        # makedirs also creates ./dataset_links itself on the first run.
        os.makedirs(target_dataset_dir)

        print('Creating symlinks...')
        progress_bar = tqdm(fixed_paths)
        for source_file_path in progress_bar:
            target_file_path = os.path.join(
                target_dataset_dir,
                link_name_for(source_file_path),
            )
            progress_bar.set_description(f'{target_file_path}', refresh=True)
            if not os.path.exists(target_file_path):
                # A relative symlink target is resolved relative to the link's
                # own directory, not the cwd the path was validated against,
                # so link to the absolute location.
                os.symlink(os.path.abspath(source_file_path), target_file_path)
            # NOTE(review): the original indentation was lost; this pause is
            # assumed to run once per iteration - confirm.
            sleep(0.01)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment