Last active
August 12, 2020 16:33
-
-
Save bala-codes/392e4122e7207eb37819e9f6a1fe53a6 to your computer and use it in GitHub Desktop.
BCC-Preprocess3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split by image, not by annotation row. `df` appears to hold one row per
# bounding box (segregate_data groups several rows per filename), so a
# row-level split could place boxes of the same image in both sets, leaking
# validation images into training and producing incomplete label files.
unique_filenames = df.filename.unique()
train_files, valid_files = model_selection.train_test_split(
    unique_filenames, test_size=0.1, random_state=13, shuffle=True
)
df_train = df[df.filename.isin(train_files)]
df_valid = df[df.filename.isin(valid_files)]
print(df_train.shape, df_valid.shape)

# Create the YOLO directory layout. makedirs(exist_ok=True) builds the parent
# folders as needed and keeps the cell safe to re-run.
for directory in (
    '/content/bcc/images/train',
    '/content/bcc/images/valid',
    '/content/bcc/labels/train',
    '/content/bcc/labels/valid',
):
    os.makedirs(directory, exist_ok=True)
def segregate_data(df, img_path, label_path, train_img_path, train_label_path):
    """Write one YOLO label file per image and copy the image next to it.

    Rows of ``df`` are grouped by ``filename``. For every group a text file
    with one ``label x_center y_center width height`` line per row is saved
    to ``train_label_path`` and the source image is copied from ``img_path``
    to ``train_img_path``.

    Args:
        df: DataFrame with the columns ``filename``, ``prev_filename``,
            ``labels``, ``x_center_norm``, ``y_center_norm``, ``width_norm``
            and ``height_norm``.
        img_path: Directory that contains the source images.
        label_path: Unused; kept so existing callers keep working.
        train_img_path: Destination directory for the copied images.
        train_label_path: Destination directory for the ``.txt`` label files.
    """
    yolo_columns = [
        "labels",
        "x_center_norm",
        "y_center_norm",
        "width_norm",
        "height_norm",
    ]

    # Make sure both destinations exist so savetxt/copyfile cannot fail on a
    # missing folder.
    os.makedirs(train_img_path, exist_ok=True)
    os.makedirs(train_label_path, exist_ok=True)

    # groupby visits each image's rows once instead of re-filtering the whole
    # frame for every filename.
    for _, group in df.groupby("filename", sort=False):
        # The on-disk image name is taken from the group's last row, the same
        # row the previous implementation ended up using.
        image_name = group.prev_filename.iloc[-1]

        # splitext strips only the final extension, so names that contain
        # extra dots keep a label stem that matches the copied image.
        stem = os.path.splitext(image_name)[0]
        txt_filename = os.path.join(train_label_path, stem + ".txt")

        # Save the image and .txt files to the corresponding destination folders
        np.savetxt(
            txt_filename,
            group[yolo_columns].to_numpy(),
            fmt=["%d", "%f", "%f", "%f", "%f"],
        )
        shutil.copyfile(
            os.path.join(img_path, image_name),
            os.path.join(train_img_path, image_name),
        )
## Apply function ##
# Source locations of the raw BCCD images and annotations.
src_img_path = "/content/BCCD_Dataset/BCCD/JPEGImages/"
src_label_path = "/content/BCCD_Dataset/BCCD/Annotations/"

# Destination folders of the YOLO-style dataset layout.
train_img_path = "/content/bcc/images/train"
train_label_path = "/content/bcc/labels/train"
valid_img_path = "/content/bcc/images/valid"
valid_label_path = "/content/bcc/labels/valid"

# Populate the training split first, then the validation split.
for split_df, split_img_dir, split_label_dir in (
    (df_train, train_img_path, train_label_path),
    (df_valid, valid_img_path, valid_label_path),
):
    segregate_data(split_df, src_img_path, src_label_path, split_img_dir, split_label_dir)

# Report how many files ended up in each destination folder.
for caption, folder in (
    ("No. of Training images", train_img_path),
    ("No. of Training labels", train_label_path),
    ("No. of valid images", valid_img_path),
    ("No. of valid labels", valid_label_path),
):
    print(caption, len(os.listdir(folder)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment