Skip to content

Instantly share code, notes, and snippets.

@bala-codes
Last active August 12, 2020 16:33
Show Gist options
  • Save bala-codes/392e4122e7207eb37819e9f6a1fe53a6 to your computer and use it in GitHub Desktop.
Save bala-codes/392e4122e7207eb37819e9f6a1fe53a6 to your computer and use it in GitHub Desktop.
BCC-Preprocess3
# Split by image, not by annotation row. segregate_data() gathers several
# rows per `filename` into one label file, so a row-level split could place
# boxes of the same image in both subsets (the image would then be copied to
# train AND valid, each with an incomplete label file).
image_names = df.filename.unique()
train_names, valid_names = model_selection.train_test_split(
    image_names, test_size=0.1, random_state=13, shuffle=True
)
df_train = df[df.filename.isin(train_names)]
df_valid = df[df.filename.isin(valid_names)]
print(df_train.shape, df_valid.shape)

# Build the images/ and labels/ trees, each with train/ and valid/ subfolders.
# makedirs creates the missing parents and exist_ok=True makes the cell safe
# to re-run (os.mkdir raised FileExistsError on the second execution).
for subset_dir in (
    '/content/bcc/images/train/',
    '/content/bcc/images/valid/',
    '/content/bcc/labels/train/',
    '/content/bcc/labels/valid/',
):
    os.makedirs(subset_dir, exist_ok=True)
def segregate_data(df, img_path, label_path, train_img_path, train_label_path):
    """Write one label text file per image and copy the image alongside.

    Rows of ``df`` are grouped by their ``filename`` column. For every group a
    text file is written with one line per row, formatted as
    ``<labels> <x_center_norm> <y_center_norm> <width_norm> <height_norm>``
    (integer class id followed by four floats), and the source image is copied
    into the destination image folder.

    Args:
        df: DataFrame with columns ``filename``, ``prev_filename``, ``labels``,
            ``x_center_norm``, ``y_center_norm``, ``width_norm`` and
            ``height_norm``.
        img_path: Folder that holds the source images named by
            ``prev_filename``.
        label_path: Unused; kept so existing callers keep working.
        train_img_path: Destination folder for the copied images.
        train_label_path: Destination folder for the generated ``.txt`` files.

    Raises:
        FileNotFoundError: If a source image or a destination folder is
            missing (raised by ``shutil.copyfile`` / ``np.savetxt``).
    """
    box_columns = ["labels", "x_center_norm", "y_center_norm", "width_norm", "height_norm"]

    # A single groupby pass replaces re-filtering the whole frame per file.
    for _, group in df.groupby("filename", sort=False):
        # Take the name from the group's last row, which is the row the
        # previous implementation ended up using once its inner loop finished.
        source_name = group.prev_filename.iloc[-1]

        # splitext strips only the final extension, so names that contain
        # extra dots (e.g. "cell.v2.jpg") keep their full stem.
        stem = os.path.splitext(source_name)[0]
        txt_filename = os.path.join(train_label_path, stem + ".txt")

        # Save the .img & .txt files to the corresponding train and validation folders
        boxes = group[box_columns].to_numpy(dtype=float)
        np.savetxt(txt_filename, boxes, fmt=["%d", "%f", "%f", "%f", "%f"])
        shutil.copyfile(
            os.path.join(img_path, source_name),
            os.path.join(train_img_path, source_name),
        )
## Apply function ##
# Source folders of the raw dataset.
src_img_path = "/content/BCCD_Dataset/BCCD/JPEGImages/"
src_label_path = "/content/BCCD_Dataset/BCCD/Annotations/"

# Destination folders for each subset.
train_img_path = "/content/bcc/images/train"
train_label_path = "/content/bcc/labels/train"
valid_img_path = "/content/bcc/images/valid"
valid_label_path = "/content/bcc/labels/valid"

# Populate the training subset first, then the validation subset.
segregate_data(df_train, src_img_path, src_label_path, train_img_path, train_label_path)
segregate_data(df_valid, src_img_path, src_label_path, valid_img_path, valid_label_path)

# Report how many files ended up in each destination folder.
for caption, folder in (
    ("No. of Training images", '/content/bcc/images/train'),
    ("No. of Training labels", '/content/bcc/labels/train'),
    ("No. of valid images", '/content/bcc/images/valid'),
    ("No. of valid labels", '/content/bcc/labels/valid'),
):
    print(caption, len(os.listdir(folder)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment