"""Split the emergency vs. non-emergency image dataset into train/valid folders.

Reads ``emergency_train.csv`` from ``home_path``, performs a stratified 90/10
train/validation split on the ``emergency_or_not`` label, and copies every
image from ``<home_path>/images`` into ``train/`` or ``valid/`` under an
``emergency`` or ``non_emergency`` sub-directory according to its label.
"""
import os
import shutil

import pandas as pd
from sklearn.model_selection import train_test_split

# Home directory
home_path = r'C:/Users/Dell/Desktop/Analytics Vidhya/ImageDataGenerator/emergency_vs_non-emergency_dataset/emergency_vs_non-emergency_dataset'
images_path = os.path.join(home_path, 'images')

# Train and validation directories
train_path = os.path.join(home_path, 'train')
val_path = os.path.join(home_path, 'valid')

# Per-class sub-directories
emergency_train_path = os.path.join(train_path, 'emergency')
non_emergency_train_path = os.path.join(train_path, 'non_emergency')
emergency_val_path = os.path.join(val_path, 'emergency')
non_emergency_val_path = os.path.join(val_path, 'non_emergency')

# exist_ok=True lets the script be re-run without failing on directories
# that a previous run already created; parents are created implicitly.
for directory in (
    emergency_train_path,
    non_emergency_train_path,
    emergency_val_path,
    non_emergency_val_path,
):
    os.makedirs(directory, exist_ok=True)

# Original df
df = pd.read_csv(os.path.join(home_path, 'emergency_train.csv'))

# Images and Labels
X = df.loc[:, 'image_names']
y = df.loc[:, 'emergency_or_not']

# Train-test split for train and validation images; stratify on the label so
# both subsets keep the class proportions of the full dataset.
train_x, val_x, train_y, val_y = train_test_split(
    X, y, test_size=0.1, random_state=27, stratify=y
)

# Train df (index reset so rows are numbered 0..n-1)
df_train = pd.DataFrame(
    {'image_names': train_x, 'emergency_or_not': train_y}
).reset_index(drop=True)

# Validation df (index reset so rows are numbered 0..n-1)
df_valid = pd.DataFrame(
    {'image_names': val_x, 'emergency_or_not': val_y}
).reset_index(drop=True)


def _copy_images(frame, emergency_dir, non_emergency_dir):
    """Copy every image listed in *frame* into the directory for its label.

    Rows whose ``emergency_or_not`` value equals 0 go to *non_emergency_dir*;
    all other rows go to *emergency_dir*. Source files are read from
    ``images_path``.
    """
    for image, label in zip(frame['image_names'], frame['emergency_or_not']):
        target_dir = non_emergency_dir if label == 0 else emergency_dir
        shutil.copy(os.path.join(images_path, image), target_dir)


# Save train images
_copy_images(df_train, emergency_train_path, non_emergency_train_path)

# Save validation images
_copy_images(df_valid, emergency_val_path, non_emergency_val_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment