Skip to content

Instantly share code, notes, and snippets.

@aniruddha27
Last active August 24, 2022 15:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aniruddha27/ac7018bf3d8047abb52648b28a5ffee8 to your computer and use it in GitHub Desktop.
Save aniruddha27/ac7018bf3d8047abb52648b28a5ffee8 to your computer and use it in GitHub Desktop.
import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split
# Home directory: dataset root. The rest of the script reads the labels CSV
# and the images/ folder from here and writes the train/valid trees into it.
home_path = r'C:/Users/Dell/Desktop/Analytics Vidhya/ImageDataGenerator/emergency_vs_non-emergency_dataset/emergency_vs_non-emergency_dataset'

# Train and validation directories
train_path = os.path.join(home_path, 'train')
val_path = os.path.join(home_path, 'valid')

# Per-class sub-directories, derived from the parent paths above rather than
# re-concatenating the directory names by hand.
emergency_train_path = os.path.join(train_path, 'emergency')
non_emergency_train_path = os.path.join(train_path, 'non_emergency')
emergency_val_path = os.path.join(val_path, 'emergency')
non_emergency_val_path = os.path.join(val_path, 'non_emergency')

# Create the directory tree. exist_ok=True keeps the script re-runnable:
# a bare os.mkdir raises FileExistsError as soon as a directory is already
# present (e.g. on a second run). Note that os.makedirs also creates any
# missing parent directories.
for _directory in (
    train_path,
    val_path,
    emergency_train_path,
    non_emergency_train_path,
    emergency_val_path,
    non_emergency_val_path,
):
    os.makedirs(_directory, exist_ok=True)
# Load the labels table shipped with the dataset
df = pd.read_csv(home_path + r'/emergency_train.csv')

# Features (image file names) and target (class label)
X = df.loc[:, 'image_names']
y = df.loc[:, 'emergency_or_not']

# Train-test split for train and validation images: 10% held out,
# stratified on the label, with a fixed seed so the split is reproducible.
train_x, val_x, train_y, val_y = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=27,
    stratify=y,
)

# Train df, re-indexed from 0 so rows can be addressed positionally
df_train = pd.DataFrame(
    {'image_names': train_x, 'emergency_or_not': train_y}
).reset_index(drop=True)

# Validation df, re-indexed the same way
df_valid = pd.DataFrame(
    {'image_names': val_x, 'emergency_or_not': val_y}
).reset_index(drop=True)
def _copy_images(frame, emergency_dir, non_emergency_dir):
    """Copy every image listed in *frame* into its class directory.

    Args:
        frame: DataFrame with an 'image_names' column (file names inside
            ``<home_path>/images``) and an 'emergency_or_not' column.
        emergency_dir: Destination for rows whose label is not 0.
        non_emergency_dir: Destination for rows whose label is 0.

    Raises:
        FileNotFoundError: Propagated from ``shutil.copy`` when a listed
            image or a destination directory does not exist.
    """
    images_dir = os.path.join(home_path, 'images')
    # Walk both columns in lockstep instead of indexing with .loc per row.
    for image, label in zip(frame['image_names'], frame['emergency_or_not']):
        target_dir = non_emergency_dir if label == 0 else emergency_dir
        shutil.copy(os.path.join(images_dir, image), target_dir)


# Save train images. The destinations are the *_path variables created
# earlier in the script; the original referenced names that were never
# defined (e.g. non_emergency_train), which raised NameError.
_copy_images(df_train, emergency_train_path, non_emergency_train_path)

# Save validation images
_copy_images(df_valid, emergency_val_path, non_emergency_val_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment