Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save arnoldsachith/03c8275ead04d7c4e0cf7277cb296086 to your computer and use it in GitHub Desktop.
Save arnoldsachith/03c8275ead04d7c4e0cf7277cb296086 to your computer and use it in GitHub Desktop.
COVID-19 X-Ray Data set - Preprocessing_the_data_set_to_train_a_CNN_network_using_Python
#import the necessary packages to execute the program
import pandas as pd
import shutil
import os
# Define the folder locations used by the script. Replace each placeholder
# string below with a real path on your machine before running.
# NOTE: the copy loop further down appends an "images" sub-folder to
# imagePath1, so imagePath1 must be the folder that CONTAINS "images".
imagePath1 = 'mention_the_path_to_images_folder_which_contains_X-Ray_images'
outputPath1 = 'mention_the_path_where_you_want_to_save_the_extracted-images'
csv1 = 'path_to_the_folder_that_contains_metadata.csv_file'
# Your path should look like:
# csv1 = 'E:\\M.tech_Projects\\Deep Neural Network\\Datasets\\covid-chestxray-dataset-master'

# Build the path to metadata.csv. os.path.join is used instead of joining on
# os.path.sep by hand so that a trailing separator on csv1 (or an empty csv1)
# does not produce a doubled separator or an unintended root-level path.
csvPath = os.path.join(csv1, "metadata.csv")
df = pd.read_csv(csvPath)  # load the metadata table into the dataframe df
# Make sure the destination folder exists before copying: shutil.copy2 does
# not create missing parent directories and would fail on the first image.
os.makedirs(outputPath1, exist_ok=True)

# Loop over every row of the dataframe df and keep only the rows whose
# finding is "COVID-19" AND whose view is "PA"; everything else is skipped.
for (i, row) in df.iterrows():
    if row["finding"] != "COVID-19" or row["view"] != "PA":
        continue
    # Skip rows without a usable filename (an empty CSV cell is typically
    # loaded as NaN, not a string, and cannot be joined into a path).
    if not isinstance(row["filename"], str):
        continue
    # Build the path to the input image from the name in the filename column
    imagePath = os.path.join(imagePath1, "images", row["filename"])
    print(imagePath)  # prints the source image that is about to be copied
    # If the image is not present on disk, ignore the row
    if not os.path.exists(imagePath):
        continue
    # Build the destination path, keeping only the file's base name so that
    # any sub-folder component in the filename column is dropped
    filename = os.path.basename(row["filename"])
    outputPath = os.path.join(outputPath1, filename)
    # Copy the image (with its metadata) from source to destination
    shutil.copy2(imagePath, outputPath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment