Skip to content

Instantly share code, notes, and snippets.

@Abhishek-Shaw-Kolkata
Last active March 12, 2021 08:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Abhishek-Shaw-Kolkata/711a31f28eaf648d592cddcf90b92dce to your computer and use it in GitHub Desktop.
Save Abhishek-Shaw-Kolkata/711a31f28eaf648d592cddcf90b92dce to your computer and use it in GitHub Desktop.
def extract_metadata_from_images(file_path):
    """
    Extract selected metadata and pixel statistics from a DICOM file.

    Args:
        file_path: Path to the DICOM file (a string or ``os.PathLike``).

    Returns:
        A dictionary holding the image ID (the file name without its
        extension), patient ID, age, sex, view position, pixel spacing,
        modality, body part examined, and the mean, minimum and maximum
        of the pixel array.

    Note:
        Every tag is read directly from the dataset with no fallback, so a
        file that lacks one of them will presumably make the attribute
        access fail -- confirm against the data being processed.
    """
    dataset = pydicom.dcmread(file_path)

    # Read the pixel data once and reuse it for all three statistics.
    pixels = dataset.pixel_array

    # os.path.basename copes with the platform's path separators and with
    # PathLike objects, unlike a manual split on '/'.
    file_name = os.path.basename(file_path)

    d = {}
    d['ImageID'] = os.path.splitext(file_name)[0]  # Excluding the format (.dcm)
    d['patientID'] = dataset.PatientID
    d['age'] = dataset.PatientAge
    d['sex'] = dataset.PatientSex
    d['view_position'] = dataset.ViewPosition
    d['pixel_spacing'] = dataset.PixelSpacing
    d["modality"] = dataset.Modality
    d["body_part_examined"] = dataset.BodyPartExamined
    d['pixel_mean'] = np.mean(pixels)
    d['pixel_min'] = np.min(pixels)
    d['pixel_max'] = np.max(pixels)
    return d
# Extract metadata from every training file in parallel, one worker per core.
# NOTE(review): this runs at module top level; when worker processes are
# started by spawning, multiprocessing code normally needs an
# `if __name__ == "__main__":` guard -- confirm how this script is executed.
start = datetime.now()
num_cores = mp.cpu_count()
# The context manager ensures the worker processes are cleaned up even when
# a worker raises while reading a file.
with Pool(num_cores) as pool:
    results = pool.map(extract_metadata_from_images, train_files)
print("Total time taken {0}".format(datetime.now() - start))

df_meta = pd.DataFrame(results)

# Saving it for later use
df_meta.to_pickle('df_meta.pkl')
df_meta = pd.read_pickle('df_meta.pkl')

# Merging the class label file with extracted metadata.
# A left join keeps every extracted image; images without a label end up
# with NaN in the label-side columns.
df_meta = df_meta.merge(class_df, left_on='ImageID', right_on='ImageId', how='left')

nan_rows = df_meta[df_meta.ImageId.isna()]
print('There are {0} rows that does not have Class label'.format(nan_rows.shape[0]))

# Dropping nan rows
df_meta.dropna(subset=['ImageId'], inplace=True)

# Drop duplicates, keeping the first row for each ImageId
df_meta.drop_duplicates(subset=['ImageId'], inplace=True)

# Adding a column to differentiate rows with and without Pneumothorax:
# default every row to 'Pneumothorax', then relabel rows whose
# EncodedPixels value is the string "-1".
df_meta['class_'] = 'Pneumothorax'
df_meta.loc[df_meta['EncodedPixels'] == "-1", 'class_'] = 'NotPneumothorax'

# Casting ages as int
df_meta['age'] = df_meta['age'].astype(int)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment