Skip to content

Instantly share code, notes, and snippets.

@dtxe
Last active March 3, 2023 20:10
Show Gist options
  • Save dtxe/f70ab9b1f1e794c6bfa8136bd8768928 to your computer and use it in GitHub Desktop.
Save dtxe/f70ab9b1f1e794c6bfa8136bd8768928 to your computer and use it in GitHub Desktop.
quick and dirty script to glob a directory of dicom files and extract the header information into a csv file
''' quick and dirty script to glob a directory of dicom files and extract the header information into a csv file '''
import pydicom
import glob
import pandas as pd
from tqdm import tqdm
import numpy as np
###########################
# Parameters
tolerance = 0.01  # tolerance for checking if SliceLocation is consistently spaced
###########################

# glob returns files in arbitrary order; sorting keeps the CSV row order reproducible
dicomseries = sorted(glob.glob('*.dcm'))

# DICOM keywords to extract; each becomes one CSV column after 'Filename'.
# Every keyword appears exactly once so the table never gets duplicate column labels.
header_keywords = [
    'PatientID', 'PatientName', 'StudyDate', 'StudyTime', 'StudyDescription',
    'SeriesNumber', 'SeriesDescription', 'InstanceNumber',
    'ImagePositionPatient', 'ImageOrientationPatient', 'SliceLocation',
    'Rows', 'Columns', 'PixelSpacing', 'SliceThickness', 'SpacingBetweenSlices',
    'NumberOfFrames', 'BodyPartExamined', 'ProtocolName',
    'SeriesInstanceUID', 'StudyInstanceUID', 'StudyID',
    'WindowCenter', 'WindowWidth', 'AcquisitionNumber',
]

print('\n\n >> Reading DICOM headers\n')

# Collect one dict per file and build the table once at the end; growing a
# DataFrame cell-by-cell with .loc re-allocates on every assignment.
records = []
for dicom in tqdm(dicomseries):
    # header only: pixel data is not needed for the summary
    ds = pydicom.dcmread(dicom, stop_before_pixels=True)

    record = {'Filename': dicom}
    # try to collect items
    for item in header_keywords:
        try:
            record[item] = ds[item].value
        except Exception:
            # Best effort: an absent tag (KeyError) or an unreadable value is
            # recorded as missing. Unlike a bare `except`, this still lets
            # Ctrl-C (KeyboardInterrupt) stop the scan.
            record[item] = pd.NA
    records.append(record)

# explicit `columns` fixes the column order and keeps the header row even when no files matched
tbl = pd.DataFrame(records, columns=['Filename'] + header_keywords)
tbl.to_csv('dicom_headers.csv', index=False)
# do some checks
# Files without a SeriesNumber cannot be assigned to a series; dropping them
# also keeps np.sort from failing on pd.NA comparisons.
tbl_with_series = tbl.dropna(subset=['SeriesNumber'])
unique_series = np.sort(tbl_with_series['SeriesNumber'].unique())

print('\n\n >> Checking for consistent slice spacing\n')

for series in unique_series:
    # check that SliceLocation is consistently spaced
    tbl_series = tbl_with_series[tbl_with_series['SeriesNumber'] == series]

    # Coerce to float so sorting and differencing are numeric; missing or
    # unparsable locations become NaN and are ignored below.
    tbl_series = tbl_series.assign(
        SliceLocation=pd.to_numeric(tbl_series['SliceLocation'], errors='coerce')
    ).sort_values(by='SliceLocation')

    # SeriesDescription may be missing; fall back to '' so the substring
    # tests and the formatting below cannot raise on pd.NA.
    s_desc = tbl_series['SeriesDescription'].iloc[0]
    s_desc = '' if pd.isna(s_desc) else str(s_desc)

    # gaps between neighbouring slices (the leading NaN from diff() is dropped)
    spacing = tbl_series['SliceLocation'].dropna().diff().dropna()

    # get data ptp: range of the inter-slice gaps. With fewer than two gaps
    # there is nothing to compare, so the spacing is trivially consistent
    # instead of yielding NaN and a spurious alert.
    ptp = float(spacing.max() - spacing.min()) if len(spacing) > 1 else 0.0

    if ptp < tolerance:
        print('OK Series {} {}'.format(series, s_desc))
    else:
        if ('MPR' in s_desc) or ('localizer' in s_desc):
            alert = ' '
            note = '| note: localizer or resliced series. spacing irrelevant.'
        else:
            alert = '!!'
            note = ''

        print('{} Series {} {} (inter-slice spacing range: {:2.2g} mm) {}'.format(alert, series, s_desc, ptp, note))

print('\n\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment