Skip to content

Instantly share code, notes, and snippets.

@prerakmody
Last active November 2, 2022 14:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prerakmody/7987e8b62e7c5237747a840b19200692 to your computer and use it in GitHub Desktop.
Save prerakmody/7987e8b62e7c5237747a840b19200692 to your computer and use it in GitHub Desktop.
TCIA scripts
# Inspired by https://github.com/jzagoli/ProstateX2nii. Modified by Prerak Mody
# This processes this dataset --> https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6
# Alternatively, one could also look here --> https://github.com/rcuocolo/PROSTATEx_masks/tree/master/Files/prostate for this dataset (https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=23691656)
import pdb
import tqdm # pip install tqdm
import urllib
import zipfile
import nibabel # pip install nibabel
import pydicom # pip install pydicom
import skimage
import traceback
import subprocess
import numpy as np
import urllib.request
import skimage.transform
from pathlib import Path
import matplotlib.pyplot as plt
DIR_MAIN = Path(__file__).parent.absolute()
DIRNAME_RAW = 'raw'
DIRNAME_PROCESSED = 'processed'
DIRNAME_PROCESSED2 = 'processed2'
DIRNAME_TMP = '_tmp'
EXT_ZIP = '.zip'
EXT_NII = '.nii'
EXT_NRRD = '.nrrd'
FOLDERNAME_MRI = '{}_mri{}'
FOLDERNAME_MASK = '{}_mask{}'
def read_zip(filepath_zip, filepath_output=None, leave=False):
# Step 0 - Init
if Path(filepath_zip).exists():
if filepath_output is None:
filepath_zip_parts = list(Path(filepath_zip).parts)
filepath_zip_name = filepath_zip_parts[-1].split(EXT_ZIP)[0]
filepath_zip_parts[-1] = filepath_zip_name
filepath_output = Path(*filepath_zip_parts)
zip_fp = zipfile.ZipFile(filepath_zip, 'r')
zip_fp_members = zip_fp.namelist()
with tqdm.tqdm(total=len(zip_fp_members), desc=' - [Unzip] ' + str(filepath_zip.parts[-1]), leave=leave) as pbar_zip:
for member in zip_fp_members:
zip_fp.extract(member, filepath_output)
pbar_zip.update(1)
return filepath_output
else:
print (' - [ERROR][utils.read_zip()] Path does not exist: ', filepath_zip)
return None
class TCIAClient:
"""
- Ref: https://wiki.cancerimagingarchive.net/display/Public/TCIA+Programmatic+Interface+%28REST+API%29+Usage+Guide
- Ref: https://github.com/TCIA-Community/TCIA-API-SDK/tree/master/tcia-rest-client-python/src
"""
GET_IMAGE = "getImage"
GET_MANUFACTURER_VALUES = "getManufacturerValues"
GET_MODALITY_VALUES = "getModalityValues"
GET_COLLECTION_VALUES = "getCollectionValues"
GET_BODY_PART_VALUES = "getBodyPartValues"
GET_PATIENT_STUDY = "getPatientStudy"
GET_SERIES = "getSeries"
GET_PATIENT = "getPatient"
GET_SERIES_SIZE = "getSeriesSize"
CONTENTS_BY_NAME = "ContentsByName"
def __init__(self, baseUrl, resource):
self.baseUrl = baseUrl + "/" + resource
self.STATUS_OK = 200
self.DECODER = 'utf-8'
def execute(self, url, queryParameters={}, verbose=False):
queryParameters = dict((k, v) for k, v in queryParameters.items() if v)
queryString = "?%s" % urllib.parse.urlencode(queryParameters)
requestUrl = url + queryString
if verbose:
print (' - [execute()] URL: ', requestUrl)
request = urllib.request.Request(url=requestUrl, headers={})
resp = urllib.request.urlopen(request)
return resp
def read_response(self, resp):
if resp.status == self.STATUS_OK:
return eval(resp.read().decode(self.DECODER))
else:
return None
def get_patient(self,collection = None , outputFormat = "json" ):
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT
queryParameters = {"Collection" : collection , "format" : outputFormat }
resp = self.execute(serviceUrl , queryParameters)
return resp
def get_patient_study(self,collection = None , patientId = None , studyInstanceUid = None , outputFormat = "json" ):
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT_STUDY
queryParameters = {"Collection" : collection , "PatientID" : patientId , "StudyInstanceUID" : studyInstanceUid , "format" : outputFormat }
resp = self.execute(serviceUrl , queryParameters)
return resp
def get_series(self, collection=None, patientId=None, modality=None, studyInstanceUid=None, seriesInstanceUid=None, outputFormat = "json" ):
serviceUrl = self.baseUrl + "/query/" + self.GET_SERIES
queryParameters = {"Collection" : collection, "patientId": patientId, "StudyInstanceUID" : studyInstanceUid, 'SeriesInstanceUID': seriesInstanceUid,"Modality" : modality, "format" : outputFormat }
resp = self.execute(serviceUrl , queryParameters)
return resp
def get_image(self , seriesInstanceUid , downloadPath, zipFileName):
try:
serviceUrl = self.baseUrl + "/query/" + self.GET_IMAGE
queryParameters = { "SeriesInstanceUID" : seriesInstanceUid }
resp = self.execute(serviceUrl, queryParameters)
filepath = Path(downloadPath).joinpath(zipFileName)
data = resp.read()
with open(filepath, 'wb') as fp:
fp.write(data)
tmp = list(Path(filepath).parts)
tmp[-1] = tmp[-1].split('.zip')[0]
filepath_output = Path(*tmp)
read_zip(filepath, filepath_output, leave=False)
Path(filepath).unlink()
except:
traceback.print_exc()
def download_dcm2niix(DIR_TMP):
# Ref: https://github.com/rordenlab/dcm2niix/releases/tag/v1.0.20220720
try:
import sys
path_file = Path(DIR_TMP).joinpath('dcm2niix')
if sys.platform == 'win32':
if not Path(path_file).exists():
path_zip = Path(DIR_TMP).joinpath('dcm2niix_win.zip')
urllib.request.urlretrieve(url='https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20220720/dcm2niix_win.zip', filename=str(path_zip))
read_zip(path_zip, path_file)
Path(path_zip).unlink()
elif sys.platform == 'linux':
pass
return Path(path_file).joinpath('dcm2niix.exe')
except:
traceback.print_exc()
pdb.set_trace()
return None
def freqhist_bins(array, bins=100):
"""
A numpy based function to split the range of pixel values into groups, such that each group has around the same number of pixels
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py
"""
imsd = np.sort(array.flatten())
t = np.array([0.001])
t = np.append(t, np.arange(bins)/bins+(1/2/bins))
t = np.append(t, 0.999)
t = (len(imsd)*t+0.5).astype(np.int)
return np.unique(imsd[t])
def hist_scaled(array, brks=None, bins=100):
"""
Scales a tensor using `freqhist_bins` to values between 0 and 1
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py
"""
if brks is None:
brks = freqhist_bins(array, bins=bins)
ys = np.linspace(0., 1., len(brks))
x = array.flatten()
x = np.interp(x, brks, ys)
if 0:
plt.plot(brks, ys, color='blue', label='Non-Normalized', marker='x', markersize=2)
plt.xlim([0,np.max(array)])
brks2 = freqhist_bins(x, bins=bins)*np.max(array)
ys2 = np.linspace(0., 1., len(brks2))
plt.plot(brks2, ys2, color='orange', label='Histogram Equalization')
plt.legend()
plt.show()
array_tmp = np.reshape(x, array.shape)
array_tmp = np.clip(array_tmp, 0.,1.)
return array_tmp
if __name__ == "__main__":
# Step 0.1 - Init TCIA client
baseUrl = 'https://services.cancerimagingarchive.net/services/v4'
resource = 'TCIA'
collection = 'PROSTATEx'
client = TCIAClient(baseUrl=baseUrl, resource=resource)
# Step 0.2 - Init seriesids from https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6
seriesids_mask = ['1.2.276.0.7230010.3.1.3.1070885483.26120.1599221759.302','1.2.276.0.7230010.3.1.3.1070885483.24432.1599221753.549','1.2.276.0.7230010.3.1.3.1070885483.5008.1599221756.491','1.2.276.0.7230010.3.1.3.1070885483.27140.1599221767.245','1.2.276.0.7230010.3.1.3.1070885483.27624.1599221764.667','1.2.276.0.7230010.3.1.3.1070885483.16240.1599221761.962','1.2.276.0.7230010.3.1.3.1070885483.8220.1599221770.50','1.2.276.0.7230010.3.1.3.1070885483.9460.1599221773.50','1.2.276.0.7230010.3.1.3.1070885483.24532.1599221775.720','1.2.276.0.7230010.3.1.3.1070885483.24608.1599221781.105','1.2.276.0.7230010.3.1.3.1070885483.24736.1599221778.322','1.2.276.0.7230010.3.1.3.1070885483.22584.1599221783.667','1.2.276.0.7230010.3.1.3.1070885483.24012.1599221786.269','1.2.276.0.7230010.3.1.3.1070885483.21792.1599221788.891','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221791.439','1.2.276.0.7230010.3.1.3.1070885483.5672.1599221794.52','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221799.294','1.2.276.0.7230010.3.1.3.1070885483.16324.1599221796.636','1.2.276.0.7230010.3.1.3.1070885483.26880.1599221801.878','1.2.276.0.7230010.3.1.3.1070885483.26756.1599221804.455','1.2.276.0.7230010.3.1.3.1070885483.21116.1599221807.124','1.2.276.0.7230010.3.1.3.1070885483.23772.1599221809.995','1.2.276.0.7230010.3.1.3.1070885483.23924.1599221812.620','1.2.276.0.7230010.3.1.3.1070885483.24056.1599221815.833','1.2.276.0.7230010.3.1.3.1070885483.23100.1599221818.605','1.2.276.0.7230010.3.1.3.1070885483.4648.1599221821.338','1.2.276.0.7230010.3.1.3.1070885483.5584.1599221824.60','1.2.276.0.7230010.3.1.3.1070885483.17604.1599221827.76','1.2.276.0.7230010.3.1.3.1070885483.23788.1599221829.820','1.2.276.0.7230010.3.1.3.1070885483.26076.1599221832.601','1.2.276.0.7230010.3.1.3.1070885483.22052.1599221835.360','1.2.276.0.7230010.3.1.3.1070885483.26444.1599221837.990','1.2.276.0.7230010.3.1.3.1070885483.27528.1599221841.604','1.2.276.0.7230010.3.1.3.1070885483.3400.1599221844.204','1.2.276.0.7230010.3.1.3.1070885483.15416.1599221846.853','1.2.276.0.7230010.3.1.3.1070885483.22008.1599221849.372','1.2.276.0.7230010.3.1.3.1070885483.16016.1599221851.990','1.2.276.0.7230010.3.1.3.1070885483.8088.1599221854.809','1.2.276.0.7230010.3.1.3.1070885483.5396.1599221858.335','1.2.276.0.7230010.3.1.3.1070885483.25220.1599221863.968','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221860.888','1.2.276.0.7230010.3.1.3.1070885483.7992.1599221866.575','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221869.130','1.2.276.0.7230010.3.1.3.1070885483.24588.1599221871.775','1.2.276.0.7230010.3.1.3.1070885483.24928.1599221874.342','1.2.276.0.7230010.3.1.3.1070885483.14224.1599221879.641','1.2.276.0.7230010.3.1.3.1070885483.25300.1599221882.469','1.2.276.0.7230010.3.1.3.1070885483.21512.1599221877.6','1.2.276.0.7230010.3.1.3.1070885483.5808.1599221885.265','1.2.276.0.7230010.3.1.3.1070885483.22380.1599221887.327','1.2.276.0.7230010.3.1.3.1070885483.23580.1599221890.67','1.2.276.0.7230010.3.1.3.1070885483.15904.1599221892.237','1.2.276.0.7230010.3.1.3.1070885483.4220.1599221894.379','1.2.276.0.7230010.3.1.3.1070885483.25552.1599221897.694','1.2.276.0.7230010.3.1.3.1070885483.26088.1599221900.380','1.2.276.0.7230010.3.1.3.1070885483.5296.1599221903.189','1.2.276.0.7230010.3.1.3.1070885483.6284.1599221908.467','1.2.276.0.7230010.3.1.3.1070885483.17416.1599221911.141','1.2.276.0.7230010.3.1.3.1070885483.1780.1599221905.866','1.2.276.0.7230010.3.1.3.1070885483.24236.1599221913.873','1.2.276.0.7230010.3.1.3.1070885483.24632.1599221916.651','1.2.276.0.7230010.3.1.3.1070885483.25044.1599221924.876','1.2.276.0.7230010.3.1.3.1070885483.8668.1599221919.430','1.2.276.0.7230010.3.1.3.1070885483.24644.1599221922.113','1.2.276.0.7230010.3.1.3.1070885483.27560.1599221927.547','1.2.276.0.7230010.3.1.3.1070885483.25100.1599221930.643']
seriesids_mri = ['1.3.6.1.4.1.14519.5.2.1.7310.5101.107276353018221365492863559094','1.3.6.1.4.1.14519.5.2.1.7310.5101.127076787998581344993055912043','1.3.6.1.4.1.14519.5.2.1.7310.5101.128915395083536972793214199559','1.3.6.1.4.1.14519.5.2.1.7310.5101.156355474235536930096765953749','1.3.6.1.4.1.14519.5.2.1.7310.5101.182666056353308077379356745296','1.3.6.1.4.1.14519.5.2.1.7310.5101.217705819956691748023253683197','1.3.6.1.4.1.14519.5.2.1.7310.5101.233894782007809884236855995183','1.3.6.1.4.1.14519.5.2.1.7310.5101.234424615181029312892423746307','1.3.6.1.4.1.14519.5.2.1.7310.5101.259467270859277582122403078829','1.3.6.1.4.1.14519.5.2.1.7310.5101.260386623049829078909727548239','1.3.6.1.4.1.14519.5.2.1.7310.5101.306748150836286513093400468929','1.3.6.1.4.1.14519.5.2.1.7310.5101.318856235542799349557600990165','1.3.6.1.4.1.14519.5.2.1.7310.5101.337896560664468718350275750005','1.3.6.1.4.1.14519.5.2.1.7310.5101.568951414433420775694157731047','1.3.6.1.4.1.14519.5.2.1.7310.5101.877607202381357458819664904037','1.3.6.1.4.1.14519.5.2.1.7310.5101.898676446450078900613536879876','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130890931274399577478152963','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130934168942593154270621032','1.3.6.1.4.1.14519.5.2.1.7311.5101.105766764724449379107432362750','1.3.6.1.4.1.14519.5.2.1.7311.5101.109832262779867794170417053903','1.3.6.1.4.1.14519.5.2.1.7311.5101.119868581724901379856626647643','1.3.6.1.4.1.14519.5.2.1.7311.5101.124933244802704100684011471210','1.3.6.1.4.1.14519.5.2.1.7311.5101.126808882287123482193592987944','1.3.6.1.4.1.14519.5.2.1.7311.5101.146422450520894659910416624671','1.3.6.1.4.1.14519.5.2.1.7311.5101.149728379277305470281540696443','1.3.6.1.4.1.14519.5.2.1.7311.5101.155875154686610653777230856177','1.3.6.1.4.1.14519.5.2.1.7311.5101.156910737340231640626298466189','1.3.6.1.4.1.14519.5.2.1.7311.5101.160225377533762960695041069832','1.3.6.1.4.1.14519.5.2.1.7311.5101.165636727522505811947315188478','1.3.6.1.4.1.14519.5.2.1.7311.5101.166265209513832781969059787909','1.3.6.1.4.1.14519.5.2.1.7311.5101.175570448971296597162147241386','1.3.6.1.4.1.14519.5.2.1.7311.5101.178750617003896154912438088527','1.3.6.1.4.1.14519.5.2.1.7311.5101.180041601751316950966041961765','1.3.6.1.4.1.14519.5.2.1.7311.5101.180650601474055581355948988643','1.3.6.1.4.1.14519.5.2.1.7311.5101.186553403053805718209125341361','1.3.6.1.4.1.14519.5.2.1.7311.5101.196511587017551386049158542619','1.3.6.1.4.1.14519.5.2.1.7311.5101.206828891270520544417996275680','1.3.6.1.4.1.14519.5.2.1.7311.5101.226939533786316417608293472819','1.3.6.1.4.1.14519.5.2.1.7311.5101.236967836312224538274213979128','1.3.6.1.4.1.14519.5.2.1.7311.5101.242424835264181527414562151046','1.3.6.1.4.1.14519.5.2.1.7311.5101.245315537897059083725251245833','1.3.6.1.4.1.14519.5.2.1.7311.5101.248203933751895346486742820088','1.3.6.1.4.1.14519.5.2.1.7311.5101.252185783543855817242399041825','1.3.6.1.4.1.14519.5.2.1.7311.5101.252385246988168654915352850239','1.3.6.1.4.1.14519.5.2.1.7311.5101.257928599770116866513356683535','1.3.6.1.4.1.14519.5.2.1.7311.5101.258067469374761412596368241889','1.3.6.1.4.1.14519.5.2.1.7311.5101.262719318163046156031519223454','1.3.6.1.4.1.14519.5.2.1.7311.5101.266720221212035669954581062639','1.3.6.1.4.1.14519.5.2.1.7311.5101.271991298059338527584681788043','1.3.6.1.4.1.14519.5.2.1.7311.5101.282008038881694967871335639325','1.3.6.1.4.1.14519.5.2.1.7311.5101.287403883614425048490255475041','1.3.6.1.4.1.14519.5.2.1.7311.5101.289711251504454079274667690291','1.3.6.1.4.1.14519.5.2.1.7311.5101.304058605325520782900293953679','1.3.6.1.4.1.14519.5.2.1.7311.5101.312442050391027773578890036831','1.3.6.1.4.1.14519.5.2.1.7311.5101.318660391556220519803669869815','1.3.6.1.4.1.14519.5.2.1.7311.5101.322192860849906453257530108507','1.3.6.1.4.1.14519.5.2.1.7311.5101.333332554059735467244825907777','1.3.6.1.4.1.14519.5.2.1.7311.5101.334326985258868181144176796864','1.3.6.1.4.1.14519.5.2.1.7311.5101.357054868848226402548612686613','1.3.6.1.4.1.14519.5.2.1.7311.5101.460453169764361555745878708935','1.3.6.1.4.1.14519.5.2.1.7311.5101.472900684997509231929411950536','1.3.6.1.4.1.14519.5.2.1.7311.5101.666342438883185464672112795625','1.3.6.1.4.1.14519.5.2.1.7311.5101.785303327607349403635620326985','1.3.6.1.4.1.14519.5.2.1.7311.5101.813054628707353396925223754592','1.3.6.1.4.1.14519.5.2.1.7311.5101.905763996607212880296069425861','1.3.6.1.4.1.14519.5.2.1.7311.5101.928012760939767746081753156593']
# Step 0.3 - Make folders
DIR_RAW = Path(DIR_MAIN).joinpath(DIRNAME_RAW)
DIR_PROCESSED = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED)
DIR_PROCESSED2 = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED2)
DIR_TMP = Path(DIR_MAIN).joinpath(DIRNAME_TMP)
Path(DIR_RAW).mkdir(exist_ok=True, parents=True)
Path(DIR_PROCESSED).mkdir(exist_ok=True, parents=True)
Path(DIR_PROCESSED2).mkdir(exist_ok=True, parents=True)
Path(DIR_TMP).mkdir(exist_ok=True, parents=True)
# Step 1 - Loop over all seriesids_mask
if 1:
print ('')
with tqdm.tqdm(total=len(seriesids_mask), desc=' - [MASKS]') as pbar_mask:
for id_, seriesid_mask in enumerate(seriesids_mask):
try:
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mask)
if resp_series.status == 200:
obj_series = client.read_response(resp_series)
if len(obj_series):
patient_id = obj_series[0]['PatientID']
downloadPath = Path(DIR_RAW).joinpath(patient_id)
Path(downloadPath).mkdir(exist_ok=True, parents=True)
client.get_image(seriesInstanceUid=seriesid_mask, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MASK.format(patient_id, EXT_ZIP))
else:
print (' - [ERROR] No patient info for seriesid_mask: ', seriesid_mask)
else:
print (' - [ERROR] Could not read seriesid_mask: ', seriesid_mask)
except:
print (' - [ERROR] Some error with seriesid_mask', seriesid_mask)
traceback.print_exc()
pdb.set_trace()
pbar_mask.update(1)
# Step 2 - Loop over all seriesids_mri
if 1:
print ('')
with tqdm.tqdm(total=len(seriesids_mri), desc='\n - [MRI]') as pbar_mri:
for id_, seriesid_mri in enumerate(seriesids_mri):
try:
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mri)
if resp_series.status == 200:
obj_series = client.read_response(resp_series)
if len(obj_series):
patient_id = obj_series[0]['PatientID']
downloadPath = Path(DIR_RAW).joinpath(patient_id)
Path(downloadPath).mkdir(exist_ok=True, parents=True)
client.get_image(seriesInstanceUid=seriesid_mri, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MRI.format(patient_id, EXT_ZIP))
else:
print (' - [ERROR] No patient info for seriesid_mri: ', seriesid_mri)
else:
print (' - [ERROR] Could not read seriesid_mri: ', seriesid_mri)
except:
print (' - [ERROR] Some error with seriesid_mri', seriesid_mri)
traceback.print_exc()
pdb.set_trace()
pbar_mri.update(1)
# Step 3 - Convert to NIFTI using dcm2niix
if 1:
print ('')
path_dcm2niix = download_dcm2niix(DIR_TMP)
if path_dcm2niix is not None:
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [Convert to volume] ') as pbar_convert:
for path_patient in Path(DIR_RAW).iterdir():
if Path(path_patient).is_dir():
# Step 3.1 - Init vars
patient_id = Path(path_patient).parts[-1]
foldername_mri = FOLDERNAME_MRI.format(patient_id, '')
path_mri = Path(path_patient).joinpath(foldername_mri)
foldername_mask = FOLDERNAME_MASK.format(patient_id, '')
path_masks = list(Path(path_patient).joinpath(foldername_mask).glob('*')) # there should only be 1, but I am still looping over it
# Step 3.2 - Volumize MRI
if 1:
try:
launch_mri = [str(path_dcm2niix), "-e", "n", "-b", "n", "-v", "0" , "-o", str(path_patient), "-f", foldername_mri, str(path_mri)]
# e.g. .\dcm2niix.exe -e n -b n -v 0 -o . -f ProstateX-0323_mri .\ProstateX-0323_mri
_ = subprocess.run(launch_mri, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
# parameters for conversion.
# -e : export as NRRD (y) or MGH (o) instead of NIfTI (y/n/o, default n)
# -o : output directory (omit to save to input folder)
# -b : BIDS sidecar (y/n/o [o=only: no NIfTI], default y)
# -f : filename (%a=antenna (coil) name, %b=basename, %c=comments, %d=description, %e=echo number, %f=folder name,
except:
print (' - [ERROR][MRI] patient_id: ', patient_id)
traceback.print_exc()
pdb.set_trace()
# Step 3.3 - Volumize MASK
if 1:
try:
for path_dcm in path_masks:
# Step 3.3.1 - Read data
ds_mask = pydicom.dcmread(str(path_dcm))
# len(ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence) == len(vol_mri)
# e.g. --> ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence[0].ReferencedSOPInstanceUID
# len(ds_mask.PerFrameFunctionalGroupsSequence) == len(vol_mask)
vol_mask = ds_mask.pixel_array # [D,H,W]
nii_mri = nibabel.load(str(Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))))
vol_mri = nii_mri.get_fdata() # [H,W,D] e.g. [384,384,19]
if 1:
print ('\n - [convert to nifti][mask] pixelspacing: {} | slicespacing: {}'.format(
ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].PixelSpacing
, ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].SliceThickness))
print (' - [convert to nifti][vol] pixelspacing: {} | slicespacing: {}'.format(nii_mri.header['pixdim'][1:3], nii_mri.header['pixdim'][3] ))
# Step 3.3.2 - Downsample segmentation
if 0:
end = vol_mask.shape[0] # .shape=[D,H,W]
step = end // vol_mri.shape[2]
vol_mask = vol_mask[0:end:step, ...]
diff = vol_mask.shape[0] - vol_mri.shape[2]
if diff != 0:
vol_mask = vol_mask[:-diff, ...]
if vol_mri.shape != vol_mask.shape:
# order = 0 is nearest neighbour interpolation
vol_mask = skimage.transform.resize(vol_mask, vol_mri.shape, preserve_range=True, order=0)
else:
idxs = [idx for idx, item in enumerate(ds_mask.PerFrameFunctionalGroupsSequence) if len(ds_mask.PerFrameFunctionalGroupsSequence[idx].DerivationImageSequence)]
vol_mask = vol_mask[idxs, :, :]
vol_mask = np.rot90(vol_mask, k=-1, axes=(1, 2))
vol_mask = np.swapaxes(vol_mask, 0, 1)
vol_mask = np.swapaxes(vol_mask, 1, 2)
assert vol_mask.shape == vol_mri.shape
# saving segmentations
niiobj = nibabel.Nifti1Image(vol_mask, nii_mri.affine)
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
nibabel.save(niiobj, str(path_mask))
except:
print (' - [ERROR][MASK] patient_id: ', patient_id)
traceback.print_exc()
pdb.set_trace()
pbar_convert.update(1)
# Step 4 - Do voxel spacing analysis
if 0:
spacings = []
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_analysis:
for path_patient in Path(DIR_RAW).iterdir():
if Path(path_patient).is_dir():
patient_id = Path(path_patient).parts[-1]
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
nii_mri = nibabel.load(str(path_raw))
header_mri = nii_mri.header
spacing_mri = list(nii_mri.header['pixdim'][1:4])
origin_mri = [nii_mri.header['qoffset_x'], nii_mri.header['qoffset_y'], nii_mri.header['qoffset_z'] ]
# vol_mri = nii_mri.get_fdata()
spacings.append(spacing_mri)
pbar_analysis.update(1)
if len(spacings):
import matplotlib.pyplot as plt
spacings = np.array(spacings)
f,axarr = plt.subplots(1,3)
axarr[0].hist(spacings[:,0], bins=10); axarr[0].set_title('Voxel Spacing (X)')
axarr[1].hist(spacings[:,1], bins=10); axarr[1].set_title('Voxel Spacing (Y)')
axarr[2].hist(spacings[:,2], bins=10); axarr[2].set_title('Voxel Spacing (Z)')
plt.savefig('prostatex_spacings.png', bbox_inches='tight')
plt.show() # resmaple every image to [0.5, 0.5, 3] as per this image
# Step 5 - Do resampling to (0.5, 0.5, 3.0)
if 1:
SPACING_NEW = (0.5, 0.5, 3.0)
import medloader.dataloader.utils as medUtils
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_resample:
for path_patient in Path(DIR_RAW).iterdir():
if Path(path_patient).is_dir():
patient_id = Path(path_patient).parts[-1]
path_mask_raw = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_img_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
if 0:
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NRRD))
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NRRD))
nrrd_img = medUtils.resample_img(path_img=str(path_img_raw), path_new_img=str(path_img_proc), spacing=SPACING_NEW, verbose=True)
nrrd_mask = medUtils.resample_mask(path_mask=str(path_mask_raw), path_new_mask=str(path_mask_proc), spacing=SPACING_NEW, size=nrrd_img.GetSize(), labels_allowed = [1], verbose=True)
elif 1:
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII))
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII))
import math
import skimage.transform as skTrans
img_og = nibabel.load(path_img_raw)
ratio = [spacing_old / SPACING_NEW[i] for i, spacing_old in enumerate(img_og.header['pixdim'][1:4])]
size_new = tuple(math.ceil(size_old * ratio[i]) - 1 for i, size_old in enumerate(img_og.header['dim'][1:4]))
img_og.header['pixdim'][1:4] = SPACING_NEW
img_resampled = skTrans.resize(img_og.get_fdata(), size_new, order=3, preserve_range=True)
img_resampled = img_resampled.astype(np.int16)
niiobj = nibabel.Nifti1Image(img_resampled, img_og.affine, img_og.header)
Path(path_img_proc).parent.mkdir(exist_ok=True, parents=True)
nibabel.save(niiobj, str(path_img_proc))
mask_og = nibabel.load(path_mask_raw)
mask_og.header['pixdim'][1:4] = SPACING_NEW
mask_resampled = skTrans.resize(mask_og.get_fdata(), size_new, order=0, preserve_range=True) # 0 = Nearest Neighbour
mask_resampled = mask_resampled.astype(np.uint8)
print (' - [{}] np.unique(mask_resampled): {}'.format(patient_id, np.unique(mask_resampled)))
niiobj = nibabel.Nifti1Image(mask_resampled, mask_og.affine, mask_og.header)
Path(path_mask_proc).parent.mkdir(exist_ok=True, parents=True)
nibabel.save(niiobj, str(path_mask_proc))
pbar_resample.update(1)
# Step 6 - Do image cropping analysis
if 0:
"""
Notes
- ProstateX-0241 has 31 slices (25 slices=prostate)
- ProstateX-0198 has 18 slices (11 slices=prostate)
- Result: (200, 200, 28) --> [100,100, 28] --> [50,50,14] --> [25,25,7]
"""
prostate_slice_count = []
prostate_voxel_width = []
prostate_voxel_breadth = []
import matplotlib.pyplot as plt
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Cropping] ') as pbar_cropping:
for path_patient in Path(DIR_PROCESSED).iterdir():
if Path(path_patient).is_dir():
patient_id = Path(path_patient).parts[-1]
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_img_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
# img = nibabel.load(path_img_proc)
# img_array = img.get_fdata()
mask = nibabel.load(path_mask_proc)
mask_array = mask.get_fdata()
prostate_idxs = np.argwhere(mask_array > 0)
prostate_slice_count.append(np.max(prostate_idxs[:,2]) - np.min(prostate_idxs[:,2]))
prostate_voxel_width.append(np.max(prostate_idxs[:,1]) - np.min(prostate_idxs[:,1]))
prostate_voxel_breadth.append(np.max(prostate_idxs[:,0]) - np.min(prostate_idxs[:,0]))
print (' - [{}] mask: {} (height,width,breadth of prostate=[{},{},{}])'.format(patient_id, mask_array.shape, prostate_slice_count[-1], prostate_voxel_width[-1], prostate_voxel_breadth[-1]))
if len(prostate_slice_count):
f,axarr = plt.subplots(1,4)
axarr[0].hist(prostate_slice_count, bins=10); axarr[0].set_title('Height')
axarr[1].hist(prostate_voxel_width, bins=10); axarr[1].set_title('Width')
axarr[2].hist(prostate_voxel_breadth, bins=10); axarr[2].set_title('Breadth')
axarr[3].imshow(mask_array[:,:,prostate_slice_count[-1]//2])
plt.savefig('prostatex_prostatesize.png', bbox_inches='tight')
plt.show()
# Step 7 - Do cropping, padding and save in DIR_PROCESSED2
# Highly custom logic, please change values with caution
if 1:
VOXELS_X = 100
VOXELS_Y = 100
VOXELS_Z_MAX = 28
mri_maxvals = []
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Padding] ') as pbar_cropping:
for path_patient in Path(DIR_PROCESSED).iterdir():
if Path(path_patient).is_dir():
# Step 7.1 - Get paths
patient_id = Path(path_patient).parts[-1]
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_mri_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
# Step 7.1 - Get mask (crop and pad)
mri = nibabel.load(path_mri_proc)
mri_array = mri.get_fdata()
mri_maxval = np.max(mri_array)
mri_maxvals.append(mri_maxval)
if 0:
import matplotlib.pyplot as plt
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=100)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=100');plt.show(block=False)
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=1000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=1000');plt.show(block=False)
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=10000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=10000');plt.show(block=False)
pdb.set_trace()
mri_array = hist_scaled(mri_array, bins=10000)
mask = nibabel.load(path_mask_proc)
mask_array = mask.get_fdata()
mask_idxs = np.argwhere(mask_array > 0)
mask_midpt = np.array([np.mean(mask_idxs[:,0]), np.mean(mask_idxs[:,1]), np.mean(mask_idxs[:,2])]).astype(np.uint8).tolist()
# Step 7.2 - Crop
mask_array_new = np.array(mask_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True)
mri_array_new = np.array(mri_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True)
# if patient_id == 'ProstateX-0309':
# pdb.set_trace()
# Step 7.3 - Pad (in z)
mask_array_height = mask_array_new.shape[2]
mask_array_diff = np.abs(mask_array_height - VOXELS_Z_MAX)
pad_left = mask_array_diff//2
pad_right = mask_array_diff-mask_array_diff//2
if mask_array_height > VOXELS_Z_MAX:
mask_array_new = mask_array_new[:,:,pad_left:-pad_right]
mri_array_new = mri_array_new[:,:,pad_left:-pad_right]
else:
if mask_array_diff:
mask_array_new = np.pad(mask_array_new, (pad_left, pad_right), 'constant')
mask_array_new = mask_array_new[pad_left:-pad_right, pad_left:-pad_right, :]
mri_array_new = np.pad(mri_array_new, (pad_left, pad_right), 'constant')
mri_array_new = mri_array_new[pad_left:-pad_right, pad_left:-pad_right, :]
print (' - [crop/pad][{}] mask_array: {} || mask_array_new: {}'.format(patient_id, mask_array.shape, mask_array_new.shape))
print (' - [crop/pad][{}] mri_array : {} || mri_array_new : {} || maxval: {}'.format(patient_id, mri_array.shape, mri_array_new.shape, mri_maxval))
# Step 7.4 - Save to disk
path_mask_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII))
Path(path_mask_proc2).parent.mkdir(exist_ok=True, parents=True)
niiobj = nibabel.Nifti1Image(mask_array_new, mask.affine, mask.header)
nibabel.save(niiobj, str(path_mask_proc2))
path_mri_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII))
Path(path_mri_proc2).parent.mkdir(exist_ok=True, parents=True)
niiobj = nibabel.Nifti1Image(mri_array_new, mri.affine, mri.header)
nibabel.save(niiobj, str(path_mri_proc2))
if len(mri_maxvals):
import matplotlib.pyplot as plt
plt.hist(mri_maxvals)
plt.title('ProstateX: Max Intensity Vals \n Histogram showing the need for intensity transformations in MRI')
plt.show()
pdb.set_trace()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment