Last active
November 2, 2022 14:52
-
-
Save prerakmody/7987e8b62e7c5237747a840b19200692 to your computer and use it in GitHub Desktop.
TCIA scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspired by https://github.com/jzagoli/ProstateX2nii. Modified by Prerak Mody | |
# This processes this dataset --> https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6 | |
# Alternatively, one could also look here --> https://github.com/rcuocolo/PROSTATEx_masks/tree/master/Files/prostate for this dataset (https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=23691656) | |
import pdb | |
import tqdm # pip install tqdm | |
import urllib | |
import zipfile | |
import nibabel # pip install nibabel | |
import pydicom # pip install pydicom | |
import skimage | |
import traceback | |
import subprocess | |
import numpy as np | |
import urllib.request | |
import skimage.transform | |
from pathlib import Path | |
import matplotlib.pyplot as plt | |
DIR_MAIN = Path(__file__).parent.absolute() | |
DIRNAME_RAW = 'raw' | |
DIRNAME_PROCESSED = 'processed' | |
DIRNAME_PROCESSED2 = 'processed2' | |
DIRNAME_TMP = '_tmp' | |
EXT_ZIP = '.zip' | |
EXT_NII = '.nii' | |
EXT_NRRD = '.nrrd' | |
FOLDERNAME_MRI = '{}_mri{}' | |
FOLDERNAME_MASK = '{}_mask{}' | |
def read_zip(filepath_zip, filepath_output=None, leave=False): | |
# Step 0 - Init | |
if Path(filepath_zip).exists(): | |
if filepath_output is None: | |
filepath_zip_parts = list(Path(filepath_zip).parts) | |
filepath_zip_name = filepath_zip_parts[-1].split(EXT_ZIP)[0] | |
filepath_zip_parts[-1] = filepath_zip_name | |
filepath_output = Path(*filepath_zip_parts) | |
zip_fp = zipfile.ZipFile(filepath_zip, 'r') | |
zip_fp_members = zip_fp.namelist() | |
with tqdm.tqdm(total=len(zip_fp_members), desc=' - [Unzip] ' + str(filepath_zip.parts[-1]), leave=leave) as pbar_zip: | |
for member in zip_fp_members: | |
zip_fp.extract(member, filepath_output) | |
pbar_zip.update(1) | |
return filepath_output | |
else: | |
print (' - [ERROR][utils.read_zip()] Path does not exist: ', filepath_zip) | |
return None | |
class TCIAClient: | |
""" | |
- Ref: https://wiki.cancerimagingarchive.net/display/Public/TCIA+Programmatic+Interface+%28REST+API%29+Usage+Guide | |
- Ref: https://github.com/TCIA-Community/TCIA-API-SDK/tree/master/tcia-rest-client-python/src | |
""" | |
GET_IMAGE = "getImage" | |
GET_MANUFACTURER_VALUES = "getManufacturerValues" | |
GET_MODALITY_VALUES = "getModalityValues" | |
GET_COLLECTION_VALUES = "getCollectionValues" | |
GET_BODY_PART_VALUES = "getBodyPartValues" | |
GET_PATIENT_STUDY = "getPatientStudy" | |
GET_SERIES = "getSeries" | |
GET_PATIENT = "getPatient" | |
GET_SERIES_SIZE = "getSeriesSize" | |
CONTENTS_BY_NAME = "ContentsByName" | |
def __init__(self, baseUrl, resource): | |
self.baseUrl = baseUrl + "/" + resource | |
self.STATUS_OK = 200 | |
self.DECODER = 'utf-8' | |
def execute(self, url, queryParameters={}, verbose=False): | |
queryParameters = dict((k, v) for k, v in queryParameters.items() if v) | |
queryString = "?%s" % urllib.parse.urlencode(queryParameters) | |
requestUrl = url + queryString | |
if verbose: | |
print (' - [execute()] URL: ', requestUrl) | |
request = urllib.request.Request(url=requestUrl, headers={}) | |
resp = urllib.request.urlopen(request) | |
return resp | |
def read_response(self, resp): | |
if resp.status == self.STATUS_OK: | |
return eval(resp.read().decode(self.DECODER)) | |
else: | |
return None | |
def get_patient(self,collection = None , outputFormat = "json" ): | |
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT | |
queryParameters = {"Collection" : collection , "format" : outputFormat } | |
resp = self.execute(serviceUrl , queryParameters) | |
return resp | |
def get_patient_study(self,collection = None , patientId = None , studyInstanceUid = None , outputFormat = "json" ): | |
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT_STUDY | |
queryParameters = {"Collection" : collection , "PatientID" : patientId , "StudyInstanceUID" : studyInstanceUid , "format" : outputFormat } | |
resp = self.execute(serviceUrl , queryParameters) | |
return resp | |
def get_series(self, collection=None, patientId=None, modality=None, studyInstanceUid=None, seriesInstanceUid=None, outputFormat = "json" ): | |
serviceUrl = self.baseUrl + "/query/" + self.GET_SERIES | |
queryParameters = {"Collection" : collection, "patientId": patientId, "StudyInstanceUID" : studyInstanceUid, 'SeriesInstanceUID': seriesInstanceUid,"Modality" : modality, "format" : outputFormat } | |
resp = self.execute(serviceUrl , queryParameters) | |
return resp | |
def get_image(self , seriesInstanceUid , downloadPath, zipFileName): | |
try: | |
serviceUrl = self.baseUrl + "/query/" + self.GET_IMAGE | |
queryParameters = { "SeriesInstanceUID" : seriesInstanceUid } | |
resp = self.execute(serviceUrl, queryParameters) | |
filepath = Path(downloadPath).joinpath(zipFileName) | |
data = resp.read() | |
with open(filepath, 'wb') as fp: | |
fp.write(data) | |
tmp = list(Path(filepath).parts) | |
tmp[-1] = tmp[-1].split('.zip')[0] | |
filepath_output = Path(*tmp) | |
read_zip(filepath, filepath_output, leave=False) | |
Path(filepath).unlink() | |
except: | |
traceback.print_exc() | |
def download_dcm2niix(DIR_TMP): | |
# Ref: https://github.com/rordenlab/dcm2niix/releases/tag/v1.0.20220720 | |
try: | |
import sys | |
path_file = Path(DIR_TMP).joinpath('dcm2niix') | |
if sys.platform == 'win32': | |
if not Path(path_file).exists(): | |
path_zip = Path(DIR_TMP).joinpath('dcm2niix_win.zip') | |
urllib.request.urlretrieve(url='https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20220720/dcm2niix_win.zip', filename=str(path_zip)) | |
read_zip(path_zip, path_file) | |
Path(path_zip).unlink() | |
elif sys.platform == 'linux': | |
pass | |
return Path(path_file).joinpath('dcm2niix.exe') | |
except: | |
traceback.print_exc() | |
pdb.set_trace() | |
return None | |
def freqhist_bins(array, bins=100): | |
""" | |
A numpy based function to split the range of pixel values into groups, such that each group has around the same number of pixels | |
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb | |
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py | |
""" | |
imsd = np.sort(array.flatten()) | |
t = np.array([0.001]) | |
t = np.append(t, np.arange(bins)/bins+(1/2/bins)) | |
t = np.append(t, 0.999) | |
t = (len(imsd)*t+0.5).astype(np.int) | |
return np.unique(imsd[t]) | |
def hist_scaled(array, brks=None, bins=100): | |
""" | |
Scales a tensor using `freqhist_bins` to values between 0 and 1 | |
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb | |
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py | |
""" | |
if brks is None: | |
brks = freqhist_bins(array, bins=bins) | |
ys = np.linspace(0., 1., len(brks)) | |
x = array.flatten() | |
x = np.interp(x, brks, ys) | |
if 0: | |
plt.plot(brks, ys, color='blue', label='Non-Normalized', marker='x', markersize=2) | |
plt.xlim([0,np.max(array)]) | |
brks2 = freqhist_bins(x, bins=bins)*np.max(array) | |
ys2 = np.linspace(0., 1., len(brks2)) | |
plt.plot(brks2, ys2, color='orange', label='Histogram Equalization') | |
plt.legend() | |
plt.show() | |
array_tmp = np.reshape(x, array.shape) | |
array_tmp = np.clip(array_tmp, 0.,1.) | |
return array_tmp | |
if __name__ == "__main__": | |
# Step 0.1 - Init TCIA client | |
baseUrl = 'https://services.cancerimagingarchive.net/services/v4' | |
resource = 'TCIA' | |
collection = 'PROSTATEx' | |
client = TCIAClient(baseUrl=baseUrl, resource=resource) | |
# Step 0.2 - Init seriesids from https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6 | |
seriesids_mask = ['1.2.276.0.7230010.3.1.3.1070885483.26120.1599221759.302','1.2.276.0.7230010.3.1.3.1070885483.24432.1599221753.549','1.2.276.0.7230010.3.1.3.1070885483.5008.1599221756.491','1.2.276.0.7230010.3.1.3.1070885483.27140.1599221767.245','1.2.276.0.7230010.3.1.3.1070885483.27624.1599221764.667','1.2.276.0.7230010.3.1.3.1070885483.16240.1599221761.962','1.2.276.0.7230010.3.1.3.1070885483.8220.1599221770.50','1.2.276.0.7230010.3.1.3.1070885483.9460.1599221773.50','1.2.276.0.7230010.3.1.3.1070885483.24532.1599221775.720','1.2.276.0.7230010.3.1.3.1070885483.24608.1599221781.105','1.2.276.0.7230010.3.1.3.1070885483.24736.1599221778.322','1.2.276.0.7230010.3.1.3.1070885483.22584.1599221783.667','1.2.276.0.7230010.3.1.3.1070885483.24012.1599221786.269','1.2.276.0.7230010.3.1.3.1070885483.21792.1599221788.891','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221791.439','1.2.276.0.7230010.3.1.3.1070885483.5672.1599221794.52','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221799.294','1.2.276.0.7230010.3.1.3.1070885483.16324.1599221796.636','1.2.276.0.7230010.3.1.3.1070885483.26880.1599221801.878','1.2.276.0.7230010.3.1.3.1070885483.26756.1599221804.455','1.2.276.0.7230010.3.1.3.1070885483.21116.1599221807.124','1.2.276.0.7230010.3.1.3.1070885483.23772.1599221809.995','1.2.276.0.7230010.3.1.3.1070885483.23924.1599221812.620','1.2.276.0.7230010.3.1.3.1070885483.24056.1599221815.833','1.2.276.0.7230010.3.1.3.1070885483.23100.1599221818.605','1.2.276.0.7230010.3.1.3.1070885483.4648.1599221821.338','1.2.276.0.7230010.3.1.3.1070885483.5584.1599221824.60','1.2.276.0.7230010.3.1.3.1070885483.17604.1599221827.76','1.2.276.0.7230010.3.1.3.1070885483.23788.1599221829.820','1.2.276.0.7230010.3.1.3.1070885483.26076.1599221832.601','1.2.276.0.7230010.3.1.3.1070885483.22052.1599221835.360','1.2.276.0.7230010.3.1.3.1070885483.26444.1599221837.990','1.2.276.0.7230010.3.1.3.1070885483.27528.1599221841.604','1.2.276.0.7230010.3.1.3.1070885483.3400.1599221844.204','1.2.276.0.7230010.3.1.3.1070885483.15416.1599221846.853','1.2.276.0.7230010.3.1.3.1070885483.22008.1599221849.372','1.2.276.0.7230010.3.1.3.1070885483.16016.1599221851.990','1.2.276.0.7230010.3.1.3.1070885483.8088.1599221854.809','1.2.276.0.7230010.3.1.3.1070885483.5396.1599221858.335','1.2.276.0.7230010.3.1.3.1070885483.25220.1599221863.968','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221860.888','1.2.276.0.7230010.3.1.3.1070885483.7992.1599221866.575','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221869.130','1.2.276.0.7230010.3.1.3.1070885483.24588.1599221871.775','1.2.276.0.7230010.3.1.3.1070885483.24928.1599221874.342','1.2.276.0.7230010.3.1.3.1070885483.14224.1599221879.641','1.2.276.0.7230010.3.1.3.1070885483.25300.1599221882.469','1.2.276.0.7230010.3.1.3.1070885483.21512.1599221877.6','1.2.276.0.7230010.3.1.3.1070885483.5808.1599221885.265','1.2.276.0.7230010.3.1.3.1070885483.22380.1599221887.327','1.2.276.0.7230010.3.1.3.1070885483.23580.1599221890.67','1.2.276.0.7230010.3.1.3.1070885483.15904.1599221892.237','1.2.276.0.7230010.3.1.3.1070885483.4220.1599221894.379','1.2.276.0.7230010.3.1.3.1070885483.25552.1599221897.694','1.2.276.0.7230010.3.1.3.1070885483.26088.1599221900.380','1.2.276.0.7230010.3.1.3.1070885483.5296.1599221903.189','1.2.276.0.7230010.3.1.3.1070885483.6284.1599221908.467','1.2.276.0.7230010.3.1.3.1070885483.17416.1599221911.141','1.2.276.0.7230010.3.1.3.1070885483.1780.1599221905.866','1.2.276.0.7230010.3.1.3.1070885483.24236.1599221913.873','1.2.276.0.7230010.3.1.3.1070885483.24632.1599221916.651','1.2.276.0.7230010.3.1.3.1070885483.25044.1599221924.876','1.2.276.0.7230010.3.1.3.1070885483.8668.1599221919.430','1.2.276.0.7230010.3.1.3.1070885483.24644.1599221922.113','1.2.276.0.7230010.3.1.3.1070885483.27560.1599221927.547','1.2.276.0.7230010.3.1.3.1070885483.25100.1599221930.643'] | |
seriesids_mri = ['1.3.6.1.4.1.14519.5.2.1.7310.5101.107276353018221365492863559094','1.3.6.1.4.1.14519.5.2.1.7310.5101.127076787998581344993055912043','1.3.6.1.4.1.14519.5.2.1.7310.5101.128915395083536972793214199559','1.3.6.1.4.1.14519.5.2.1.7310.5101.156355474235536930096765953749','1.3.6.1.4.1.14519.5.2.1.7310.5101.182666056353308077379356745296','1.3.6.1.4.1.14519.5.2.1.7310.5101.217705819956691748023253683197','1.3.6.1.4.1.14519.5.2.1.7310.5101.233894782007809884236855995183','1.3.6.1.4.1.14519.5.2.1.7310.5101.234424615181029312892423746307','1.3.6.1.4.1.14519.5.2.1.7310.5101.259467270859277582122403078829','1.3.6.1.4.1.14519.5.2.1.7310.5101.260386623049829078909727548239','1.3.6.1.4.1.14519.5.2.1.7310.5101.306748150836286513093400468929','1.3.6.1.4.1.14519.5.2.1.7310.5101.318856235542799349557600990165','1.3.6.1.4.1.14519.5.2.1.7310.5101.337896560664468718350275750005','1.3.6.1.4.1.14519.5.2.1.7310.5101.568951414433420775694157731047','1.3.6.1.4.1.14519.5.2.1.7310.5101.877607202381357458819664904037','1.3.6.1.4.1.14519.5.2.1.7310.5101.898676446450078900613536879876','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130890931274399577478152963','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130934168942593154270621032','1.3.6.1.4.1.14519.5.2.1.7311.5101.105766764724449379107432362750','1.3.6.1.4.1.14519.5.2.1.7311.5101.109832262779867794170417053903','1.3.6.1.4.1.14519.5.2.1.7311.5101.119868581724901379856626647643','1.3.6.1.4.1.14519.5.2.1.7311.5101.124933244802704100684011471210','1.3.6.1.4.1.14519.5.2.1.7311.5101.126808882287123482193592987944','1.3.6.1.4.1.14519.5.2.1.7311.5101.146422450520894659910416624671','1.3.6.1.4.1.14519.5.2.1.7311.5101.149728379277305470281540696443','1.3.6.1.4.1.14519.5.2.1.7311.5101.155875154686610653777230856177','1.3.6.1.4.1.14519.5.2.1.7311.5101.156910737340231640626298466189','1.3.6.1.4.1.14519.5.2.1.7311.5101.160225377533762960695041069832','1.3.6.1.4.1.14519.5.2.1.7311.5101.165636727522505811947315188478','1.3.6.1.4.1.14519.5.2.1.7311.5101.166265209513832781969059787909','1.3.6.1.4.1.14519.5.2.1.7311.5101.175570448971296597162147241386','1.3.6.1.4.1.14519.5.2.1.7311.5101.178750617003896154912438088527','1.3.6.1.4.1.14519.5.2.1.7311.5101.180041601751316950966041961765','1.3.6.1.4.1.14519.5.2.1.7311.5101.180650601474055581355948988643','1.3.6.1.4.1.14519.5.2.1.7311.5101.186553403053805718209125341361','1.3.6.1.4.1.14519.5.2.1.7311.5101.196511587017551386049158542619','1.3.6.1.4.1.14519.5.2.1.7311.5101.206828891270520544417996275680','1.3.6.1.4.1.14519.5.2.1.7311.5101.226939533786316417608293472819','1.3.6.1.4.1.14519.5.2.1.7311.5101.236967836312224538274213979128','1.3.6.1.4.1.14519.5.2.1.7311.5101.242424835264181527414562151046','1.3.6.1.4.1.14519.5.2.1.7311.5101.245315537897059083725251245833','1.3.6.1.4.1.14519.5.2.1.7311.5101.248203933751895346486742820088','1.3.6.1.4.1.14519.5.2.1.7311.5101.252185783543855817242399041825','1.3.6.1.4.1.14519.5.2.1.7311.5101.252385246988168654915352850239','1.3.6.1.4.1.14519.5.2.1.7311.5101.257928599770116866513356683535','1.3.6.1.4.1.14519.5.2.1.7311.5101.258067469374761412596368241889','1.3.6.1.4.1.14519.5.2.1.7311.5101.262719318163046156031519223454','1.3.6.1.4.1.14519.5.2.1.7311.5101.266720221212035669954581062639','1.3.6.1.4.1.14519.5.2.1.7311.5101.271991298059338527584681788043','1.3.6.1.4.1.14519.5.2.1.7311.5101.282008038881694967871335639325','1.3.6.1.4.1.14519.5.2.1.7311.5101.287403883614425048490255475041','1.3.6.1.4.1.14519.5.2.1.7311.5101.289711251504454079274667690291','1.3.6.1.4.1.14519.5.2.1.7311.5101.304058605325520782900293953679','1.3.6.1.4.1.14519.5.2.1.7311.5101.312442050391027773578890036831','1.3.6.1.4.1.14519.5.2.1.7311.5101.318660391556220519803669869815','1.3.6.1.4.1.14519.5.2.1.7311.5101.322192860849906453257530108507','1.3.6.1.4.1.14519.5.2.1.7311.5101.333332554059735467244825907777','1.3.6.1.4.1.14519.5.2.1.7311.5101.334326985258868181144176796864','1.3.6.1.4.1.14519.5.2.1.7311.5101.357054868848226402548612686613','1.3.6.1.4.1.14519.5.2.1.7311.5101.460453169764361555745878708935','1.3.6.1.4.1.14519.5.2.1.7311.5101.472900684997509231929411950536','1.3.6.1.4.1.14519.5.2.1.7311.5101.666342438883185464672112795625','1.3.6.1.4.1.14519.5.2.1.7311.5101.785303327607349403635620326985','1.3.6.1.4.1.14519.5.2.1.7311.5101.813054628707353396925223754592','1.3.6.1.4.1.14519.5.2.1.7311.5101.905763996607212880296069425861','1.3.6.1.4.1.14519.5.2.1.7311.5101.928012760939767746081753156593'] | |
# Step 0.3 - Make folders | |
DIR_RAW = Path(DIR_MAIN).joinpath(DIRNAME_RAW) | |
DIR_PROCESSED = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED) | |
DIR_PROCESSED2 = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED2) | |
DIR_TMP = Path(DIR_MAIN).joinpath(DIRNAME_TMP) | |
Path(DIR_RAW).mkdir(exist_ok=True, parents=True) | |
Path(DIR_PROCESSED).mkdir(exist_ok=True, parents=True) | |
Path(DIR_PROCESSED2).mkdir(exist_ok=True, parents=True) | |
Path(DIR_TMP).mkdir(exist_ok=True, parents=True) | |
# Step 1 - Loop over all seriesids_mask | |
if 1: | |
print ('') | |
with tqdm.tqdm(total=len(seriesids_mask), desc=' - [MASKS]') as pbar_mask: | |
for id_, seriesid_mask in enumerate(seriesids_mask): | |
try: | |
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mask) | |
if resp_series.status == 200: | |
obj_series = client.read_response(resp_series) | |
if len(obj_series): | |
patient_id = obj_series[0]['PatientID'] | |
downloadPath = Path(DIR_RAW).joinpath(patient_id) | |
Path(downloadPath).mkdir(exist_ok=True, parents=True) | |
client.get_image(seriesInstanceUid=seriesid_mask, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MASK.format(patient_id, EXT_ZIP)) | |
else: | |
print (' - [ERROR] No patient info for seriesid_mask: ', seriesid_mask) | |
else: | |
print (' - [ERROR] Could not read seriesid_mask: ', seriesid_mask) | |
except: | |
print (' - [ERROR] Some error with seriesid_mask', seriesid_mask) | |
traceback.print_exc() | |
pdb.set_trace() | |
pbar_mask.update(1) | |
# Step 2 - Loop over all seriesids_mri | |
if 1: | |
print ('') | |
with tqdm.tqdm(total=len(seriesids_mri), desc='\n - [MRI]') as pbar_mri: | |
for id_, seriesid_mri in enumerate(seriesids_mri): | |
try: | |
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mri) | |
if resp_series.status == 200: | |
obj_series = client.read_response(resp_series) | |
if len(obj_series): | |
patient_id = obj_series[0]['PatientID'] | |
downloadPath = Path(DIR_RAW).joinpath(patient_id) | |
Path(downloadPath).mkdir(exist_ok=True, parents=True) | |
client.get_image(seriesInstanceUid=seriesid_mri, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MRI.format(patient_id, EXT_ZIP)) | |
else: | |
print (' - [ERROR] No patient info for seriesid_mri: ', seriesid_mri) | |
else: | |
print (' - [ERROR] Could not read seriesid_mri: ', seriesid_mri) | |
except: | |
print (' - [ERROR] Some error with seriesid_mri', seriesid_mri) | |
traceback.print_exc() | |
pdb.set_trace() | |
pbar_mri.update(1) | |
# Step 3 - Convert to NIFTI using dcm2niix | |
if 1: | |
print ('') | |
path_dcm2niix = download_dcm2niix(DIR_TMP) | |
if path_dcm2niix is not None: | |
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [Convert to volume] ') as pbar_convert: | |
for path_patient in Path(DIR_RAW).iterdir(): | |
if Path(path_patient).is_dir(): | |
# Step 3.1 - Init vars | |
patient_id = Path(path_patient).parts[-1] | |
foldername_mri = FOLDERNAME_MRI.format(patient_id, '') | |
path_mri = Path(path_patient).joinpath(foldername_mri) | |
foldername_mask = FOLDERNAME_MASK.format(patient_id, '') | |
path_masks = list(Path(path_patient).joinpath(foldername_mask).glob('*')) # there should only be 1, but I am still looping over it | |
# Step 3.2 - Volumize MRI | |
if 1: | |
try: | |
launch_mri = [str(path_dcm2niix), "-e", "n", "-b", "n", "-v", "0" , "-o", str(path_patient), "-f", foldername_mri, str(path_mri)] | |
# e.g. .\dcm2niix.exe -e n -b n -v 0 -o . -f ProstateX-0323_mri .\ProstateX-0323_mri | |
_ = subprocess.run(launch_mri, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) | |
# parameters for conversion. | |
# -e : export as NRRD (y) or MGH (o) instead of NIfTI (y/n/o, default n) | |
# -o : output directory (omit to save to input folder) | |
# -b : BIDS sidecar (y/n/o [o=only: no NIfTI], default y) | |
# -f : filename (%a=antenna (coil) name, %b=basename, %c=comments, %d=description, %e=echo number, %f=folder name, | |
except: | |
print (' - [ERROR][MRI] patient_id: ', patient_id) | |
traceback.print_exc() | |
pdb.set_trace() | |
# Step 3.3 - Volumize MASK | |
if 1: | |
try: | |
for path_dcm in path_masks: | |
# Step 3.3.1 - Read data | |
ds_mask = pydicom.dcmread(str(path_dcm)) | |
# len(ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence) == len(vol_mri) | |
# e.g. --> ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence[0].ReferencedSOPInstanceUID | |
# len(ds_mask.PerFrameFunctionalGroupsSequence) == len(vol_mask) | |
vol_mask = ds_mask.pixel_array # [D,H,W] | |
nii_mri = nibabel.load(str(Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)))) | |
vol_mri = nii_mri.get_fdata() # [H,W,D] e.g. [384,384,19] | |
if 1: | |
print ('\n - [convert to nifti][mask] pixelspacing: {} | slicespacing: {}'.format( | |
ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].PixelSpacing | |
, ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].SliceThickness)) | |
print (' - [convert to nifti][vol] pixelspacing: {} | slicespacing: {}'.format(nii_mri.header['pixdim'][1:3], nii_mri.header['pixdim'][3] )) | |
# Step 3.3.2 - Downsample segmentation | |
if 0: | |
end = vol_mask.shape[0] # .shape=[D,H,W] | |
step = end // vol_mri.shape[2] | |
vol_mask = vol_mask[0:end:step, ...] | |
diff = vol_mask.shape[0] - vol_mri.shape[2] | |
if diff != 0: | |
vol_mask = vol_mask[:-diff, ...] | |
if vol_mri.shape != vol_mask.shape: | |
# order = 0 is nearest neighbour interpolation | |
vol_mask = skimage.transform.resize(vol_mask, vol_mri.shape, preserve_range=True, order=0) | |
else: | |
idxs = [idx for idx, item in enumerate(ds_mask.PerFrameFunctionalGroupsSequence) if len(ds_mask.PerFrameFunctionalGroupsSequence[idx].DerivationImageSequence)] | |
vol_mask = vol_mask[idxs, :, :] | |
vol_mask = np.rot90(vol_mask, k=-1, axes=(1, 2)) | |
vol_mask = np.swapaxes(vol_mask, 0, 1) | |
vol_mask = np.swapaxes(vol_mask, 1, 2) | |
assert vol_mask.shape == vol_mri.shape | |
# saving segmentations | |
niiobj = nibabel.Nifti1Image(vol_mask, nii_mri.affine) | |
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
nibabel.save(niiobj, str(path_mask)) | |
except: | |
print (' - [ERROR][MASK] patient_id: ', patient_id) | |
traceback.print_exc() | |
pdb.set_trace() | |
pbar_convert.update(1) | |
# Step 4 - Do voxel spacing analysis | |
if 0: | |
spacings = [] | |
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_analysis: | |
for path_patient in Path(DIR_RAW).iterdir(): | |
if Path(path_patient).is_dir(): | |
patient_id = Path(path_patient).parts[-1] | |
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
path_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) | |
nii_mri = nibabel.load(str(path_raw)) | |
header_mri = nii_mri.header | |
spacing_mri = list(nii_mri.header['pixdim'][1:4]) | |
origin_mri = [nii_mri.header['qoffset_x'], nii_mri.header['qoffset_y'], nii_mri.header['qoffset_z'] ] | |
# vol_mri = nii_mri.get_fdata() | |
spacings.append(spacing_mri) | |
pbar_analysis.update(1) | |
if len(spacings): | |
import matplotlib.pyplot as plt | |
spacings = np.array(spacings) | |
f,axarr = plt.subplots(1,3) | |
axarr[0].hist(spacings[:,0], bins=10); axarr[0].set_title('Voxel Spacing (X)') | |
axarr[1].hist(spacings[:,1], bins=10); axarr[1].set_title('Voxel Spacing (Y)') | |
axarr[2].hist(spacings[:,2], bins=10); axarr[2].set_title('Voxel Spacing (Z)') | |
plt.savefig('prostatex_spacings.png', bbox_inches='tight') | |
plt.show() # resmaple every image to [0.5, 0.5, 3] as per this image | |
# Step 5 - Do resampling to (0.5, 0.5, 3.0) | |
if 1: | |
SPACING_NEW = (0.5, 0.5, 3.0) | |
import medloader.dataloader.utils as medUtils | |
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_resample: | |
for path_patient in Path(DIR_RAW).iterdir(): | |
if Path(path_patient).is_dir(): | |
patient_id = Path(path_patient).parts[-1] | |
path_mask_raw = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
path_img_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) | |
if 0: | |
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NRRD)) | |
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NRRD)) | |
nrrd_img = medUtils.resample_img(path_img=str(path_img_raw), path_new_img=str(path_img_proc), spacing=SPACING_NEW, verbose=True) | |
nrrd_mask = medUtils.resample_mask(path_mask=str(path_mask_raw), path_new_mask=str(path_mask_proc), spacing=SPACING_NEW, size=nrrd_img.GetSize(), labels_allowed = [1], verbose=True) | |
elif 1: | |
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII)) | |
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
import math | |
import skimage.transform as skTrans | |
img_og = nibabel.load(path_img_raw) | |
ratio = [spacing_old / SPACING_NEW[i] for i, spacing_old in enumerate(img_og.header['pixdim'][1:4])] | |
size_new = tuple(math.ceil(size_old * ratio[i]) - 1 for i, size_old in enumerate(img_og.header['dim'][1:4])) | |
img_og.header['pixdim'][1:4] = SPACING_NEW | |
img_resampled = skTrans.resize(img_og.get_fdata(), size_new, order=3, preserve_range=True) | |
img_resampled = img_resampled.astype(np.int16) | |
niiobj = nibabel.Nifti1Image(img_resampled, img_og.affine, img_og.header) | |
Path(path_img_proc).parent.mkdir(exist_ok=True, parents=True) | |
nibabel.save(niiobj, str(path_img_proc)) | |
mask_og = nibabel.load(path_mask_raw) | |
mask_og.header['pixdim'][1:4] = SPACING_NEW | |
mask_resampled = skTrans.resize(mask_og.get_fdata(), size_new, order=0, preserve_range=True) # 0 = Nearest Neighbour | |
mask_resampled = mask_resampled.astype(np.uint8) | |
print (' - [{}] np.unique(mask_resampled): {}'.format(patient_id, np.unique(mask_resampled))) | |
niiobj = nibabel.Nifti1Image(mask_resampled, mask_og.affine, mask_og.header) | |
Path(path_mask_proc).parent.mkdir(exist_ok=True, parents=True) | |
nibabel.save(niiobj, str(path_mask_proc)) | |
pbar_resample.update(1) | |
# Step 6 - Do image cropping analysis | |
if 0: | |
""" | |
Notes | |
- ProstateX-0241 has 31 slices (25 slices=prostate) | |
- ProstateX-0198 has 18 slices (11 slices=prostate) | |
- Result: (200, 200, 28) --> [100,100, 28] --> [50,50,14] --> [25,25,7] | |
""" | |
prostate_slice_count = [] | |
prostate_voxel_width = [] | |
prostate_voxel_breadth = [] | |
import matplotlib.pyplot as plt | |
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Cropping] ') as pbar_cropping: | |
for path_patient in Path(DIR_PROCESSED).iterdir(): | |
if Path(path_patient).is_dir(): | |
patient_id = Path(path_patient).parts[-1] | |
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
path_img_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) | |
# img = nibabel.load(path_img_proc) | |
# img_array = img.get_fdata() | |
mask = nibabel.load(path_mask_proc) | |
mask_array = mask.get_fdata() | |
prostate_idxs = np.argwhere(mask_array > 0) | |
prostate_slice_count.append(np.max(prostate_idxs[:,2]) - np.min(prostate_idxs[:,2])) | |
prostate_voxel_width.append(np.max(prostate_idxs[:,1]) - np.min(prostate_idxs[:,1])) | |
prostate_voxel_breadth.append(np.max(prostate_idxs[:,0]) - np.min(prostate_idxs[:,0])) | |
print (' - [{}] mask: {} (height,width,breadth of prostate=[{},{},{}])'.format(patient_id, mask_array.shape, prostate_slice_count[-1], prostate_voxel_width[-1], prostate_voxel_breadth[-1])) | |
if len(prostate_slice_count): | |
f,axarr = plt.subplots(1,4) | |
axarr[0].hist(prostate_slice_count, bins=10); axarr[0].set_title('Height') | |
axarr[1].hist(prostate_voxel_width, bins=10); axarr[1].set_title('Width') | |
axarr[2].hist(prostate_voxel_breadth, bins=10); axarr[2].set_title('Breadth') | |
axarr[3].imshow(mask_array[:,:,prostate_slice_count[-1]//2]) | |
plt.savefig('prostatex_prostatesize.png', bbox_inches='tight') | |
plt.show() | |
# Step 7 - Do cropping, padding and save in DIR_PROCESSED2 | |
# Highly custom logic, please change values with caution | |
if 1: | |
VOXELS_X = 100 | |
VOXELS_Y = 100 | |
VOXELS_Z_MAX = 28 | |
mri_maxvals = [] | |
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Padding] ') as pbar_cropping: | |
for path_patient in Path(DIR_PROCESSED).iterdir(): | |
if Path(path_patient).is_dir(): | |
# Step 7.1 - Get paths | |
patient_id = Path(path_patient).parts[-1] | |
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
path_mri_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) | |
# Step 7.1 - Get mask (crop and pad) | |
mri = nibabel.load(path_mri_proc) | |
mri_array = mri.get_fdata() | |
mri_maxval = np.max(mri_array) | |
mri_maxvals.append(mri_maxval) | |
if 0: | |
import matplotlib.pyplot as plt | |
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=100)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=100');plt.show(block=False) | |
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=1000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=1000');plt.show(block=False) | |
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=10000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=10000');plt.show(block=False) | |
pdb.set_trace() | |
mri_array = hist_scaled(mri_array, bins=10000) | |
mask = nibabel.load(path_mask_proc) | |
mask_array = mask.get_fdata() | |
mask_idxs = np.argwhere(mask_array > 0) | |
mask_midpt = np.array([np.mean(mask_idxs[:,0]), np.mean(mask_idxs[:,1]), np.mean(mask_idxs[:,2])]).astype(np.uint8).tolist() | |
# Step 7.2 - Crop | |
mask_array_new = np.array(mask_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True) | |
mri_array_new = np.array(mri_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True) | |
# if patient_id == 'ProstateX-0309': | |
# pdb.set_trace() | |
# Step 7.3 - Pad (in z) | |
mask_array_height = mask_array_new.shape[2] | |
mask_array_diff = np.abs(mask_array_height - VOXELS_Z_MAX) | |
pad_left = mask_array_diff//2 | |
pad_right = mask_array_diff-mask_array_diff//2 | |
if mask_array_height > VOXELS_Z_MAX: | |
mask_array_new = mask_array_new[:,:,pad_left:-pad_right] | |
mri_array_new = mri_array_new[:,:,pad_left:-pad_right] | |
else: | |
if mask_array_diff: | |
mask_array_new = np.pad(mask_array_new, (pad_left, pad_right), 'constant') | |
mask_array_new = mask_array_new[pad_left:-pad_right, pad_left:-pad_right, :] | |
mri_array_new = np.pad(mri_array_new, (pad_left, pad_right), 'constant') | |
mri_array_new = mri_array_new[pad_left:-pad_right, pad_left:-pad_right, :] | |
print (' - [crop/pad][{}] mask_array: {} || mask_array_new: {}'.format(patient_id, mask_array.shape, mask_array_new.shape)) | |
print (' - [crop/pad][{}] mri_array : {} || mri_array_new : {} || maxval: {}'.format(patient_id, mri_array.shape, mri_array_new.shape, mri_maxval)) | |
# Step 7.4 - Save to disk | |
path_mask_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII)) | |
Path(path_mask_proc2).parent.mkdir(exist_ok=True, parents=True) | |
niiobj = nibabel.Nifti1Image(mask_array_new, mask.affine, mask.header) | |
nibabel.save(niiobj, str(path_mask_proc2)) | |
path_mri_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII)) | |
Path(path_mri_proc2).parent.mkdir(exist_ok=True, parents=True) | |
niiobj = nibabel.Nifti1Image(mri_array_new, mri.affine, mri.header) | |
nibabel.save(niiobj, str(path_mri_proc2)) | |
if len(mri_maxvals): | |
import matplotlib.pyplot as plt | |
plt.hist(mri_maxvals) | |
plt.title('ProstateX: Max Intensity Vals \n Histogram showing the need for intensity transformations in MRI') | |
plt.show() | |
pdb.set_trace() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment