Created
June 3, 2020 17:33
-
-
Save Karol-G/6ce7a3619523f299e8789506fa571c86 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nilearn import plotting | |
import numpy as np | |
import nibabel as nib | |
import os | |
import matplotlib.pyplot as plt | |
from pathlib import Path | |
import scipy | |
class Preprocessor(): | |
def __init__(self, load_dir, save_dir, data_list): | |
self.load_dir = load_dir | |
self.save_dir = save_dir | |
self.data_list = data_list | |
self.data_list = np.asarray(data_list) | |
self.plot_min = -2000 | |
self.plot_max = 1000 | |
self.hu_min = -1000 | |
self.hu_max = None | |
def preprocess_dataset(self): | |
self._load_correct_HU_filenames(self.load_dir) | |
for (filename, air, fat) in self.data_list: | |
img, affine, spacing = self._load_img(filename, print_header=False) | |
self._plot_histogram(img, "Raw data") | |
img = self._estimate_HU(img, air, fat) | |
img = self._clip(img, a_min=self.hu_min, a_max=self.hu_max) | |
self._plot_histogram(img, "HU estimated") | |
img, new_spacing = self._resample(img, spacing) | |
self._save_img(filename, img, np.eye(4), new_spacing) | |
def _load_correct_HU_filenames(self, img_dir): | |
valid_extensions = ('.nii.gz', '.nii') | |
filenames = [] | |
# Load filenames | |
for file in os.listdir(img_dir): | |
if file.endswith(valid_extensions): | |
occurences = file.count(".") | |
for _ in range(occurences): | |
file = Path(file).stem | |
if not np.isin(file, self.data_list[:, 0]): # Add only filename if it is not already in data_list | |
filenames.append((file, None, None)) | |
filenames = np.asarray(filenames) | |
self.data_list = np.append(self.data_list, filenames, axis=0) | |
def _load_img(self, filename, print_header=True): | |
self._plot_niigz(filename) | |
img = nib.load(self.load_dir + filename + ".nii.gz") | |
if print_header: | |
print(img.header) | |
affine = img.affine | |
img_np = img.get_fdata() | |
spacing = img.header["pixdim"][1:4] | |
return img_np, affine, spacing | |
def _estimate_HU(self, img, air, fat): | |
if air is None: | |
return img | |
air_HU = -1000 | |
fat_HU = -100 | |
air = int(air) | |
fat = int(fat) | |
delta_air_fat_HU = abs(air_HU - fat_HU) | |
delta_air = abs(air - air_HU) | |
delta_fat_air_rgb = abs(fat - air) | |
ratio = delta_air_fat_HU / delta_fat_air_rgb | |
img = img - air | |
img = img * ratio | |
img = img + air_HU | |
return img | |
def _clip(self, img, a_min=None, a_max=None): | |
return np.clip(img, a_min=a_min, a_max=a_max) | |
def _resample(self, img, spacing, new_spacing=[1,1,1]): | |
resize_factor = spacing / new_spacing | |
new_real_shape = img.shape * resize_factor | |
new_shape = np.round(new_real_shape) | |
real_resize_factor = new_shape / img.shape | |
new_spacing = spacing / real_resize_factor | |
img = scipy.ndimage.interpolation.zoom(img, real_resize_factor) | |
return img, new_spacing | |
def _save_img(self, filename, img, affine=np.eye(4), new_spacing=None): | |
img = nib.Nifti1Image(img, affine=affine) | |
nib.save(img, self.save_dir + filename + ".nii.gz") | |
self._plot_niigz(filename) | |
def _plot_histogram(self, data, title): | |
bin_size = 10 | |
fig, ax = plt.subplots() | |
data_flat = data.flatten() | |
_ = ax.hist(data_flat, bins=np.arange(self.plot_min, self.plot_max + bin_size, bin_size), density=True) | |
plt.ylim(0, 0.02) | |
plt.title(title) | |
plt.show() | |
def _plot_niigz(self, filename): | |
plotting.plot_img(self.load_dir + filename + ".nii.gz", cmap='gray', vmin=self.plot_min, vmax=self.plot_max) | |
plotting.show() | |
if __name__ == '__main__': | |
# Every '.nii' or '.nii.gz' file in this directory will be preprocesed (HU estimation + resampling). | |
_load_dir = "C:/Users/Karol/datasets/COVID-19_CT_Lung_and_Infection_Segmentation_Dataset/" | |
_save_dir = "C:/Users/Karol/datasets/Dataset Preprocessed/" | |
# Add any files from the _load_dir that need to get HU estimation. Every other file will only get resampled. | |
_incorrect_HU_data_list = [("radiopaedia_4_85506_1", 59, 210), # Filename, Air, Fat | |
("radiopaedia_7_85703_0", 66, 208), | |
("radiopaedia_10_85902_1", 59, 212), | |
("radiopaedia_10_85902_3", 57, 215), | |
("radiopaedia_14_85914_0", 37, 198), | |
("radiopaedia_27_86410_0", 63, 212), | |
("radiopaedia_29_86490_1", 59, 210), | |
("radiopaedia_29_86491_1", 59, 208), | |
("radiopaedia_36_86526_0", 30, 196), | |
("radiopaedia_40_86625_0", 47, 197)] | |
preprocessor = Preprocessor(_load_dir, _save_dir, _incorrect_HU_data_list) | |
preprocessor.preprocess_dataset() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment