Skip to content

Instantly share code, notes, and snippets.

@Karol-G
Created June 3, 2020 17:33
Show Gist options
  • Save Karol-G/6ce7a3619523f299e8789506fa571c86 to your computer and use it in GitHub Desktop.
Save Karol-G/6ce7a3619523f299e8789506fa571c86 to your computer and use it in GitHub Desktop.
from nilearn import plotting
import numpy as np
import nibabel as nib
import os
import matplotlib.pyplot as plt
from pathlib import Path
import scipy
class Preprocessor():
def __init__(self, load_dir, save_dir, data_list):
self.load_dir = load_dir
self.save_dir = save_dir
self.data_list = data_list
self.data_list = np.asarray(data_list)
self.plot_min = -2000
self.plot_max = 1000
self.hu_min = -1000
self.hu_max = None
def preprocess_dataset(self):
self._load_correct_HU_filenames(self.load_dir)
for (filename, air, fat) in self.data_list:
img, affine, spacing = self._load_img(filename, print_header=False)
self._plot_histogram(img, "Raw data")
img = self._estimate_HU(img, air, fat)
img = self._clip(img, a_min=self.hu_min, a_max=self.hu_max)
self._plot_histogram(img, "HU estimated")
img, new_spacing = self._resample(img, spacing)
self._save_img(filename, img, np.eye(4), new_spacing)
def _load_correct_HU_filenames(self, img_dir):
valid_extensions = ('.nii.gz', '.nii')
filenames = []
# Load filenames
for file in os.listdir(img_dir):
if file.endswith(valid_extensions):
occurences = file.count(".")
for _ in range(occurences):
file = Path(file).stem
if not np.isin(file, self.data_list[:, 0]): # Add only filename if it is not already in data_list
filenames.append((file, None, None))
filenames = np.asarray(filenames)
self.data_list = np.append(self.data_list, filenames, axis=0)
def _load_img(self, filename, print_header=True):
self._plot_niigz(filename)
img = nib.load(self.load_dir + filename + ".nii.gz")
if print_header:
print(img.header)
affine = img.affine
img_np = img.get_fdata()
spacing = img.header["pixdim"][1:4]
return img_np, affine, spacing
def _estimate_HU(self, img, air, fat):
if air is None:
return img
air_HU = -1000
fat_HU = -100
air = int(air)
fat = int(fat)
delta_air_fat_HU = abs(air_HU - fat_HU)
delta_air = abs(air - air_HU)
delta_fat_air_rgb = abs(fat - air)
ratio = delta_air_fat_HU / delta_fat_air_rgb
img = img - air
img = img * ratio
img = img + air_HU
return img
def _clip(self, img, a_min=None, a_max=None):
return np.clip(img, a_min=a_min, a_max=a_max)
def _resample(self, img, spacing, new_spacing=[1,1,1]):
resize_factor = spacing / new_spacing
new_real_shape = img.shape * resize_factor
new_shape = np.round(new_real_shape)
real_resize_factor = new_shape / img.shape
new_spacing = spacing / real_resize_factor
img = scipy.ndimage.interpolation.zoom(img, real_resize_factor)
return img, new_spacing
def _save_img(self, filename, img, affine=np.eye(4), new_spacing=None):
img = nib.Nifti1Image(img, affine=affine)
nib.save(img, self.save_dir + filename + ".nii.gz")
self._plot_niigz(filename)
def _plot_histogram(self, data, title):
bin_size = 10
fig, ax = plt.subplots()
data_flat = data.flatten()
_ = ax.hist(data_flat, bins=np.arange(self.plot_min, self.plot_max + bin_size, bin_size), density=True)
plt.ylim(0, 0.02)
plt.title(title)
plt.show()
def _plot_niigz(self, filename):
plotting.plot_img(self.load_dir + filename + ".nii.gz", cmap='gray', vmin=self.plot_min, vmax=self.plot_max)
plotting.show()
if __name__ == '__main__':
# Every '.nii' or '.nii.gz' file in this directory will be preprocesed (HU estimation + resampling).
_load_dir = "C:/Users/Karol/datasets/COVID-19_CT_Lung_and_Infection_Segmentation_Dataset/"
_save_dir = "C:/Users/Karol/datasets/Dataset Preprocessed/"
# Add any files from the _load_dir that need to get HU estimation. Every other file will only get resampled.
_incorrect_HU_data_list = [("radiopaedia_4_85506_1", 59, 210), # Filename, Air, Fat
("radiopaedia_7_85703_0", 66, 208),
("radiopaedia_10_85902_1", 59, 212),
("radiopaedia_10_85902_3", 57, 215),
("radiopaedia_14_85914_0", 37, 198),
("radiopaedia_27_86410_0", 63, 212),
("radiopaedia_29_86490_1", 59, 210),
("radiopaedia_29_86491_1", 59, 208),
("radiopaedia_36_86526_0", 30, 196),
("radiopaedia_40_86625_0", 47, 197)]
preprocessor = Preprocessor(_load_dir, _save_dir, _incorrect_HU_data_list)
preprocessor.preprocess_dataset()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment