Skip to content

Instantly share code, notes, and snippets.

@nik-hil
Created September 13, 2018 13:55
Show Gist options
  • Save nik-hil/fd0ad1803bc3bf19224a6e971289ea8f to your computer and use it in GitHub Desktop.
Save nik-hil/fd0ad1803bc3bf19224a6e971289ea8f to your computer and use it in GitHub Desktop.
Resize images in Python, reading from one location and writing to another. Not a clean implementation.
import math
from pathlib import Path

import numpy as np
import pydicom
from PIL import Image
from tqdm import tqdm
# Target length (in pixels) for the shorter side of every resized image.
sz = 224
# Root directory of the dataset; every other path below hangs off it.
PATH = Path('big')
img_dir = PATH/'image'
# When True, only the first 1000 entries are processed and the output goes
# to a separate 'sample' directory instead of 'all'.
TEST = False
sample = list(img_dir.iterdir())
if TEST:
    sample = sample[:1000]
    IMG_DIR = Path('sample')/str(sz)
    tloc = PATH/'sample'/str(sz)
else:
    IMG_DIR = Path('all')/str(sz)
    tloc = PATH/'all'/str(sz)
# NOTE(review): train_test_split is never imported in this snippet
# (presumably sklearn.model_selection.train_test_split -- confirm and add the
# import). `train` and `test` are not used anywhere in the code shown here.
train, test = train_test_split(sample)
def scale_to(x, ratio, targ):
    """Return the scaled length of one image side, never below ``targ``.

    Args:
        x: original length of the side.
        ratio: scale factor applied to ``x``.
        targ: minimum value the result may take.

    Returns:
        ``floor(x * ratio)``, or ``targ`` when that would be smaller.
    """
    return max(math.floor(x * ratio), targ)
def read_image_resize(tsize=sz, tloc=tloc):
    """Convert every not-yet-converted DICOM image to a resized PNG in ``tloc``.

    Files whose stem already exists in ``tloc`` are skipped, so an interrupted
    run can simply be restarted.

    Args:
        tsize: target length of the shorter image side after resizing.
        tloc: output directory (a ``Path``); created if it does not exist.

    Each image is rescaled to 8 bits by dividing by its maximum pixel value,
    replicated to three channels (the model expects a 3-channel image),
    resized with its aspect ratio preserved, and saved as ``<stem>.png``.
    """
    tloc.mkdir(parents=True, exist_ok=True)

    # Work out which stems still lack an output file.
    wanted = {p.stem for p in sample if p.is_file()}
    done = {p.stem for p in tloc.iterdir() if p.is_file()}
    # NOTE(review): the stems come from `sample`, but the files are read from
    # PATH/'stage_1_train_images' -- confirm both directories hold the same
    # set of file names.
    pending = [PATH/'stage_1_train_images'/(stem + '.dcm') for stem in wanted - done]

    for loc in tqdm(pending):
        # Assumes a single-frame grayscale pixel array (2-D) -- TODO confirm.
        pixels = pydicom.dcmread(loc.as_posix()).pixel_array
        peak = pixels.max()
        # An all-zero image would otherwise divide by zero and produce NaNs.
        scaled = pixels / peak if peak != 0 else np.zeros(pixels.shape)
        gray = (255 * scaled).clip(0, 255).astype(np.uint8)
        # Replicate the single channel three times: model expects 3 channels.
        img = Image.fromarray(np.stack([gray] * 3, axis=2))

        # PIL reports size as (width, height).
        width, height = img.size
        ratio = tsize / min(width, height)
        new_size = (scale_to(width, ratio, tsize), scale_to(height, ratio, tsize))
        # Image.LINEAR was an alias of BILINEAR and was removed in Pillow 10.
        img = img.resize(new_size, Image.BILINEAR)
        img.save((tloc/loc.stem).as_posix() + '.png')
# Run the conversion with the defaults bound at definition time
# (tsize=sz, tloc=tloc).
read_image_resize()
@nik-hil
Copy link
Author

nik-hil commented Sep 13, 2018

It needs multiprocessing support. Copying 25k files (4 GB) took over 1 hour.

@nik-hil
Copy link
Author

nik-hil commented Sep 15, 2018

With multiprocessing:

from multiprocessing import Pool
from pathlib import Path
from tqdm import tqdm  
import pydicom
import numpy as np
from PIL import Image

# NOTE(review): PATH, TEST and train_test_split are not defined in this
# snippet; presumably they come from the earlier snippet / an sklearn
# import -- confirm before running this on its own.

# Directory that is listed to obtain the files to convert.
img_dir = PATH / 'stage_1_train_images'
SAMPLE_SIZE = 10
batch_size = 32
# Value used to name the output directory below.
sz = 224

# Take every entry, or only the first SAMPLE_SIZE when TEST is truthy.
sample = list(img_dir.iterdir())
sample = sample[:SAMPLE_SIZE] if TEST else sample
train, test = train_test_split(sample)

# Output directory for the converted PNG files; created up front.
tloc = Path('/tmp/pytorch', str(sz))
tloc.mkdir(parents=True, exist_ok=True)

def scale_to(x, ratio, targ):
    """Scale one image dimension by ``ratio``, flooring the result.

    The returned value is never smaller than ``targ``.
    """
    scaled = math.floor(x * ratio)
    return targ if targ > scaled else scaled


def resize_image(loc, tsize=256, tloc=tloc):
    """Convert one DICOM file to a resized 3-channel 8-bit PNG.

    Args:
        loc: ``Path`` of the DICOM file to read.
        tsize: target length of the shorter image side after resizing.
        tloc: output directory (a ``Path``); the PNG is written there as
            ``<loc.stem>.png``.

    The pixel data is rescaled to 8 bits by dividing by its maximum value,
    replicated to three channels (the model expects a 3-channel image) and
    resized with its aspect ratio preserved.

    NOTE(review): the default ``tsize`` (256) differs from the module-level
    ``sz`` (224) that names the default output directory -- confirm which
    size is intended.
    """
    # Assumes a single-frame grayscale pixel array (2-D) -- TODO confirm.
    pixels = pydicom.dcmread(loc.as_posix()).pixel_array
    peak = pixels.max()
    # An all-zero image would otherwise divide by zero and produce NaNs.
    scaled = pixels / peak if peak != 0 else np.zeros(pixels.shape)
    gray = (255 * scaled).clip(0, 255).astype(np.uint8)
    img = Image.fromarray(np.stack([gray] * 3, axis=2))

    # PIL reports size as (width, height).
    width, height = img.size
    ratio = tsize / min(width, height)
    new_size = (scale_to(width, ratio, tsize), scale_to(height, ratio, tsize))
    # Image.LINEAR was an alias of BILINEAR and was removed in Pillow 10.
    img = img.resize(new_size, Image.BILINEAR)
    img.save((tloc / loc.stem).as_posix() + '.png')
    
# The guard keeps worker processes from re-running this block when they
# import the main module (required for the 'spawn' start method).
if __name__ == '__main__':
    # The context manager shuts the pool down once every result is consumed.
    with Pool() as pool:
        # Wrap the *results* in tqdm so the bar advances as images finish;
        # wrapping the input of pool.map only tracks task submission.
        for _ in tqdm(pool.imap_unordered(resize_image, sample), total=len(sample)):
            pass

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment