Created
September 13, 2018 13:55
-
-
Save nik-hil/fd0ad1803bc3bf19224a6e971289ea8f to your computer and use it in GitHub Desktop.
Resize images in Python, reading from one location and writing to another. Not a clean implementation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math
from pathlib import Path

import numpy as np
import pydicom
from PIL import Image
from tqdm import tqdm
# Settings for the sequential conversion.
sz = 224                 # default target size (pixels) used for the shorter image side
PATH = Path('big')       # dataset root directory
img_dir = PATH/'image'   # directory whose entries define the working set
TEST = False             # True: restrict the run to the first 1000 directory entries

sample = list(img_dir.iterdir())
if TEST:
    sample = sample[:1000]
    IMG_DIR = Path('sample')/str(sz)
    tloc = PATH/'sample'/str(sz)
else:
    IMG_DIR = Path('all')/str(sz)
    tloc = PATH/'all'/str(sz)

# NOTE(review): train_test_split is not imported anywhere in the visible source
# (presumably sklearn.model_selection.train_test_split -- confirm and import it).
# As written this line raises NameError; train/test are not used by the visible code.
train, test = train_test_split(sample)
def scale_to(x, ratio, targ):
    """Return the length of one image side after scaling, floored at ``targ``.

    Args:
        x: Original length of the side, in pixels.
        ratio: Scale factor applied to ``x``.
        targ: Smallest value that may be returned.

    Returns:
        ``floor(x * ratio)``, or ``targ`` when that would be smaller.
    """
    # max() keeps the result from dropping below targ, e.g. when x * ratio
    # lands fractionally under it and floor() would round it down.
    return max(math.floor(x * ratio), targ)
def read_image_resize(tsize=sz, tloc=tloc):
    """Convert the DICOM images named by ``sample`` into resized 3-channel PNGs.

    Every file in the module-level ``sample`` list whose stem does not yet have
    an output file in ``tloc`` is read, normalised to 8-bit, replicated to three
    channels, resized so that its shorter side is ``tsize`` pixels (aspect ratio
    kept) and saved as ``<stem>.png``. Stems that already have an output are
    skipped, so an interrupted run can simply be started again.

    Args:
        tsize: Target length, in pixels, of the shorter side of each output.
        tloc: Output directory (``pathlib.Path``); created if it does not exist.
    """
    tloc.mkdir(parents=True, exist_ok=True)
    finished = {p.stem for p in tloc.iterdir() if p.is_file()}
    wanted = {p.stem for p in sample if p.is_file()}

    # NOTE(review): inputs are read from 'stage_1_train_images' with a '.dcm'
    # suffix, not from the paths in ``sample`` itself -- confirm this is intended.
    src_dir = PATH/'stage_1_train_images'
    pending = [src_dir/(stem + '.dcm') for stem in sorted(wanted - finished)]

    for loc in tqdm(pending):
        # dcmread is the current name of the deprecated pydicom.read_file.
        pixels = pydicom.dcmread(loc.as_posix()).pixel_array
        peak = pixels.max()
        if peak == 0:
            # An all-zero image would otherwise divide by zero and produce NaNs.
            grey = np.zeros(pixels.shape, dtype=np.uint8)
        else:
            grey = (255 * (pixels / peak)).clip(0, 255).astype(np.uint8)

        # Model expects a 3-channel image: repeat the grey plane along a new
        # last axis (assumes pixel_array is a single 2-D frame -- TODO confirm).
        img = Image.fromarray(np.stack([grey] * 3, axis=2))

        width, height = img.size
        ratio = tsize / min(width, height)
        new_size = (scale_to(width, ratio, tsize), scale_to(height, ratio, tsize))
        # Image.LINEAR was a deprecated alias of BILINEAR and no longer exists
        # in current Pillow releases.
        img = img.resize(new_size, Image.BILINEAR)
        img.save((tloc/loc.stem).as_posix() + '.png')
# Run the sequential conversion using the function's default arguments.
read_image_resize()
With multiprocessing
from multiprocessing import Pool
from pathlib import Path
from tqdm import tqdm
import pydicom
import numpy as np
from PIL import Image
# Settings for the multiprocessing variant. PATH and TEST are not defined in
# this section; they are taken from the definitions earlier in the file.
img_dir = PATH/'stage_1_train_images'   # directory holding the input files
sample = list(img_dir.iterdir())
SAMPLE_SIZE = 10                         # number of entries kept when TEST is set
if TEST:
    sample = sample[:SAMPLE_SIZE]

# NOTE(review): train_test_split is not imported anywhere in the visible source
# (presumably sklearn.model_selection.train_test_split -- confirm and import it).
# As written this line raises NameError; train/test are not used by the visible code.
train, test = train_test_split(sample)

batch_size = 32
sz = 224
tloc = Path('/tmp/pytorch')/str(sz)      # output directory for the PNG files
tloc.mkdir(parents=True, exist_ok=True)
def scale_to(x, ratio, targ):
    """Return the length of one image side after scaling, floored at ``targ``.

    Args:
        x: Original length of the side, in pixels.
        ratio: Scale factor applied to ``x``.
        targ: Smallest value that may be returned.

    Returns:
        ``floor(x * ratio)``, or ``targ`` when that would be smaller.
    """
    # max() keeps the result from dropping below targ, e.g. when x * ratio
    # lands fractionally under it and floor() would round it down.
    return max(math.floor(x * ratio), targ)
def resize_image(loc, tsize=256, tloc=tloc):
    """Convert one DICOM file into a resized 3-channel PNG.

    The pixel data is normalised to 8-bit, replicated to three channels, resized
    so that its shorter side is ``tsize`` pixels (aspect ratio kept) and written
    to ``tloc`` as ``<stem of loc>.png``.

    Args:
        loc: ``pathlib.Path`` of the DICOM file to convert.
        tsize: Target length, in pixels, of the shorter side of the output.
            NOTE(review): the default is 256 although the default output
            directory is named after sz (224) -- confirm which is intended.
        tloc: Output directory (``pathlib.Path``); it must already exist.
    """
    # dcmread is the current name of the deprecated pydicom.read_file.
    pixels = pydicom.dcmread(loc.as_posix()).pixel_array
    peak = pixels.max()
    if peak == 0:
        # An all-zero image would otherwise divide by zero and produce NaNs.
        grey = np.zeros(pixels.shape, dtype=np.uint8)
    else:
        grey = (255 * (pixels / peak)).clip(0, 255).astype(np.uint8)

    # Model expects a 3-channel image: repeat the grey plane along a new last
    # axis (assumes pixel_array is a single 2-D frame -- TODO confirm).
    img = Image.fromarray(np.stack([grey] * 3, axis=2))

    width, height = img.size
    ratio = tsize / min(width, height)
    new_size = (scale_to(width, ratio, tsize), scale_to(height, ratio, tsize))
    # Image.LINEAR was a deprecated alias of BILINEAR and no longer exists in
    # current Pillow releases.
    img = img.resize(new_size, Image.BILINEAR)
    img.save((tloc/loc.stem).as_posix() + '.png')
# Convert every entry of ``sample`` in parallel, one image per task.
# The __main__ guard stops worker processes that re-import this module (spawn /
# forkserver start methods) from trying to create pools of their own.
if __name__ == '__main__':
    # The context manager shuts the pool down even when a conversion raises.
    with Pool() as pool:
        # imap_unordered yields each result as soon as it is ready, so the bar
        # tracks finished images. Wrapping the *input* of pool.map only showed
        # how quickly the tasks were queued.
        for _ in tqdm(pool.imap_unordered(resize_image, sample), total=len(sample)):
            pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It needs multiprocessing support. Copying 25k files (4 GB) took over 1 hr.