Skip to content

Instantly share code, notes, and snippets.

@sihu
Created February 5, 2020 16:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sihu/3726629f06860e6155a6c343947f9562 to your computer and use it in GitHub Desktop.
Save sihu/3726629f06860e6155a6c343947f9562 to your computer and use it in GitHub Desktop.
import os
import numpy as np
from PIL import Image
# Scene identifiers; each one names a sub-directory under 'data', 'train'
# and 'validation' in the current working directory.
SCENES = [
    '01', '02', '03', '04', '04b', '05', '06',
    '07', '08', '09', '10', '11', '12',
]

# Nominal group size for the train/validation split: within each group the
# first BATCH_SIZE - 1 files are used for training and the file at index
# BATCH_SIZE - 1 is used for validation.
BATCH_SIZE = 4
def _resize_image(src_path, dest_path):
    """Write a 150x112 JPEG copy (quality 85) of ``src_path`` to ``dest_path``."""
    # The context manager closes the source file handle once the resized
    # copy has been written, instead of leaking one handle per image.
    with Image.open(src_path) as im:
        im.resize((150, 112)).save(dest_path, 'JPEG', quality=85)


def prepare():
    """Resize every scene's images and split them into train/validation sets.

    For each scene in ``SCENES`` the entries of ``<cwd>/data/<scene>`` are
    sorted by name and walked in consecutive groups of ``BATCH_SIZE`` files.
    The first ``BATCH_SIZE - 1`` files of each group are resized into
    ``<cwd>/train/<scene>`` and the last file of every complete group is
    resized into ``<cwd>/validation/<scene>``.  A trailing incomplete group
    contributes to the training set only, so no input file is skipped.

    Output files keep their original file name and are always encoded as
    JPEG.  Progress is reported on stdout.

    Raises:
        OSError: if a scene's data directory is missing or a file cannot be
            read or written (propagated from ``os`` / Pillow).
    """
    base_dir = os.getcwd()

    for scene in SCENES:
        page_data_dir = os.path.join(base_dir, 'data', scene)
        page_train_dir = os.path.join(base_dir, 'train', scene)
        page_validation_dir = os.path.join(base_dir, 'validation', scene)

        # makedirs (unlike mkdir) also creates the missing 'train' and
        # 'validation' parent directories on a first run, and exist_ok makes
        # re-runs harmless.
        os.makedirs(page_train_dir, exist_ok=True)
        os.makedirs(page_validation_dir, exist_ok=True)

        # List the directory once and reuse the result for count and split.
        page_files = sorted(os.listdir(page_data_dir))
        print('Number of scene %s images:' % scene, len(page_files))

        # Fixed-size chunks: splitting into round(n / BATCH_SIZE) sections
        # yields oversized chunks whenever n is not a multiple of BATCH_SIZE
        # (their extra files were never written anywhere) and fails outright
        # when the section count rounds down to zero.
        for idx, start in enumerate(range(0, len(page_files), BATCH_SIZE)):
            batch = page_files[start:start + BATCH_SIZE]

            for train_file in batch[:BATCH_SIZE - 1]:
                _resize_image(
                    os.path.join(page_data_dir, train_file),
                    os.path.join(page_train_dir, train_file),
                )

            # Only a complete group has a file left over for validation.
            if len(batch) >= BATCH_SIZE:
                last_file = batch[BATCH_SIZE - 1]
                print('Processing batch %s...' % (idx + 1))
                _resize_image(
                    os.path.join(page_data_dir, last_file),
                    os.path.join(page_validation_dir, last_file),
                )
# Run the dataset preparation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    prepare()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment