Skip to content

Instantly share code, notes, and snippets.

@tchaton
Created February 16, 2023 09:30
Show Gist options
  • Save tchaton/9d54daaa65384cd6434b7cc6ff879ada to your computer and use it in GitHub Desktop.
Save tchaton/9d54daaa65384cd6434b7cc6ff879ada to your computer and use it in GitHub Desktop.
import os
import tarfile
import torch
import torchvision.transforms.functional as fn
from PIL import Image
from tqdm import tqdm as tq
# Root directory holding the raw ImageNet archives and the ``classes.txt`` index.
FOLDER_IN = "/data/imagenet"
# Root directory that receives the extracted images (they are resized in place).
FOLDER_OUT = "/data/imagenet-final"
# Archive name template; the placeholder is filled with the split name.
FILENAME = "ILSVRC2012_img_{}.tar"

# Maps the first column of ``classes.txt`` (the per-class archive/folder
# identifier) to the third column (the class name used as output directory).
# The middle column is ignored.
class_mapping = {}
with open(os.path.join(FOLDER_IN, "classes.txt")) as f:
    # Iterate the file lazily instead of materializing every line up front.
    for line in f:
        line = line.rstrip("\n")
        if not line:
            # A blank line would otherwise fail the three-way unpacking below.
            continue
        # maxsplit=2 keeps a class name that itself contains spaces in one piece.
        folder_name, _, class_name = line.split(" ", 2)
        class_mapping[folder_name] = class_name
def resize_image(img_path):
    """Resize the image at ``img_path`` to 256x256 and overwrite the file.

    Args:
        img_path: Path of an image file that PIL can open. The file is
            replaced by its resized version.
    """
    # The context manager releases the file handle before the path is rewritten.
    with Image.open(img_path) as img:
        # fn.resize returns a new image; ``img`` itself is left untouched.
        resized = fn.resize(img, size=[256, 256])
    # Save the resized copy: saving ``img`` would write the original back
    # unchanged and the resize would be lost.
    resized.save(img_path)
def extractall(src, dst):
    """Extract every member of the tar archive ``src`` into directory ``dst``.

    Args:
        src: Path of the tar archive to read.
        dst: Directory that receives the extracted members.
    """
    # Members are extracted with tarfile's default policy, so archives are
    # assumed to come from a trusted source.
    # The ``with`` block closes the archive even if extraction raises.
    with tarfile.open(src) as archive:
        archive.extractall(dst)
def process_split(split_name, extract=True):
    """Unpack one dataset split, unpack its per-class archives and resize the images.

    Steps, all performed under ``FOLDER_OUT/<split_name>``:

    1. If ``extract`` is true, extract ``FOLDER_IN/ILSVRC2012_img_<split_name>.tar``.
    2. For every entry whose name starts with ``"n01"``, look up its class name
       in ``class_mapping``, create a directory named after the class and, if
       ``extract`` is true, extract the entry into it.
    3. For every class in ``class_mapping`` whose directory exists, resize each
       file in it with ``resize_image`` and delete the per-class archive.

    Args:
        split_name: Name of the split; fills the placeholder in ``FILENAME``
            and names the output sub-directory.
        extract: When false, skip both extraction steps and only create the
            class directories, resize and clean up.

    Raises:
        KeyError: If an ``"n01"`` entry has no matching key in ``class_mapping``.
    """
    # Kept for backward compatibility: nothing below reads this directory.
    os.makedirs(os.path.join(FOLDER_IN, split_name), exist_ok=True)

    destination = os.path.join(FOLDER_OUT, split_name)
    # With extract=False nothing else creates the output directory, and the
    # listing below would fail on a missing path.
    os.makedirs(destination, exist_ok=True)

    if extract:
        extractall(
            os.path.join(FOLDER_IN, FILENAME.format(split_name)),
            destination,
        )

    # os.listdir returns a snapshot, so the directories created inside the
    # loop are not revisited.
    for folder_name in os.listdir(destination):
        # Only entries with this prefix are handled; everything else is skipped.
        if not folder_name.startswith("n01"):
            continue

        class_name = class_mapping[folder_name.replace(".tar", "")]
        print(f"Extracing {class_name}")

        class_destination = os.path.join(destination, class_name)
        os.makedirs(class_destination, exist_ok=True)

        if extract:
            extractall(
                os.path.join(destination, folder_name),
                class_destination,
            )

    for folder_name, class_name in class_mapping.items():
        class_destination = os.path.join(destination, class_name)
        if not os.path.exists(class_destination):
            continue

        print(f"Resizing {class_name}")
        for filename in tq(os.listdir(class_destination)):
            resize_image(os.path.join(class_destination, filename))

        # The per-class archive may already be gone (e.g. a rerun with
        # extract=False after an earlier run removed it); that must not abort
        # the remaining classes.
        try:
            os.remove(os.path.join(destination, folder_name + ".tar"))
        except FileNotFoundError:
            pass
# Script driver: process the training split first, then the test split.
for split in ("train", "test"):
    process_split(split)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment