Skip to content

Instantly share code, notes, and snippets.

@MaartenGr
Last active October 31, 2021 08:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MaartenGr/df4da12d60ba7ce0694a23916e99fc27 to your computer and use it in GitHub Desktop.
Save MaartenGr/df4da12d60ba7ce0694a23916e99fc27 to your computer and use it in GitHub Desktop.
import os
import glob
import zipfile
from tqdm import tqdm
from sentence_transformers import util
# Download the Unsplash 25k photo archive and unpack it into img_folder.
# The work is skipped when the folder already exists and is non-empty, so
# re-running the script does not download or extract again.
img_folder = 'photos/'
if not os.path.exists(img_folder) or not os.listdir(img_folder):
    os.makedirs(img_folder, exist_ok=True)

    photo_filename = 'unsplash-25k-photos.zip'
    if not os.path.exists(photo_filename):  # Download the dataset if it does not exist
        util.http_get('http://sbert.net/datasets/' + photo_filename, photo_filename)

    # Extract all images. This runs even when the archive was already on disk
    # (e.g. a previous run downloaded it but the folder is still empty), and
    # goes member by member so tqdm can report progress.
    with zipfile.ZipFile(photo_filename, 'r') as zf:
        for member in tqdm(zf.infolist(), desc='Extracting'):
            zf.extract(member, img_folder)

# Load image paths. The pattern is built from img_folder so the listing always
# looks in the same directory the archive was extracted to; glob.glob already
# returns a list, so no extra list() copy is needed.
img_names = glob.glob(os.path.join(img_folder, '*.jpg'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment