Skip to content

Instantly share code, notes, and snippets.

@khunreus
Last active February 13, 2019 13:26
Show Gist options
  • Save khunreus/ce938c0b6d40dd99a28d016e78817538 to your computer and use it in GitHub Desktop.
Save khunreus/ce938c0b6d40dd99a28d016e78817538 to your computer and use it in GitHub Desktop.
"""
python3.6
downloads images from URLs stored in a csv file according to labels
and saves them to a specified directory / subdirectory
e.g. for sorting images into folders by labels for image classification
"""
import os
import urllib
import urllib.request

import pandas as pd
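# For Python 3 installed from python.org on macOS: run the bundled script once
# to install SSL root certificates, otherwise HTTPS downloads fail with
# certificate verification errors:
# /Applications/Python\ 3.6/Install\ Certificates.command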
# Placeholder configuration: replace these values with the real labels, CSV
# file and output folder before running the script.
labels = ['label1', 'label2', 'label3']
CSV_PATH = "clothing_col_nov_dec2018.csv"
OUTPUT_DIR = "path/and/folder"
IMAGE_COLUMNS = ('col_1', 'col_2', 'col_3', 'col_4')


def download_images(df, labels, directory, image_columns=IMAGE_COLUMNS):
    """Download the images of every row whose label is in *labels*.

    Each image is stored as
    ``<directory>/<subdirectory>/<label>/<label>-<name>-<row>-<n>.jpg`` where
    ``<subdirectory>`` comes from the ``subdirectory_col`` column, ``<name>``
    from the ``col`` column, ``<row>`` is the row's position among the rows
    of that label (starting at 0) and ``<n>`` is the 1-based position of the
    image column in *image_columns*.

    Downloading is best-effort: a missing or failing URL is reported on
    stdout and skipped, and the remaining images are still processed.

    Args:
        df: DataFrame with the columns ``label_col``, ``col``,
            ``subdirectory_col`` and every column named in *image_columns*.
        labels: Iterable of label strings to download.
        directory: Base output folder; created if it does not exist.
        image_columns: Names of the columns that hold the image URLs.

    Returns:
        List of the file paths that were written, in download order.
    """
    os.makedirs(directory, exist_ok=True)
    saved = []
    for label in labels:
        rows = df[df['label_col'] == label].reset_index(drop=True)
        for idx, row in rows.iterrows():
            # str() so that numeric names/folders do not break path building.
            pic_name = str(row['col'])
            subdirectory = str(row['subdirectory_col'])

            # Create the folder the files are actually written to; the
            # download cannot create missing parent folders itself.
            target_dir = os.path.join(directory, subdirectory, label)
            os.makedirs(target_dir, exist_ok=True)

            # enumerate keeps the image number tied to its column even when
            # an earlier image of the same row fails.
            for i, column in enumerate(image_columns, start=1):
                url = row[column]
                if not isinstance(url, str) or not url.strip():
                    # Empty CSV cells arrive as NaN/None rather than text.
                    print(f"failed for {pic_name}-{i} in {subdirectory}: "
                          "missing URL")
                    continue

                target = os.path.join(
                    target_dir, f"{label}-{pic_name}-{idx}-{i}.jpg")
                try:
                    urllib.request.urlretrieve(url, target)
                except Exception as exc:
                    # Best-effort per image: report and move on. Unlike a
                    # bare except, Ctrl-C still interrupts the run.
                    print(f"failed for {pic_name}-{i} in {subdirectory}: "
                          f"{exc}")
                    continue
                saved.append(target)
    return saved


def main():
    """Read the configured CSV file and download images for all labels."""
    df = pd.read_csv(CSV_PATH)
    download_images(df, labels, OUTPUT_DIR)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment