Last active
February 13, 2019 13:26
-
-
Save khunreus/ce938c0b6d40dd99a28d016e78817538 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
python3.6
Downloads images from URLs stored in a CSV file according to labels
and saves them to a specified directory / subdirectory,
e.g. for sorting images into folders by label for image classification.
"""
import os
import urllib
import urllib.request

import pandas as pd
# for python3: install SSL certificates | |
# /Applications/Python\ 3.6/Install\ Certificates.command | |
# Download every image referenced in the CSV, sorted into
# <directory>/<subdirectory>/<label>/ folders, one pass per label.

# Label values to download; extend with the remaining values of "label_col".
labels = ['label1', 'label2', 'label3']

# CSV columns that each hold one image URL for a row.
image_columns = ['col_1', 'col_2', 'col_3', 'col_4']

df = pd.read_csv("clothing_col_nov_dec2018.csv")

for label in labels:
    # Root output folder; append the label here to get a per-label root.
    directory = "path/and/folder"  # + label
    os.makedirs(directory, exist_ok=True)

    # reset_index() renumbers the rows, so idx counts from 0 within this label.
    df_temp = df.query("label_col==@label").reset_index()
    for idx, row in df_temp.iterrows():
        pic_name = row['col']
        subdirectory = row['subdirectory_col']

        # Create the folder the files are actually written to; the download
        # fails if it does not exist.
        target_dir = f"{directory}/{subdirectory}/{label}"
        os.makedirs(target_dir, exist_ok=True)

        # enumerate keeps the file number tied to the source column even
        # when an earlier download for the same row failed.
        for i, column in enumerate(image_columns, start=1):
            img = row[column]
            # store by category and color
            destination = f"{target_dir}/{label}-{pic_name}-{idx}-{i}.jpg"
            try:
                urllib.request.urlretrieve(img, destination)
            except Exception:
                # Best effort: report the failure and move on to the next
                # image. KeyboardInterrupt is not caught, so Ctrl-C still
                # stops the run.
                print(f"failed for {pic_name}-{i} in {subdirectory}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment