Created
September 17, 2019 10:43
-
-
Save dradecic/d26d44d7e145577a1ded06f28559499b to your computer and use it in GitHub Desktop.
img_dnl_2_download
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the thumbnail of every row in ``df`` into ``images/<topic>/``.
# Relies on names provided earlier in the file: ``df`` (read here through its
# 'Topic', 'Thumbnail' and 'Title' columns), ``os``, ``datetime`` and
# ``urllib.request``.

# STEP 1: root folder that receives all downloads
BASE_DIR = 'images/'

# STEP 2: one sub-folder name (with trailing slash) per distinct topic
_topics = df['Topic'].unique()
SUB_DIRS = [topic + '/' for topic in _topics]

# Translation table used to turn a title into a file name in one pass:
# spaces become underscores, the listed punctuation is dropped.  '/' is
# dropped as well so a title can never point into a non-existent sub-folder.
_TITLE_TABLE = str.maketrans(' ', '_', ".,:'’#*/")

# Print a message to the user
print('Image Download Started...')
start_time = datetime.datetime.now()

# STEP 3: create the folder tree; exist_ok avoids the check-then-create race
for sub_dir in SUB_DIRS:
    os.makedirs(BASE_DIR + sub_dir, exist_ok=True)

# STEP 4: walk the topics together with their folder names, so the row filter
# uses the original topic value rather than one re-derived from the path
for topic, sub_dir in zip(_topics, SUB_DIRS):
    # filtering based on topic values
    for _, row in df[df['Topic'] == topic].iterrows():
        # STEP 5: build "<BASE_DIR><topic>/<cleaned title>.jpg" and download
        file_name = row['Title'].lower().translate(_TITLE_TABLE)
        urllib.request.urlretrieve(
            row['Thumbnail'],
            '{}{}.jpg'.format(BASE_DIR + sub_dir, file_name),
        )

# Tell the user that download has finished
end_time = datetime.datetime.now()
print('\tDownload Finished! It took {} seconds.'.format(int((end_time - start_time).total_seconds())))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment