Skip to content

Instantly share code, notes, and snippets.

@dradecic
Created September 17, 2019 10:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dradecic/d26d44d7e145577a1ded06f28559499b to your computer and use it in GitHub Desktop.
Save dradecic/d26d44d7e145577a1ded06f28559499b to your computer and use it in GitHub Desktop.
img_dnl_2_download
# Download every thumbnail referenced in `df` into images/<topic>/<title>.jpg.
#
# Relies on names defined outside this snippet: `df` (read through its
# 'Topic', 'Title' and 'Thumbnail' columns -- presumably a pandas DataFrame,
# confirm against the code that builds it) and the `os`, `datetime` and
# `urllib.request` modules.

# STEP 1: root directory that receives one sub-directory per topic.
BASE_DIR = 'images/'

# STEP 2: one sub-directory name (with trailing slash) per distinct topic.
# The raw topic values are kept next to the directory names so rows can be
# filtered by the exact topic instead of re-parsing it out of the path.
topics = df['Topic'].unique()
SUB_DIRS = [topic + '/' for topic in topics]

# Title -> file-name translation, built once instead of once per row:
# spaces become underscores; punctuation is dropped. Path separators are
# dropped as well, otherwise a title such as "AC/DC" would point into a
# directory that does not exist and the download would fail.
_TITLE_TO_FILENAME = str.maketrans(' ', '_', '.,:\'’#*/\\')

# Print a message to the user
print('Image Download Started...')
start_time = datetime.datetime.now()

# STEP 3: create the directory tree; exist_ok makes re-runs harmless and
# avoids the check-then-create race of a separate os.path.exists() test.
for sub_dir in SUB_DIRS:
    os.makedirs(BASE_DIR + sub_dir, exist_ok=True)

# STEP 4: walk the rows of each topic.
for topic, sub_dir in zip(topics, SUB_DIRS):
    for _, row in df[df['Topic'] == topic].iterrows():
        file_name = row['Title'].lower().translate(_TITLE_TO_FILENAME)
        # STEP 5: fetch the thumbnail straight into its destination file.
        # NOTE(review): an exception raised here is not caught, so a single
        # failed download aborts the whole run.
        urllib.request.urlretrieve(
            row['Thumbnail'],
            '{}{}.jpg'.format(BASE_DIR + sub_dir, file_name),
        )

# Tell the user that download has finished
end_time = datetime.datetime.now()
print('\tDownload Finished! It took {} seconds.'.format(int((end_time - start_time).total_seconds())))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment