Skip to content

Instantly share code, notes, and snippets.

@Arecsu
Last active March 30, 2022 07:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Arecsu/2a96c33b4a99705d7711dee77156e2c2 to your computer and use it in GitHub Desktop.
Save Arecsu/2a96c33b4a99705d7711dee77156e2c2 to your computer and use it in GitHub Desktop.
Last.fm bulk artist photos downloader
# Download every image from an artist's last.fm profile,
# at full resolution and with multithreading.
# Saves .webp images and .gifs.
# Want .jpg images instead of .webp? Search the code for the .replace('jpg' call and delete that line.
# jpg in 2022, can you imagine?
#
# usage:
# $ python3 lastfm-p-dw.py https://www.last.fm/es/music/Yung+Lean/+images
#
# Submit a link exactly like that one, without any "?page=1" suffix. Keep it as clean as the example.
import bs4 as bs
import urllib.request
import sys
import multiprocessing.dummy as mp
import os
# taken from here: https://stackoverflow.com/questions/3173320/text-progress-bar-in-terminal-with-block-characters
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"):
    """
    Call in a loop to draw a single-line terminal progress bar.

    @params:
        iteration - Required : current iteration (Int)
        total     - Required : total iterations (Int); 0 is drawn as complete
        prefix    - Optional : prefix string (Str)
        suffix    - Optional : suffix string (Str)
        decimals  - Optional : positive number of decimals in percent complete (Int)
        length    - Optional : character length of bar (Int)
        fill      - Optional : bar fill character (Str)
        printEnd  - Optional : end character (e.g. "\\r", "\\r\\n") (Str)
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to process: show a full bar instead of dividing by zero.
        fraction = 1.0
        filledLength = length
    percent = f"{100 * fraction:.{decimals}f}"
    bar = fill * filledLength + '-' * (length - filledLength)
    # The leading "\r" returns to the start of the line so the bar is redrawn in place.
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=printEnd)
    # Print a newline once complete so later output starts on a fresh line.
    if iteration == total:
        print()
# The gallery URL comes from the first command-line argument; prompt for it
# when the script was started without one.
try:
    input_url = sys.argv[1]
except IndexError:
    input_url = input('url? ')

# Fetch the first gallery page; the response is closed once it has been read.
with urllib.request.urlopen(input_url) as response:
    source = response.read()
soup = bs.BeautifulSoup(source, 'lxml')

# The artist name is used later for the download folder and the file names.
title_tag = soup.find("h1", {"class": "header-new-title"})
if title_tag is None:
    sys.exit('Could not find the artist name on that page: ' + input_url)
artist = title_tag.text
print("Artist: " + artist)

links_to_process = []
try:
    # The second-to-last pagination entry holds the number of the last page.
    number_of_pages = soup.find("ul", {"class": "pagination-list"}).find_all("li")
    number_of_pages = int(number_of_pages[-2].text)
except (AttributeError, IndexError, ValueError):
    # No usable pagination list: the gallery is treated as a single page.
    links_to_process.append(input_url)
else:
    links_to_process.extend(
        input_url + '?page=' + str(page) for page in range(1, number_of_pages + 1)
    )

# Filled in by get_img_urls with one gallery-page URL per image.
img_urls = []
def get_img_urls(link):
    """Collect the per-image page URLs listed on one gallery page.

    Fetches ``link``, finds every ``<a class="image-list-item">`` element and
    adds its ``href`` (prefixed with ``https://www.last.fm``) to the
    module-level ``img_urls`` list.

    @params:
        link - Required : URL of one gallery page (Str)
    @returns:
        the list of URLs found on this page (also added to ``img_urls``)
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(link) as response:
        source = response.read()
    soup = bs.BeautifulSoup(source, 'lxml')
    page_urls = [
        'https://www.last.fm' + a['href']
        for a in soup.find_all('a', {'class': 'image-list-item'})
        if a.get('href')  # skip anchors without a target instead of raising KeyError
    ]
    # A single extend keeps one page's links contiguous even when several
    # pool threads add their results at the same time.
    img_urls.extend(page_urls)
    return page_urls
# Scrape all gallery pages with 12 worker threads; get_img_urls fills img_urls.
p = mp.Pool(12)
p.map(get_img_urls, links_to_process)
p.close()
p.join()

# Optional second command-line argument: skip this many entries at the start
# of img_urls to resume an earlier run.
# NOTE: img_urls is filled by concurrent threads, so its order can differ
# between runs and the resume position is only approximate.
if len(sys.argv) > 2:
    try:
        resume_number = int(sys.argv[2])
    except ValueError:
        print(f'Ignoring resume position {sys.argv[2]!r}: not an integer', file=sys.stderr)
    else:
        img_urls = img_urls[resume_number:]

l = len(img_urls)
print(f'Found {str(l)} images to download')
processed = 0
# With nothing to download there is no progress to show; this also avoids
# asking the progress bar to divide by a total of zero.
if l:
    printProgressBar(0, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

# Images are saved to a folder named after the artist, inside the current directory.
download_folder = os.path.join(os.getcwd(), artist)
os.makedirs(download_folder, exist_ok=True)
# Guards the shared counter: updateProgress is called from the pool's worker
# threads, and an unsynchronised "processed += 1" can lose updates.
_progress_lock = mp.Lock()


def updateProgress():
    """Count one more processed image and redraw the progress bar.

    Increments the module-level ``processed`` counter and redraws the bar
    against the total ``l``. Both steps happen under a lock so concurrent
    callers neither lose increments nor interleave their redraws.
    """
    global processed
    with _progress_lock:
        processed += 1
        printProgressBar(processed, l, prefix = 'Progress:', suffix = 'Complete', length = 50)
def download_img(url):
    """Download the full-size image shown on one last.fm image page.

    Fetches the page at ``url``, takes the ``src`` of its first
    ``<img class="js-gallery-image">``, removes the ``/770x0`` size segment
    and any ``#fragment``, resolves redirects to learn the real file
    extension, and saves the file in ``download_folder`` as
    ``<artist>_<first 10 characters of the image id>.<extension>``.

    Any failure skips this image without raising, so one bad image does not
    abort the rest of the pool. Progress is advanced exactly once per call,
    whether the download succeeded or not.

    @params:
        url - Required : URL of a single image page (Str)
    @returns:
        the path of the saved file, or None when the image was skipped
    """
    try:
        with urllib.request.urlopen(url) as page:
            soup2 = bs.BeautifulSoup(page, 'lxml')
        gallery_images = soup2.find_all('img', {'class': 'js-gallery-image'})
        if not gallery_images:
            return None
        img_url = gallery_images[0]['src']
        img_url = img_url.replace('/770x0', '')
        img_url = img_url.split('#', 1)[0]
        # this is to get the real extension. Last.fm will get you a .jpg
        # link but it will redirect you to the correct url afterwards
        # could be a .gif. We want to preserve that
        try:
            with urllib.request.urlopen(img_url) as response:
                img_url = response.geturl()
        except Exception:
            # Best effort: keep the unresolved URL and still try the download.
            pass
        # replacing jpg with webp files
        img_url = img_url.replace('jpg', 'webp')
        # https://lastfm.freetls.fastly.net/i/u/2bd061bfb76176d1d8c3f52cbb73350e.webp
        filename = img_url.split('i/u/', 1)[1]
        # 2bd061bfb76176d1d8c3f52cbb73350e.webp
        filename, extension = filename.split('.', 1)
        # 2bd061bfb76176d1d8c3f52cbb73350e, webp
        artist_ = artist.replace(' ', '_')
        # A path separator in the artist name would point the file into a
        # sub-directory that does not exist.
        for separator in ('/', os.sep):
            artist_ = artist_.replace(separator, '_')
        filename = artist_.lower() + '_' + filename[0:10] + '.' + extension
        fullfilename = os.path.join(download_folder, filename)
        urllib.request.urlretrieve(img_url, fullfilename)
        return fullfilename
    except Exception:
        # Worker boundary: skip this image quietly so the progress bar line
        # is not interrupted and the remaining downloads continue.
        return None
    finally:
        updateProgress()
# Twelve worker threads go easy on CPU and network; a pool of 100 is
# "wicked fast" but much heavier.
p = mp.Pool(processes=12)
p.map(download_img, img_urls)
p.close()
p.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment