Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Scrape a list of Spotify track IDs for JSON and preview URLs. Can handle very large lists of tracks by using subdirectories based on the track ID. Picks up where it left off. Ignores errors during downloading.
#!/usr/bin/env python
import json, urllib
import os.path
import subprocess
import argparse
from subprocess import call
# Command-line interface: the script reads track IDs, one per line, from the
# file named on the command line.
parser = argparse.ArgumentParser(
    description='Scrape a list of Spotify track IDs for JSON and preview URLs.')
# `args.filename` is read just below, so the parser has to declare that
# argument; without a declaration every run fails with AttributeError.
# NOTE(review): declared as a positional argument here -- confirm this matches
# the invocation the original author intended.
parser.add_argument(
    'filename',
    help='text file containing one Spotify track ID per line')
args = parser.parse_args()
filename = args.filename
def download(remote, local):
    """Fetch the URL `remote` with curl and write it to the path `local`.

    curl is run with --create-dirs and --progress-bar. Its exit status is
    discarded, so a failed transfer does not raise here.
    """
    command = ['curl', remote]
    command += ['--output', local]
    command += ['--create-dirs', '--progress-bar']
    call(command)
def safe(char):
    """Return `char` prefixed with '+' when it is uppercase, else unchanged.

    The result is used as a directory name component; presumably the prefix
    keeps 'A' and 'a' apart on case-insensitive filesystems (TODO confirm).
    """
    return '+' + char if char.isupper() else char
def linecount(filename):
    """Return the number of lines in the file at `filename`.

    The file is opened in a `with` block so the handle is closed as soon as
    counting finishes instead of being left for the garbage collector.
    """
    with open(filename) as handle:
        return sum(1 for _ in handle)
# Main scrape loop: for every track ID in the input file, fetch its JSON
# metadata and, if the metadata carries a preview URL, the preview MP3.
# Outputs that already exist on disk are skipped, which is what lets an
# interrupted run pick up where it left off.

# NOTE(review): the format string in this file was just '{}', which hands curl
# the bare track ID instead of a URL. Restored to the Spotify Web API track
# endpoint -- confirm, and check whether it needs an Authorization header.
TRACK_URL = 'https://api.spotify.com/v1/tracks/{}'

n = linecount(filename)
with open(filename) as f:
    for i, line in enumerate(f):
        track = line.rstrip()
        # The directory layout uses the first three characters of the ID, so
        # blank or truncated lines cannot be mapped to a path; skip them.
        if len(track) < 3:
            continue
        jsonUrl = TRACK_URL.format(track)
        # Fan files out into nested subdirectories keyed on the first three
        # characters so no single directory has to hold every track.
        path = '{}/{}/{}/{}'.format(safe(track[0]), safe(track[1]), safe(track[2]), track)
        outMp3 = 'mp3/{}.mp3'.format(path)
        outJson = 'json/{}.json'.format(path)
        needJson = not os.path.isfile(outJson)
        needMp3 = not os.path.isfile(outMp3)
        if not (needJson or needMp3):
            continue
        print('loading {} ({:.2f}%)'.format(track, (100. * i) / n))
        try:
            if needJson:
                download(jsonUrl, outJson)
                print('[json downloaded]')
            if needMp3:
                # The preview URL comes from the JSON metadata saved above.
                with open(outJson) as jsonFile:
                    data = json.load(jsonFile)
                previewUrl = data.get('preview_url')
                if previewUrl:
                    download(previewUrl, outMp3)
                    print('[mp3 downloaded]')
                else:
                    print('[mp3 not available]')
        except KeyboardInterrupt:
            # Ctrl-C stops the whole run; finished files stay on disk.
            break
        except Exception as e:
            # Best effort: report the problem and move on to the next track.
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment