Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:05
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save kylemcdonald/5988b56dbf085db9f5ff to your computer and use it in GitHub Desktop.
Scrape a list of Spotify track IDs for JSON and preview URLs. Can handle very large lists of tracks by using subdirectories based on the track ID. Picks up where it left off. Ignores errors during downloading.
#!/usr/bin/env python
import json, urllib
import os.path
import subprocess
import argparse
from subprocess import call
# Command-line interface: a single positional argument naming the input file.
parser = argparse.ArgumentParser(
    description='Scrape a list of Spotify track IDs for JSON and preview URLs.')
# This positional argument backs args.filename below; without it the namespace
# returned by parse_args() has no 'filename' attribute.
parser.add_argument(
    'filename',
    help='text file containing one Spotify track ID per line')
args = parser.parse_args()
filename = args.filename
def download(remote, local):
    """Fetch `remote` with curl and save it to the file `local`.

    curl is run with --create-dirs, so missing parent directories of `local`
    are created, and with --progress-bar for a compact progress display.

    Args:
        remote: URL to fetch.
        local: Destination file path.

    Returns:
        curl's exit status as reported by subprocess.call (0 on success).
        Failures are not raised, so callers are free to ignore the value.
    """
    return call(['curl', remote, '--output', local, '--create-dirs', '--progress-bar'])
def safe(char):
    """Return `char` with a '+' prefix if it is uppercase, else unchanged.

    Used to build directory names from the leading characters of a track ID.
    NOTE(review): presumably this keeps IDs that differ only by letter case
    in separate directories on case-insensitive filesystems -- confirm.

    Args:
        char: A string (callers pass a single character).

    Returns:
        '+' + char when char.isupper() is true, otherwise char itself.
    """
    if char.isupper():
        return '+' + char
    return char
def linecount(filename):
    """Return the number of lines in the text file `filename`.

    The file is opened in a `with` block so the handle is closed as soon as
    counting finishes instead of being left for the garbage collector.

    Args:
        filename: Path of the file to count.

    Returns:
        The line count as an int (0 for an empty file).
    """
    with open(filename) as f:
        return sum(1 for _ in f)
# URL template for the JSON metadata of one track, formatted with the track ID.
# NOTE(review): in this copy of the script the template was just '{}' (the URL
# prefix was lost). Restored to the Spotify Web API "Get Track" endpoint, whose
# response carries the 'preview_url' field read below -- confirm before use.
TRACK_URL = 'https://api.spotify.com/v1/tracks/{}'

# Total number of input lines, used only for the progress percentage.
n = linecount(filename)
with open(filename) as f:
    for i, line in enumerate(f):
        track = line.rstrip()
        if len(track) < 3:
            # Blank or malformed line: the directory layout below needs the
            # first three characters of the ID.
            continue
        jsonUrl = TRACK_URL.format(track)
        # Spread the output over nested subdirectories named after the first
        # three characters of the ID so very large lists stay manageable.
        path = '{}/{}/{}/{}'.format(safe(track[0]), safe(track[1]), safe(track[2]), track)
        outMp3 = 'mp3/{}.mp3'.format(path)
        outJson = 'json/{}.json'.format(path)
        # Files already on disk are treated as done, so a rerun resumes
        # where the previous run stopped.
        needJson = not os.path.isfile(outJson)
        needMp3 = not os.path.isfile(outMp3)
        if not (needJson or needMp3):
            continue
        print('loading {} ({:.2f}%)'.format(track, (100. * i) / n))
        try:
            if needJson:
                download(jsonUrl, outJson)
                print('[json downloaded]')
            if needMp3:
                with open(outJson) as jsonFile:
                    data = json.load(jsonFile)
                # Some tracks have a missing or null preview URL.
                if data.get('preview_url'):
                    download(data['preview_url'], outMp3)
                    print('[mp3 downloaded]')
                else:
                    print('[mp3 not available]')
        except KeyboardInterrupt:
            # Stop cleanly on Ctrl-C; rerunning the script resumes the scrape.
            # NOTE: a transfer interrupted midway may leave a partial file
            # that a later run will treat as complete.
            break
        except Exception as e:
            # Best effort: report the problem and move on to the next track.
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment