kylemcdonald/spotify-scrape.py

## spotify-scrape.py
#!/usr/bin/env python

import json, urllib
import os.path
import subprocess
import argparse
from subprocess import call

# Command-line interface: a single positional argument naming the file that
# lists the tracks to fetch. The help text makes `--help` say what the file
# is expected to contain (the main loop reads one track ID per line).
parser = argparse.ArgumentParser(
	description='Scrape a list of Spotify track IDs for JSON and preview URLs.')
parser.add_argument(
	'filename',
	help='text file containing one Spotify track ID per line')
args = parser.parse_args()
filename = args.filename

def download(remote, local):
	"""Fetch the URL `remote` with curl and store it at the path `local`.

	curl creates any missing parent directories of `local`
	(`--create-dirs`) and shows a progress bar during the transfer.

	`--fail` makes curl exit with an error instead of saving the response
	body when the server answers with an HTTP error status; otherwise an
	error page would be written to `local` and a later existence check
	would mistake it for a finished download.

	Returns curl's exit status (0 on success). Callers may ignore it.
	"""
	command = [
		'curl', remote,
		'--fail',
		'--output', local,
		'--create-dirs',
		'--progress-bar',
	]
	return call(command)

def safe(char):
	"""Return `char` with a '+' prepended if it is uppercase, else as-is.

	NOTE(review): presumably this keeps IDs that differ only by letter case
	in separate directories on case-insensitive filesystems -- confirm.
	"""
	return '+' + char if char.isupper() else char

def linecount(filename):
	"""Return the number of lines in the file at `filename`.

	The file is opened in a `with` block so the handle is closed as soon as
	counting finishes instead of being left for the garbage collector.
	"""
	with open(filename) as f:
		return sum(1 for _ in f)

# Walk the track list and fetch whatever is still missing for each ID.
# Files that already exist on disk are skipped, so an interrupted run can
# simply be started again.
n = linecount(filename)  # total number of lines, used for the progress percentage
with open(filename) as f:
	for i, line in enumerate(f):
		track = line.rstrip()
		if len(track) < 3:
			# The output path is sharded on the first three characters of
			# the ID, so a blank or truncated line cannot be mapped to a
			# path; skip it instead of aborting the run with an IndexError.
			if track:
				print('[skipping malformed id {}]'.format(track))
			continue
		jsonUrl = 'https://api.spotify.com/v1/tracks/{}'.format(track)
		path = '{}/{}/{}/{}'.format(safe(track[0]), safe(track[1]), safe(track[2]), track)
		outMp3 = 'mp3/{}.mp3'.format(path)
		outJson = 'json/{}.json'.format(path)
		try:
			needJson = not os.path.isfile(outJson)
			needMp3 = not os.path.isfile(outMp3)
			if needJson or needMp3:
				print('loading {} ({:.2f}%)'.format(track, (100. * i) / n))
				if needJson:
					download(jsonUrl, outJson)
				else:
					print('[json downloaded]')
				if needMp3:
					# The preview URL comes from the track's JSON metadata;
					# close the file right after parsing it.
					with open(outJson) as jsonFile:
						data = json.load(jsonFile)
					if 'preview_url' in data and data['preview_url']:
						download(data['preview_url'], outMp3)
					else:
						print('[mp3 not available]')
				else:
					print('[mp3 downloaded]')
		except KeyboardInterrupt:
			# Ctrl-C stops the whole scrape, not just the current track.
			break
		except Exception as e:
			# Best effort: report the problem and carry on with the next track.
			# print(e) is valid on both Python 2 and Python 3.
			print(e)
	#!/usr/bin/env python

	import json, urllib
	import os.path
	import subprocess
	import argparse
	from subprocess import call

	# NOTE(review): this repeats the argument parsing done near the top of
	# the file; confirm whether this second copy of the script is wanted.
	# Command-line interface: a single positional argument naming the file
	# that lists the tracks to fetch (one track ID per line).
	parser = argparse.ArgumentParser(
		description='Scrape a list of Spotify track IDs for JSON and preview URLs.')
	parser.add_argument(
		'filename',
		help='text file containing one Spotify track ID per line')
	args = parser.parse_args()
	filename = args.filename

	# NOTE(review): second definition of `download` in this file; its body had
	# lost its indentation, which is restored here. Confirm whether the
	# duplicate should be kept.
	def download(remote, local):
		"""Fetch the URL `remote` with curl and store it at the path `local`.

		curl creates any missing parent directories of `local`
		(`--create-dirs`) and shows a progress bar during the transfer.

		`--fail` makes curl exit with an error instead of saving the
		response body when the server answers with an HTTP error status, so
		an error page is not left at `local` looking like a finished
		download.

		Returns curl's exit status (0 on success). Callers may ignore it.
		"""
		command = [
			'curl', remote,
			'--fail',
			'--output', local,
			'--create-dirs',
			'--progress-bar',
		]
		return call(command)

	# NOTE(review): second definition of `safe` in this file; its body had
	# lost its indentation, which is restored here.
	def safe(char):
		"""Return `char` with a '+' prepended if it is uppercase, else as-is."""
		if char.isupper():
			char = '+' + char
		return char

	# NOTE(review): second definition of `linecount` in this file; its body
	# had lost its indentation, which is restored here.
	def linecount(filename):
		"""Return the number of lines in the file at `filename`.

		The file is opened in a `with` block so the handle is closed as soon
		as counting finishes.
		"""
		with open(filename) as f:
			return sum(1 for _ in f)

	# NOTE(review): second copy of the scrape loop; its nesting had been
	# flattened and is restored here. Confirm whether running the scrape a
	# second time is intended.
	# Walk the track list and fetch whatever is still missing for each ID.
	# Files that already exist on disk are skipped.
	n = linecount(filename)  # total number of lines, used for the progress percentage
	with open(filename) as f:
		for i, line in enumerate(f):
			track = line.rstrip()
			if len(track) < 3:
				# The output path is sharded on the first three characters
				# of the ID, so a blank or truncated line cannot be mapped
				# to a path; skip it instead of raising IndexError.
				if track:
					print('[skipping malformed id {}]'.format(track))
				continue
			jsonUrl = 'https://api.spotify.com/v1/tracks/{}'.format(track)
			path = '{}/{}/{}/{}'.format(safe(track[0]), safe(track[1]), safe(track[2]), track)
			outMp3 = 'mp3/{}.mp3'.format(path)
			outJson = 'json/{}.json'.format(path)
			try:
				needJson = not os.path.isfile(outJson)
				needMp3 = not os.path.isfile(outMp3)
				if needJson or needMp3:
					print('loading {} ({:.2f}%)'.format(track, (100. * i) / n))
					if needJson:
						download(jsonUrl, outJson)
					else:
						print('[json downloaded]')
					if needMp3:
						# The preview URL comes from the track's JSON
						# metadata; close the file right after parsing it.
						with open(outJson) as jsonFile:
							data = json.load(jsonFile)
						if 'preview_url' in data and data['preview_url']:
							download(data['preview_url'], outMp3)
						else:
							print('[mp3 not available]')
					else:
						print('[mp3 downloaded]')
			except KeyboardInterrupt:
				# Ctrl-C stops the whole scrape, not just the current track.
				break
			except Exception as e:
				# Best effort: report the problem and carry on.
				# print(e) is valid on both Python 2 and Python 3.
				print(e)