# phpdude/download.py

## download.py
import os
import sys
import requests
import re
from json import loads
from HTMLParser import HTMLParser
import shutil
from lxml.html import fromstring
from time import sleep

# pip install awesome-slugify
from slugify import slugify

# You can install it by executing (you have to execute both):
# > brew install taglib
# > pip install pytaglib
import taglib

# Directory that downloaded mp3 files are written into; the download loop
# creates it on demand when it does not exist yet.
OUTPUT_DIR = 'songs'


def chunks(l, n):
    """Lazily split sequence ``l`` into consecutive slices of length ``n``.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    total = len(l)
    for start in range(0, total, n):
        stop = start + n
        yield l[start:stop]

# Configure the shared slugifier that builds output file names: '-' is
# declared a safe character and a space is used as the word separator.
# NOTE(review): assumes awesome-slugify honours these attributes on its
# module-level `slugify` object -- confirm against the installed version.
slugify.safe_chars = '-'
slugify.separator = ' '

# The vk.com cookies are read from the environment; without them the script
# stops before doing any work.
cookie = os.environ.get('VK_COOKIE', '')
if not cookie:
    # Diagnostics go to stderr so they are not mixed into regular output.
    print >> sys.stderr, "Utility requires environment variable VK_COOKIE='...' with your vk.com cookies"
    # sys.exit() rather than the site-provided exit() helper, which is meant
    # for the interactive shell and is missing when `site` is not loaded.
    sys.exit(-1)

# One HTTP session is shared by the metadata requests and the mp3 downloads;
# every request carries the browser-like headers and the cookie set here.
# requests.Session() replaces the deprecated requests.session() alias.
s = requests.Session()
s.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Content-Type': 'application/x-www-form-urlencoded',
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': cookie,
    'Connection': 'keep-alive'
})

songs = []
rows = fromstring(open('source.html').read()).cssselect('.audio_row')
print 'Requesting songs information ...',
sys.stdout.flush()

for ids in chunks(list(row.attrib['data-full-id'] for row in rows), 10):
    try:
        json = s.post('https://vk.com/al_audio.php', {'act': 'reload_audio', 'al': '1', 'ids': ",".join(ids)}).text
        json = re.search('<!json>(.*?)<\!>', json)
        songs += loads(json.group(1))

        sys.stdout.write('.')
        sys.stdout.flush()

        sleep(1)
    except Exception, e:
        print
        print
        print 'ERROR: %s. Requested ids: %s' % (e, ",".join(ids))

for i, song in enumerate(songs, 1):
    offset = ' ' * 9 + '> '

    print ('#%s/%s' % (i, len(songs))).ljust(9, ' ') + 'Processing file '
    mp3_url, mp3_title, mp3_artist = song[2], HTMLParser().unescape(song[3]), HTMLParser().unescape(song[4])
    filename = slugify(mp3_artist + ' - ' + mp3_title) + '.mp3'
    output = OUTPUT_DIR + '/' + filename

    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    if not os.path.isfile(output):
        print offset + 'Downloading mp3 from "%s" to "%s" ...' % (mp3_url, output)
        try:
            mp3 = s.get(mp3_url, stream=True)
            if mp3.status_code == 200:
                with open(output, 'wb') as f:
                    mp3.raw.decode_content = True
                    shutil.copyfileobj(mp3.raw, f)

                song, save = taglib.File(output), False
                if 'ALBUM' not in song.tags or not song.tags['ALBUM']:
                    song.tags['ALBUM'] = [mp3_artist.split('-')[-1]]
                    save = True

                if 'ARTIST' not in song.tags or not song.tags['ARTIST']:
                    song.tags['ARTIST'] = [mp3_artist]
                    save = True

                if 'TITLE' not in song.tags or not song.tags['TITLE']:
                    song.tags['TITLE'] = [mp3_title]
                    save = True

                if save:
                    song.save()
            else:
                print offset + '  !!! Error downloading %s (response is %s)' % (mp3_url, mp3)
        except (KeyboardInterrupt, SystemExit):
            print offset + '  !!! Removing output file "%s". Exiting ...' % output
            if os.path.isfile(output):
                os.unlink(output)
            exit()

        except Exception, e:
            print offset + '  !!! Error has occured "%s". Removing output file "%s"' % ('err', output)
            if os.path.isfile(output):
                os.unlink(output)
    else:
        print offset + 'Output "%s" file already exists' % output

    print
	import os
	import sys
	import requests
	import re
	from json import loads
	from HTMLParser import HTMLParser
	import shutil
	from lxml.html import fromstring
	from time import sleep

	# pip install awesome-slugify
	from slugify import slugify

	# You can install it by executing (you have to execute both):
	# > brew install taglib
	# > pip install pytaglib
	import taglib

	# Directory that downloaded mp3 files are written into; the download loop
	# creates it on demand when it does not exist yet.
	OUTPUT_DIR = 'songs'


	def chunks(l, n):
	    """Yield successive n-sized chunks from l.

	    The last chunk is shorter when ``len(l)`` is not a multiple of ``n``.
	    """
	    for i in range(0, len(l), n):
	        yield l[i:i + n]

	# Configure the shared slugifier that builds output file names: '-' is
	# declared a safe character and a space is used as the word separator.
	# NOTE(review): assumes awesome-slugify honours these attributes on its
	# module-level `slugify` object -- confirm against the installed version.
	slugify.safe_chars = '-'
	slugify.separator = ' '

	# The vk.com cookies are read from the environment; without them the script
	# stops before doing any work.
	cookie = os.environ.get('VK_COOKIE', '')
	if not cookie:
	    # Diagnostics go to stderr so they are not mixed into regular output.
	    print >> sys.stderr, "Utility requires environment variable VK_COOKIE='...' with your vk.com cookies"
	    # sys.exit() rather than the site-provided exit() helper, which is meant
	    # for the interactive shell and is missing when `site` is not loaded.
	    sys.exit(-1)

	# One HTTP session is shared by the metadata requests and the mp3 downloads;
	# every request carries the browser-like headers and the cookie set here.
	# requests.Session() replaces the deprecated requests.session() alias.
	s = requests.Session()
	s.headers.update({
	    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0",
	    # "*/*" restored: the wildcard had been reduced to "/" in this copy.
	    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
	    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
	    'Content-Type': 'application/x-www-form-urlencoded',
	    'X-Requested-With': 'XMLHttpRequest',
	    'Cookie': cookie,
	    'Connection': 'keep-alive'
	})

	songs = []
	rows = fromstring(open('source.html').read()).cssselect('.audio_row')
	print 'Requesting songs information ...',
	sys.stdout.flush()

	for ids in chunks(list(row.attrib['data-full-id'] for row in rows), 10):
	try:
	json = s.post('https://vk.com/al_audio.php', {'act': 'reload_audio', 'al': '1', 'ids': ",".join(ids)}).text
	json = re.search('<!json>(.*?)<\!>', json)
	songs += loads(json.group(1))

	sys.stdout.write('.')
	sys.stdout.flush()

	sleep(1)
	except Exception, e:
	print
	print
	print 'ERROR: %s. Requested ids: %s' % (e, ",".join(ids))

	for i, song in enumerate(songs, 1):
	offset = ' ' * 9 + '> '

	print ('#%s/%s' % (i, len(songs))).ljust(9, ' ') + 'Processing file '
	mp3_url, mp3_title, mp3_artist = song[2], HTMLParser().unescape(song[3]), HTMLParser().unescape(song[4])
	filename = slugify(mp3_artist + ' - ' + mp3_title) + '.mp3'
	output = OUTPUT_DIR + '/' + filename

	if not os.path.isdir(OUTPUT_DIR):
	os.makedirs(OUTPUT_DIR)

	if not os.path.isfile(output):
	print offset + 'Downloading mp3 from "%s" to "%s" ...' % (mp3_url, output)
	try:
	mp3 = s.get(mp3_url, stream=True)
	if mp3.status_code == 200:
	with open(output, 'wb') as f:
	mp3.raw.decode_content = True
	shutil.copyfileobj(mp3.raw, f)

	song, save = taglib.File(output), False
	if 'ALBUM' not in song.tags or not song.tags['ALBUM']:
	song.tags['ALBUM'] = [mp3_artist.split('-')[-1]]
	save = True

	if 'ARTIST' not in song.tags or not song.tags['ARTIST']:
	song.tags['ARTIST'] = [mp3_artist]
	save = True

	if 'TITLE' not in song.tags or not song.tags['TITLE']:
	song.tags['TITLE'] = [mp3_title]
	save = True

	if save:
	song.save()
	else:
	print offset + ' !!! Error downloading %s (response is %s)' % (mp3_url, mp3)
	except (KeyboardInterrupt, SystemExit):
	print offset + ' !!! Removing output file "%s". Exiting ...' % output
	if os.path.isfile(output):
	os.unlink(output)
	exit()

	except Exception, e:
	print offset + ' !!! Error has occured "%s". Removing output file "%s"' % ('err', output)
	if os.path.isfile(output):
	os.unlink(output)
	else:
	print offset + 'Output "%s" file already exists' % output

	print