Skip to content

Instantly share code, notes, and snippets.

@phpdude
Last active September 12, 2016 13:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phpdude/a662b601d4f49fe068c3282048492d18 to your computer and use it in GitHub Desktop.
Save phpdude/a662b601d4f49fe068c3282048492d18 to your computer and use it in GitHub Desktop.
import os
import sys
import requests
import re
from json import loads
from HTMLParser import HTMLParser
import shutil
from lxml.html import fromstring
from time import sleep
# pip install awesome-slugify
from slugify import slugify
# pytaglib needs the native TagLib library, so you have to execute both:
# > brew install taglib
# > pip install pytaglib
import taglib
# Directory the downloaded mp3 files are written into (created on demand by the download loop).
OUTPUT_DIR = 'songs'
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: A sequence supporting ``len()`` and slicing (list, tuple, str, ...).
        n: Maximum chunk length; must be a positive integer.

    Yields:
        Consecutive slices of ``l`` holding ``n`` items each; the final slice
        may be shorter when ``len(l)`` is not a multiple of ``n``.

    Raises:
        ValueError: If ``n`` is not positive. Because this is a generator the
            error surfaces when iteration starts, not at call time.
    """
    if n <= 0:
        # A zero step makes range() fail with an unrelated-looking message and
        # a negative step silently produces no chunks, dropping every item.
        raise ValueError('chunk size must be a positive integer, got %r' % (n,))
    for start in range(0, len(l), n):
        yield l[start:start + n]
# Tune the shared slugify instance used to build output file names.
# NOTE(review): presumably keeps '-' intact and joins words with spaces,
# per awesome-slugify's options -- confirm against its documentation.
slugify.safe_chars = '-'
slugify.separator = ' '

# The vk.com cookies are read from the environment so they never live in the script.
cookie = os.environ.get('VK_COOKIE', '')
if not cookie:
    # Diagnostics belong on stderr so they are not mixed into regular output.
    sys.stderr.write("Utility requires environment variable VK_COOKIE='...' with your vk.com cookies\n")
    # sys.exit() rather than the site-provided exit(), which is intended for
    # the interactive shell and is missing when the site module is not loaded.
    sys.exit(-1)

# One shared HTTP session: every request below reuses these headers,
# including the raw Cookie header taken from VK_COOKIE.
s = requests.session()
s.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Content-Type': 'application/x-www-form-urlencoded',
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': cookie,
    'Connection': 'keep-alive'
})
# Accumulates the decoded song records returned by al_audio.php, in request order.
songs = []

# source.html is parsed for elements with class "audio_row"; each one carries
# the song identifier in its data-full-id attribute.
# NOTE(review): presumably a locally saved vk.com audio page -- confirm.
with open('source.html') as source:
    rows = fromstring(source.read()).cssselect('.audio_row')

# Progress line: the header is followed by one dot per successful request.
sys.stdout.write('Requesting songs information ...')
sys.stdout.flush()

all_ids = [row.attrib['data-full-id'] for row in rows]

# Ids are requested in batches of 10 per POST.
for ids in chunks(all_ids, 10):
    try:
        response = s.post(
            'https://vk.com/al_audio.php',
            {'act': 'reload_audio', 'al': '1', 'ids': ",".join(ids)},
        ).text

        # The JSON payload sits between the "<!json>" and "<!>" markers.
        payload = re.search(r'<!json>(.*?)<\!>', response)
        if payload is None:
            # Fail with a readable reason instead of an AttributeError on None.
            raise ValueError('no <!json> payload found in the response')
        songs += loads(payload.group(1))

        sys.stdout.write('.')
        sys.stdout.flush()

        # Pause between successful requests.
        sleep(1)
    except Exception as e:
        # Best effort: report the failed batch and carry on with the next one.
        print('')
        print('')
        print('ERROR: %s. Requested ids: %s' % (e, ",".join(ids)))

# Terminate the progress line so later output starts on a fresh line.
sys.stdout.write('\n')
sys.stdout.flush()
def _fill_missing_tags(path, artist, title):
    """Set ALBUM, ARTIST and TITLE on the audio file at ``path`` when absent or empty.

    Existing non-empty tags are left alone, and the file is only saved when
    at least one tag was actually filled in.
    """
    audio = taglib.File(path)
    defaults = (
        # The album falls back to the text after the last '-' in the artist string.
        ('ALBUM', artist.split('-')[-1]),
        ('ARTIST', artist),
        ('TITLE', title),
    )
    changed = False
    for key, value in defaults:
        if not audio.tags.get(key):
            audio.tags[key] = [value]
            changed = True
    if changed:
        audio.save()


# Prefix that indents detail lines under the "#i/N" header of each song.
offset = ' ' * 9 + '> '
unescape = HTMLParser().unescape

# Download every song that is not already present in OUTPUT_DIR.
for i, song in enumerate(songs, 1):
    print(('#%s/%s' % (i, len(songs))).ljust(9, ' ') + 'Processing file ')

    # Record fields 2, 3 and 4 are the mp3 URL, the title and the artist;
    # the two text fields arrive HTML-escaped.
    mp3_url, mp3_title, mp3_artist = song[2], unescape(song[3]), unescape(song[4])

    filename = slugify(mp3_artist + ' - ' + mp3_title) + '.mp3'
    output = os.path.join(OUTPUT_DIR, filename)

    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    if os.path.isfile(output):
        print(offset + 'Output "%s" file already exists' % output)
    else:
        print(offset + 'Downloading mp3 from "%s" to "%s" ...' % (mp3_url, output))

        try:
            mp3 = s.get(mp3_url, stream=True)
            try:
                downloaded = mp3.status_code == 200
                if downloaded:
                    with open(output, 'wb') as f:
                        # Let urllib3 undo any transfer encoding while streaming to disk.
                        mp3.raw.decode_content = True
                        shutil.copyfileobj(mp3.raw, f)
                else:
                    print(offset + ' !!! Error downloading %s (response is %s)' % (mp3_url, mp3))
            finally:
                # A streamed response keeps its connection until it is fully
                # read or closed, so close it on every path.
                mp3.close()

            if downloaded:
                # Tag only after the file handle is closed and flushed.
                _fill_missing_tags(output, mp3_artist, mp3_title)
        except (KeyboardInterrupt, SystemExit):
            # Never leave a partially written file behind when interrupted.
            print(offset + ' !!! Removing output file "%s". Exiting ...' % output)
            if os.path.isfile(output):
                os.unlink(output)

            sys.exit()
        except Exception as e:
            # Report the real exception, drop the partial file and
            # continue with the next song.
            print(offset + ' !!! Error has occured "%s". Removing output file "%s"' % (e, output))
            if os.path.isfile(output):
                os.unlink(output)

    print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment