pudquick/gsDL.py
Last active Dec 17, 2015
import re, hashlib, uuid, json, random, os, urllib2, os.path, time, sys, SimpleHTTPServer, SocketServer, string, console, webbrowser, shutil, zipfile
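
# gsDL.py: an interactive Grooveshark search-and-download tool for Pythonista
# on iOS. It drives the html5.grooveshark.com mobile client's API, saves the
# chosen song locally, then hands the MP3 to the iDownloads app through a
# short-lived local HTTP server.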
class SmarterHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    server_version = 'SimpleHTTP/0.6'
    file_name = ''
    def do_GET(self):
        if self.path.startswith('/transfer'):
            self.get_transfer()
        else:
            f = self.send_head()
            if f:
                self.copyfile(f, self.wfile)
                f.close()
    def get_transfer(self):
        global did_download
        try:
            # Perform the actual file download
            self.send_response(200)
            # Content-Disposition: attachment; filename="fname.ext"
            self.send_header('Content-Disposition', 'attachment; filename="%s"' % (self.file_name.split('/', 1)[-1]))
            self.send_header('Content-Length', '%s' % (os.path.getsize(self.file_name)))
            self.send_header('Content-Type', 'application/octet-stream')
            self.end_headers()
            f = open(self.file_name, 'rb')
            self.copyfile(f, self.wfile)
            f.close()
        except:
            sys.exc_clear()
        did_download = True
    def log_message(self, format, *args):
        return
    def finish(self):
        # Fix for iDownloads when it terminates a transfer early to get file details
        if not self.wfile.closed:
            try:
                self.wfile.flush()
            except:
                sys.exc_clear()
        try:
            self.wfile.close()
        except:
            sys.exc_clear()
        try:
            self.rfile.close()
        except:
            sys.exc_clear()
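
# A TCP server that, unlike serve_forever(), can stop itself: serve_limited()
# handles one request per loop and exits once max_requests iterations have run
# or the global ready_to_stop flag is set. Note that handle_request() counts
# against the budget even when it only times out.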
class SmarterHTTPD(SocketServer.ThreadingTCPServer):
    keep_running = True
    requests_left = None
    did_timeout = False
    def serve_limited(self, timeout=None, max_requests=None):
        global ready_to_stop
        self.timeout = timeout
        if max_requests is None:
            self.requests_left = None
        else:
            self.requests_left = abs(int(max_requests))
        self.keep_running = True
        self.did_timeout = False
        while self.keep_running:
            self.handle_request()
            # print "Request handled."
            if self.requests_left is not None:
                self.requests_left -= 1
                if self.requests_left <= 0:
                    self.keep_running = False
                    # print "EXIT: HIT MAX REQUESTS"
                    continue
            if ready_to_stop:
                self.keep_running = False
                # print "EXIT: TOLD TO STOP"
                continue
    def handle_timeout(self):
        self.did_timeout = True
    def handle_error(self, request, client_address):
        # Overridden; we don't care to see any error messages
        return
    def release(self):
        try:
            self.server_close()
        except Exception:
            sys.exc_clear()
        try:
            self.socket.close()
        except Exception:
            sys.exc_clear()
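
# Client for the html5.grooveshark.com mobile web app. Everything the real
# client derives at page load (client name and revision, the token salt baked
# into app.min.js, country and privacy settings) is scraped from the same HTML
# and JavaScript a browser would receive, so the API calls made here look like
# the real HTML5 client's.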
class pyGroovClient:
    def __init__(self):
        self.client_url = 'http://html5.grooveshark.com/'
        self.s_client_url = self.client_url.replace('http', 'https')
        # This user agent is in the top 7% of user agents on the web
        self.user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31'
        # Trigger client setup
        pyGroovClient.setupClient(self)
    def _chunk_report(self, bytes_so_far, chunk_size, total_size):
        if total_size is not None:
            percent = float(bytes_so_far) / total_size
            percent = round(percent * 100, 2)
            print 'Downloaded %d of %d bytes (%0.2f%%)' % (bytes_so_far, total_size, percent)
            if bytes_so_far >= total_size:
                print ''
        else:
            print 'Downloaded %d bytes' % (bytes_so_far)
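    # Streams the HTTP response to disk in 32KB chunks, reporting progress on
    # every fifth chunk. It also watches the wall clock: the real client tells
    # Grooveshark when a song has played for 30 seconds, so if the download
    # itself crosses the 30 second mark we send that notification mid-stream.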
    def _chunk_read(self, response, chunk_size=32768, report_hook=None, filename=None, streamDict=None):
        # Delete the old file if it's present
        if os.path.exists(filename):
            os.remove(filename)
        # Set up the file handler
        try:
            f = open(filename, 'wb')
        except Exception:
            print '! Error:', sys.exc_info()[1]
            raise
        start_time = time.time()
        did_hit_30 = False
        time_so_far = 0
        if response.info().has_key('Content-Length'):
            total_size = response.info().getheader('Content-Length').strip()
            total_size = int(total_size)
        else:
            # No size
            total_size = None
            if report_hook:
                print '* Warning: No total file size available.'
        bytes_so_far = 0
        i = 0
        while True:
            chunk = response.read(chunk_size)
            bytes_so_far += len(chunk)
            # Check the time, notify once 30 seconds have passed
            now_time = time.time()
            if not did_hit_30:
                if abs(now_time - start_time) >= 30:
                    did_hit_30 = True
                    print "* Notifying 30 seconds of play ..."
                    self._markStreamKeyOver30Seconds(streamDict)
                    # Set remaining time to 0
                    time_so_far = 31.0
            if not chunk:
                break
            else:
                f.write(chunk)
                if not i:
                    report_hook(bytes_so_far, chunk_size, total_size)
                i = (i + 1) % 5
        if not did_hit_30:
            # We didn't hit 30 seconds during the download, so let the caller know.
            # For safety, since we know we didn't send the message and don't want
            # a race condition, assume no greater than 29 seconds elapsed.
            time_so_far = min(29.0, time.time() - start_time)
        try:
            f.close()
        except:
            _ = False
        if bytes_so_far > 0:
            print '* Saved to:', filename
            return (os.path.abspath(filename), time_so_far)
        else:
            print '* Error: 0 bytes downloaded, not saved.'
            return (None, time_so_far)
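    # Fetches the stream URL with our session cookie attached. If the file
    # finished in under 30 seconds, sleep out the remainder before sending the
    # 30-seconds-of-play notification, so the timing the server sees matches a
    # normal listen instead of a bulk download.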
    def _download(self, src_url, fname='download.mp3', streamDict=None):
        headers = {'User-Agent': self.user_agent, 'Cookie': 'PHPSESSID=%s' % self.session}
        print 'INFO: This download will take a *minimum* of 30 seconds, to keep Grooveshark from banning you.'
        print '* Downloading:', src_url
        req = urllib2.Request(src_url, headers=headers)
        response = urllib2.urlopen(req)
        filename, time_spent = self._chunk_read(response, report_hook=self._chunk_report, filename=fname, streamDict=streamDict)
        if filename:
            if time_spent < 30.0:
                # Need to sleep a little longer, then notify the 30 second download
                print "* Waiting remaining seconds to reach 30 ..."
                time.sleep(31.0 - time_spent)
                print "* Notifying 30 seconds of play."
                self._markStreamKeyOver30Seconds(streamDict)
            print "* Completed."
            return filename
        else:
            print "* Error, aborting."
            if os.path.exists(fname):
                os.remove(fname)
    def setupClient(self):
        # Generally only called by __init__, but can be called manually to create a new session
        # Create a single web browsing session to retain cookies, headers, etc.
        self.sess = requests.Session()
        # Fake our user agent
        self.sess.headers.update({'User-Agent': self.user_agent})
        # Load the initial page to get a PHP session cookie and a few other configuration settings
        _ = self.sess.get(self.client_url)
        self.base_html = _.content
        # Some of these help fake out Grooveshark so it doesn't know this is a software library.
        # Download but ignore /build/app.min.css?####
        _ = re.search(r'build/app\.min\.css\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        # Download but ignore /build/libs.min.js?####
        _ = re.search(r'build/libs\.min\.js\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        # Download and keep /build/app.min.js?####
        _ = re.search(r'build/app\.min\.js\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        self.app_js = _.content
        # Extract the app and base configuration blocks
        app_snip = re.search(r'SERVICE_CREATE_TOKEN_FAIL.+?(var .+?lastRandomizer.+?;)', self.app_js).groups()[0]
        base_snip = re.search(r'window\.GS\.config.+?(\{.+?\});', self.base_html).groups()[0]
        # From app determine: client, clientRevision, revToken
        self.client = re.search(r'client:[ ]*"(.+?)"', app_snip).groups()[0]
        self.clientRevision = re.search(r'clientRevision:[ ]*"(.+?)"', app_snip).groups()[0]
        self.revToken = re.search(r'="([^"]+?)"', app_snip).groups()[0]
        # From base determine: privacy, country
        self.privacy = re.search(r'"Privacy":[ ]*([0-9]+?)', base_snip).groups()[0]
        self.country = re.search(r'"country":[ ]*(\{.+?\})', base_snip).groups()[0]
        # From sess determine: session, secretKey (it's present in base, but we're using
        # sess's cookies for future requests - should be the same)
        self.session = self.sess.cookies['PHPSESSID']
        self.secretKey = hashlib.md5(self.session).hexdigest()
        # Generate a UUID
        self.uuid = str(uuid.uuid4()).upper()
        # Get our communication token
        self._getCommunicatonToken()
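    # Each entry in the list returned by search() is a dict of song metadata;
    # the keys this script relies on are 'SongID', 'SongName', 'ArtistName',
    # and 'AlbumName' (the API returns more alongside them).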
    def search(self, searchStr):
        # Song search, in order of best match
        try:
            return self._getResultsFromSearch(searchStr.replace('"', '\\"'))['result']['result']['Songs']
        except:
            return []
    def download(self, songDict, filepath):
        stream_info = self._getStreamKeyFromSongIDEx(songDict)
        if not stream_info:
            # Song was either removed by Grooveshark or is not available to HTML5/mobile/non-Flash client
            return False
        # A download is available, download it - but mark it as downloaded first (HTML5 client does this)
        _ = self._markSongDownloadedEx(stream_info)
        # Then download it :)
        download_url = 'http://%s/stream.php?streamKey=%s' % (stream_info['ip'], stream_info['streamKey'])
        return self._download(download_url, fname=filepath, streamDict=stream_info)
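    # The communication token is fetched once per session over HTTPS by
    # presenting secretKey, the md5 hex digest of our own PHPSESSID cookie.
    # Every later API call carries a fresh per-request token derived from it
    # in _prepToken() below.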
    def _getCommunicatonToken(self):
        # Use the secure client for the token
        gCT_json = '{"header":{"client":"%s","clientRevision":"%s","privacy":%s,"country":%s,"uuid":"%s","session":"%s"},"method":"getCommunicationToken","parameters":{"secretKey":"%s"}}'
        _ = self.sess.post(self.s_client_url + '/more.php?getCommunicationToken', data=gCT_json % (self.client, self.clientRevision, self.privacy, self.country, self.uuid, self.session, self.secretKey))
        # Use json lib to future proof against newer versions of requests
        self.token = json.loads(_.content)['result']
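    # Per-request token scheme, as recovered from app.min.js: take a random
    # 6-hex-character nonce, then append
    #   sha1("<method>:<communication token>:<revToken>:<nonce>")
    # presumably so the server can recompute the hash and verify the caller.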
    def _prepToken(self, method):
        rnd = hashlib.md5(str(random.random())).hexdigest()[:6]
        return rnd + hashlib.sha1(':'.join([method, self.token, self.revToken, rnd])).hexdigest()
    def _buildAPIcall(self, method, parameters):
        core_msg = '{"header":{"client":"%s","clientRevision":"%s","privacy":%s,"country":%s,"uuid":"%s","session":"%s","token":"%s"},"method":"%s","parameters":%s}'
        return core_msg % (self.client, self.clientRevision, self.privacy, self.country, self.uuid, self.session, self._prepToken(method), method, parameters)
    def _doAPIcall(self, method, parameters):
        _ = self.sess.post(self.client_url + 'more.php?%s' % method, data=self._buildAPIcall(method, parameters))
        return _.content
    def _getResultsFromSearch(self, searchStr):
        params = '{"query":"%s","type":["Songs","Playlists","Albums"],"guts":0,"ppOverride":""}'
        return json.loads(self._doAPIcall('getResultsFromSearch', params % searchStr))
    def _getStreamKeyFromSongIDEx(self, songDict):
        params = '{"prefetch":false,"mobile":true,"songID":%s,"country":%s}'
        return json.loads(self._doAPIcall('getStreamKeyFromSongIDEx', params % (songDict['SongID'], self.country)))['result']
    def _markSongDownloadedEx(self, streamDict):
        # streamKey, streamServerID, songID
        params = '{"streamKey":"%s","streamServerID":%s,"songID":%s}'
        return json.loads(self._doAPIcall('markSongDownloadedEx', params % (streamDict['streamKey'], streamDict['streamServerID'], streamDict['SongID'])))['result']
    def _markStreamKeyOver30Seconds(self, streamDict):
        params = '{"streamKey":"%s","streamServerID":%s,"songID":%s}'
        return json.loads(self._doAPIcall('markStreamKeyOver30Seconds', params % (streamDict['streamKey'], streamDict['streamServerID'], streamDict['SongID'])))['result']
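
# A minimal usage sketch of the client (assuming the Grooveshark endpoints
# still respond; the service has been offline since 2015):
#   gsc = pyGroovClient()
#   songs = gsc.search('some artist - some song')
#   if songs:
#       gsc.download(songs[0], 'gs_dl/example.mp3')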
# iDownloads transfer helpers
def sanitize(filename):
    # Replace any character not in the safe set with an underscore
    safe = string.letters + string.digits + "()-.,_+{}'"
    return ''.join([['_', x][x in safe] for x in filename])
def do_search_and_download(gsClient):
    print "\nEnter your search:"
    searchStr = raw_input('> ').strip()
    if not searchStr:
        print "* Cancelled."
        return
    print "* Searching ..."
    results = gsClient.search(searchStr)
    top8 = results[:8]
    if not top8:
        print "! No results found for:", searchStr
        return
    print "* Found, enter the number for download:"
    for i, x in enumerate(top8):
        print "%s) %s - %s - %s" % (i + 1, x['SongName'], x['ArtistName'], x['AlbumName'])
    print '0) Cancel'
    choice = ''.join([x for x in raw_input('> ').strip() if x in '0123456789'])
    if (choice in ['0', '']):
        print "* Cancelled."
        return
    else:
        nChoice = int(choice) - 1
    cDict = top8[nChoice]
    print "* Downloading: %s" % cDict['SongName']
    fname = "gs_dl/" + sanitize("%s - %s - %s.mp3" % (cDict['ArtistName'], cDict['AlbumName'], cDict['SongName']))
    if not os.path.exists('gs_dl'):
        os.makedirs('gs_dl')
    if gsClient.download(cDict, fname) == False:
        print "* Song not available (removed or not for mobile)."
        return
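    # Hand the finished MP3 off to iDownloads: stand up a throwaway HTTP
    # server on localhost, then open an iDownloads:// URL pointing at it.
    # iDownloads maps the scheme back to http:// and fetches the file from us;
    # serve_limited() allows it a few requests (it probes the headers before
    # the real transfer) and then the server is torn down.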
    # Prep webserver
    global ready_to_stop, did_download
    ready_to_stop = False
    did_download = False
    port = 8000
    handler = SmarterHTTPRequestHandler
    # Configure transfer settings
    handler.file_name = fname
    httpd = SmarterHTTPD(("", port), handler, False)
    httpd.allow_reuse_address = True
    httpd.server_bind()
    httpd.server_activate()
    download_url = 'http://127.0.0.1:8000/transfer'
    download_url = download_url.replace('http://', 'iDownloads://')
    print '* Transferring to browser ...'
    webbrowser.open(download_url)
    # print download_url
    httpd.serve_limited(timeout=3, max_requests=8)
    httpd.release()
    if did_download:
        print '* Transfer complete, deleting local copy.'
    else:
        print '* Transfer did not complete, deleting local copy.'
    try:
        os.remove(fname)
    except:
        _ = False
    return
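
# Minimal zip extractor. If every entry in the archive shares one leading
# directory - as GitHub's source zips do - that directory is stripped so the
# contents land directly in altpath.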
def _unzip(a_zip=None, path='.', altpath='unzipped'):
    if a_zip is None:
        return
    filename = os.path.abspath(a_zip)
    if not os.path.isfile(filename):
        return
    # PK magic marker check
    f = open(filename, 'rb')
    try:
        pk_check = f.read(2)
    except Exception:
        pk_check = ''
    finally:
        f.close()
    if pk_check != 'PK':
        print "unzip: %s: does not appear to be a zip file" % a_zip
    else:
        altpath = os.path.join(os.path.dirname(filename), altpath)
        location = os.path.abspath(altpath)
        if not os.path.exists(location):
            os.makedirs(location)
        zipfp = open(filename, 'rb')
        try:
            zipf = zipfile.ZipFile(zipfp)
            # Check for a leading directory common to all files and remove it
            dirnames = [os.path.join(os.path.dirname(x), '') for x in zipf.namelist()]
            common_dir = os.path.commonprefix(dirnames or ['/'])
            # Check to make sure there aren't 2 or more subdirectories with the same prefix
            if not common_dir.endswith('/'):
                common_dir = os.path.join(os.path.dirname(common_dir), '')
            for name in zipf.namelist():
                data = zipf.read(name)
                fn = name
                if common_dir:
                    if fn.startswith(common_dir):
                        fn = fn.split(common_dir, 1)[-1]
                    elif fn.startswith('/' + common_dir):
                        fn = fn.split('/' + common_dir, 1)[-1]
                fn = fn.lstrip('/')
                fn = os.path.join(location, fn)
                dirf = os.path.dirname(fn)
                if not os.path.exists(dirf):
                    os.makedirs(dirf)
                if fn.endswith('/'):
                    # A directory
                    if not os.path.exists(fn):
                        os.makedirs(fn)
                else:
                    fp = open(fn, 'wb')
                    try:
                        fp.write(data)
                    finally:
                        fp.close()
        except Exception:
            zipfp.close()
            print "unzip: %s: zip file is corrupt" % a_zip
            return
        zipfp.close()
        return os.path.abspath(location)
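
# Bootstrap trick: Pythonista bundles an older copy of the requests module.
# That copy is borrowed just long enough to download the requests 1.2.0
# source zip from GitHub, unpack it next to this script, and expose it as an
# importable requests_1_2 package.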
def req12_setup():
    import requests as old_req
    relative_dir = os.path.abspath(os.path.dirname(__file__))
    curdir = os.getcwd()
    os.chdir(relative_dir)
    print '!!! requests-1.2.0 not installed, downloading rev.d06908d ...'
    zip_url = 'https://github.com/kennethreitz/requests/archive/v1.2.0.zip'
    print ' * Downloading: requests_1.2.0.zip (565KB)...'
    f = open('requests_1.2.0.zip', 'wb')
    try:
        f.write(old_req.get(zip_url).content)
    except Exception:
        sys.exc_clear()
    f.close()
    # Unload built-in requests module
    del old_req
    print "!!! zip downloaded, extracting ..."
    try:
        shutil.rmtree('requests_zip', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    _ = _unzip('requests_1.2.0.zip', altpath='requests_zip')
    print "!!! Extraction complete, re-arranging ..."
    try:
        shutil.rmtree('requests_1_2', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    os.rename('requests_zip/requests', 'requests_1_2')
    print "!!! Re-arranging complete, cleaning up ..."
    try:
        os.remove('requests_1.2.0.zip')
        shutil.rmtree('requests_zip', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    os.chdir(curdir)
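
# Entry point: try importing the vendored requests_1_2 package, calling
# req12_setup() to download and unpack it on failure, up to 3 attempts.
# With a working client, drop into a simple numbered menu loop.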
def main():
    global requests
    # Ensure that the requests v.1.2.0 module is available
    requests = None
    init_tries = 3
    while init_tries > 0:
        try:
            import requests_1_2 as requests
            init_tries = 0
        except:
            print "!!! Init failure %s of 3 ..." % (4 - init_tries)
            sys.exc_clear()
            req12_setup()
            init_tries -= 1
    if not requests:
        print '!!! Please check your network connection and try again.'
        return
    try:
        gsc = pyGroovClient()
    except Exception:
        sys.exc_clear()
        print "* Error initializing GS client, make sure you're online."
        return
    console.clear()
    print "* Client successfully initialized"
    loop = True
    while loop:
        print "Enter a menu number choice:"
        print "--------------------------"
        print "1) Search and download"
        print "0) Quit"
        choice = ''.join([x for x in raw_input('> ').strip() if x in '0123456789'])
        if (choice in ['0', '']):
            print "* Quit."
            loop = False
        elif (choice in ['1']):
            do_search_and_download(gsc)
        else:
            print "* Unknown choice number, try again."
        print ""

if __name__ == "__main__":
    main()
@cclauss commented Oct 11, 2013

http://omz-software.com/pythonista/forums/discussion/293/grooveshark-downloader-enjoy-#Item_13 reports some problems caused by changes to the GS web pages.
