Skip to content

Instantly share code, notes, and snippets.

@pudquick
Last active December 17, 2015 11:59
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pudquick/5606582 to your computer and use it in GitHub Desktop.
Save pudquick/5606582 to your computer and use it in GitHub Desktop.
import re, hashlib, uuid, json, random, os, urllib2, os.path, time, sys, SimpleHTTPServer, SocketServer, string, console, webbrowser, shutil, zipfile
class SmarterHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """HTTP handler that serves one pre-selected local file at /transfer.

    Used to hand a downloaded MP3 to the iDownloads app on iOS: the app is
    pointed at iDownloads://127.0.0.1:PORT/transfer and fetches the file
    from this handler. All other paths fall back to the stock directory
    serving of SimpleHTTPRequestHandler.
    """
    server_version = 'SimpleHTTP/0.6'
    # Path of the file to serve for /transfer. Assigned on the CLASS
    # (handler.file_name = fname) before the server is started.
    file_name = ''
    def do_GET(self):
        # Route anything under /transfer to the file-transfer handler;
        # everything else gets the base class's send_head()/copyfile() path.
        if self.path.startswith('/transfer'):
            self.get_transfer()
        else:
            f = self.send_head()
            if f:
                self.copyfile(f, self.wfile)
                f.close()
    def get_transfer(self):
        """Stream file_name to the client as an attachment download.

        Any failure (missing file, client disconnect mid-copy) is swallowed;
        the module-level did_download flag is set once the attempt finishes
        either way. NOTE(review): the flag therefore also goes True on a
        failed transfer attempt — presumably intentional, verify.
        """
        global did_download
        try:
            # Perform the actual file download
            self.send_response(200)
            # Content-Disposition: attachment; filename="fname.ext"
            # split('/',1)[-1] drops the leading "gs_dl/" directory component
            self.send_header('Content-Disposition', 'attachment; filename="%s"' % (self.file_name.split('/',1)[-1]))
            self.send_header('Content-Length', '%s' % (os.path.getsize(self.file_name)))
            self.send_header('Content-Type', 'application/octet-stream')
            self.end_headers()
            f = open(self.file_name, 'rb')
            self.copyfile(f, self.wfile)
            f.close()
        except:
            # Best-effort: broken pipes are normal here (iDownloads aborts a
            # probe request early); clear Py2 exception state and move on.
            sys.exc_clear()
        did_download = True
    def log_message(self, format, *args):
        # Silence all per-request logging.
        return
    def finish(self):
        # Fix for iDownload when it early terminates a transfer to get file details
        if not self.wfile.closed:
            try:
                self.wfile.flush()
            except:
                sys.exc_clear()
        # Close both halves of the connection, ignoring errors from sockets
        # the client has already torn down.
        try:
            self.wfile.close()
        except:
            sys.exc_clear()
        try:
            self.rfile.close()
        except:
            sys.exc_clear()
class SmarterHTTPD(SocketServer.ThreadingTCPServer):
    """Threading TCP server that can stop itself after a request budget is
    spent or once the module-level ready_to_stop flag goes true."""
    keep_running = True
    requests_left = None
    did_timeout = False

    def serve_limited(self, timeout=None, max_requests=None):
        """Serve requests until max_requests have been handled (timeouts
        count against the budget) or ready_to_stop is raised elsewhere.

        timeout -- per-request timeout in seconds passed to handle_request()
        max_requests -- request budget; None means unlimited
        """
        global ready_to_stop
        self.timeout = timeout
        self.requests_left = None if max_requests is None else abs(int(max_requests))
        self.keep_running = True
        self.did_timeout = False
        while self.keep_running:
            self.handle_request()
            if self.requests_left is not None:
                self.requests_left -= 1
                if self.requests_left <= 0:
                    # Budget exhausted.
                    self.keep_running = False
                    break
            if ready_to_stop:
                # The handler side signalled that the transfer is done.
                self.keep_running = False
                break

    def handle_timeout(self):
        # Record that handle_request() gave up waiting for a connection.
        self.did_timeout = True

    def handle_error(self, request, client_address):
        # Deliberately silent; handler errors are routine while iDownloads
        # probes and aborts connections.
        return

    def release(self):
        """Best-effort teardown of the listening socket, swallowing errors."""
        try:
            self.server_close()
        except Exception:
            sys.exc_clear()
        try:
            self.socket.close()
        except Exception:
            sys.exc_clear()
class pyGroovClient:
    """Minimal pure-HTTP client for the Grooveshark HTML5 web app.

    Scrapes the live HTML5 client's pages and JS for the values needed to
    sign API calls to more.php (client name/revision, revToken, country,
    PHP session), then exposes search() and download(). Relies on the
    module-global `requests` bound by main().
    """
    def __init__(self):
        self.client_url = 'http://html5.grooveshark.com/'
        # Same endpoint over TLS; used only for the communication token.
        self.s_client_url = self.client_url.replace('http', 'https')
        # This user agent is in the top 7% of user agents on the web
        self.user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31'
        # Trigger client setup
        pyGroovClient.setupClient(self)
    def _chunk_report(self, bytes_so_far, chunk_size, total_size):
        """Progress callback for _chunk_read: print byte count, with a
        percentage when total_size is known (None when it is not)."""
        if (total_size != None):
            percent = float(bytes_so_far) / total_size
            percent = round(percent*100, 2)
            print 'Downloaded %d of %d bytes (%0.2f%%)' % (bytes_so_far, total_size, percent)
            if bytes_so_far >= total_size:
                print ''
        else:
            print 'Downloaded %d bytes' % (bytes_so_far)
    def _chunk_read(self, response, chunk_size=32768, report_hook=None, filename=None, streamDict=None):
        """Stream an open urllib2 response to `filename` in chunks.

        While reading, once 30 wall-clock seconds have elapsed, fires
        _markStreamKeyOver30Seconds(streamDict) — Grooveshark expects a
        listener to take at least that long.

        Returns (absolute_path_or_None, seconds_elapsed); the path is None
        when zero bytes were downloaded.
        """
        # Delete old file if it's present
        if os.path.exists(filename):
            os.remove(filename)
        # Set up the file handler
        try:
            f = open(filename, 'wb')
        except Exception:
            print '! Error:', sys.exc_info()[1]
            raise
        start_time = time.time()
        did_hit_30 = False
        time_so_far = 0
        if response.info().has_key('Content-Length'):
            total_size = response.info().getheader('Content-Length').strip()
            total_size = int(total_size)
        else:
            # No size
            total_size = None
            if report_hook:
                print '* Warning: No total file size available.'
        bytes_so_far = 0
        i = 0
        while True:
            chunk = response.read(chunk_size)
            bytes_so_far += len(chunk)
            # Check time, notify when complete
            now_time = time.time()
            if not did_hit_30:
                if abs(now_time - start_time) >= 30:
                    did_hit_30 = True
                    print "* Notifying 30 seconds of play ..."
                    self._markStreamKeyOver30Seconds(streamDict)
                    # Set remaining time to 0
                    time_so_far = 31.0
            if not chunk:
                break
            else:
                f.write(chunk)
                # Report only every 5th chunk to keep output readable.
                # NOTE(review): assumes report_hook is not None whenever
                # i == 0 — _download always supplies one; verify other callers.
                if not i:
                    report_hook(bytes_so_far, chunk_size, total_size)
                i = (i+1)%5
        if not did_hit_30:
            # We didn't hit 30 seconds during the download, better let parent method know
            # For safety, since we know we didn't send the message and don't want a race condition, assume no greater than 29 seconds
            time_so_far = min(29.0, time.time() - start_time)
        try:
            f.close()
        except:
            _ = False
        if bytes_so_far > 0:
            print '* Saved to:', filename
            return (os.path.abspath(filename), time_so_far)
        else:
            print '* Error: 0 bytes downloaded, not saved.'
            return (None, time_so_far)
    def _download(self, src_url, fname='download.mp3', streamDict=None):
        """Download a stream URL to fname, enforcing a 30-second minimum
        elapsed time (sleeping out the remainder if the transfer was fast).

        Uses urllib2 directly rather than self.sess, but reuses the PHP
        session cookie. Returns the saved path, or None on failure (the
        partial file is removed).
        """
        headers = {'User-Agent': self.user_agent, 'Cookie': 'PHPSESSID=%s' % self.session}
        print 'INFO: This download will take a *minimum* of 30 seconds, to keep Grooveshark from banning you.'
        print '* Downloading:', src_url
        req = urllib2.Request(src_url, headers=headers)
        response = urllib2.urlopen(req)
        filename,time_spent = self._chunk_read(response, report_hook=self._chunk_report, filename=fname, streamDict=streamDict)
        if filename:
            if time_spent < 30.0:
                # Need to sleep a little longer, then notify the 30 second download
                print "* Waiting remaining seconds to reach 30 ..."
                time.sleep(31.0 - time_spent)
                print "* Notifying 30 seconds of play."
                self._markStreamKeyOver30Seconds(streamDict)
            print "* Completed."
            return filename
        else:
            print "* Error, aborting."
            if os.path.exists(fname):
                os.remove(fname)
    def setupClient(self):
        # Generally only called by __init__, but can be called manually to create a new session
        """Create a fresh web session and scrape the HTML5 client for the
        configuration values used to sign API calls.

        Sets: sess, base_html, app_js, client, clientRevision, revToken,
        privacy, country, session, secretKey, uuid, token.
        """
        # Create a single web browsing session to retain cookies, headers, etc.
        self.sess = requests.Session()
        # Fake our user agent
        self.sess.headers.update({'User-Agent': self.user_agent})
        # Load the initial page to get a PHP session cookie and a few other configuration settings
        _ = self.sess.get(self.client_url)
        self.base_html = _.content
        # Some of these help fake out Grooveshark so it doesn't know this is a software library.
        # Download but ignore /build/app.min.css?####
        _ = re.search(r'build/app\.min\.css\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        # Download but ignore /build/libs.min.js?####
        _ = re.search(r'build/libs\.min\.js\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        # Download and keep /build/app.min.js?####
        _ = re.search(r'build/app\.min\.js\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        self.app_js = _.content
        # Exctract the app and base configuration blocks
        app_snip = re.search(r'SERVICE_CREATE_TOKEN_FAIL.+?(var .+?lastRandomizer.+?;)', self.app_js).groups()[0]
        base_snip = re.search(r'window\.GS\.config.+?(\{.+?\});', self.base_html).groups()[0]
        # From app determine: client, clientRevision, revToken
        self.client = re.search(r'client:[ ]*"(.+?)"', app_snip).groups()[0]
        self.clientRevision = re.search(r'clientRevision:[ ]*"(.+?)"', app_snip).groups()[0]
        self.revToken = re.search(r'="([^"]+?)"', app_snip).groups()[0]
        # From base determine: privacy, country
        self.privacy = re.search(r'"Privacy":[ ]*([0-9]+?)', base_snip).groups()[0]
        self.country = re.search(r'"country":[ ]*(\{.+?\})', base_snip).groups()[0]
        # From sess determine: session, secretKey (it's present in base, but we're using sess's cookies to do future requests - should be same)
        self.session = self.sess.cookies['PHPSESSID']
        self.secretKey = hashlib.md5(self.session).hexdigest()
        # Generate a UUID
        self.uuid = str(uuid.uuid4()).upper()
        # Get our communication token
        self._getCommunicatonToken()
    def search(self, searchStr):
        # Song search, in order of best match
        """Return the list of song dicts matching searchStr (best match
        first), or [] on any failure. Double quotes are escaped so the
        query can be embedded in the JSON request body."""
        try:
            return self._getResultsFromSearch(searchStr.replace('"', '\\"'))['result']['result']['Songs']
        except:
            return []
    def download(self, songDict, filepath):
        """Download the song described by songDict (an entry from search())
        to filepath. Returns False when no HTML5/mobile stream exists,
        otherwise the result of _download (saved path or None)."""
        stream_info = self._getStreamKeyFromSongIDEx(songDict)
        if not stream_info:
            # Song was either removed by Grooveshark or is not available to HTML5/mobile/non-Flash client
            return False
        # A download is available, download it - but mark it as downloaded first (HTML5 client does this)
        _ = self._markSongDownloadedEx(stream_info)
        # Then download it :)
        download_url = 'http://%s/stream.php?streamKey=%s' % (stream_info['ip'],stream_info['streamKey'])
        return self._download(download_url, fname=filepath, streamDict=stream_info)
    def _getCommunicatonToken(self):
        """Fetch the per-session communication token (over HTTPS) used to
        sign every subsequent API call; stores it in self.token.
        (Method name typo is historical — kept for compatibility.)"""
        # Use the secure client for the token
        gCT_json = '{"header":{"client":"%s","clientRevision":"%s","privacy":%s,"country":%s,"uuid":"%s","session":"%s"},"method":"getCommunicationToken","parameters":{"secretKey":"%s"}}'
        _ = self.sess.post(self.s_client_url + '/more.php?getCommunicationToken', data=gCT_json % (self.client, self.clientRevision, self.privacy, self.country, self.uuid, self.session, self.secretKey))
        # Use json lib to future proof against newer versions of requests
        self.token = json.loads(_.content)['result']
    def _prepToken(self, method):
        """Build the signed per-call token: 6 random hex chars + SHA1 of
        'method:token:revToken:rand', mirroring the HTML5 client."""
        rnd = hashlib.md5(str(random.random())).hexdigest()[:6]
        return rnd + hashlib.sha1(':'.join([method, self.token, self.revToken, rnd])).hexdigest()
    def _buildAPIcall(self, method, parameters):
        """Return the raw JSON request body for a more.php API call.
        `parameters` is a pre-serialized JSON string, not a dict."""
        core_msg = '{"header":{"client":"%s","clientRevision":"%s","privacy":%s,"country":%s,"uuid":"%s","session":"%s","token":"%s"},"method":"%s","parameters":%s}'
        return core_msg % (self.client, self.clientRevision, self.privacy, self.country, self.uuid, self.session, self._prepToken(method), method, parameters)
    def _doAPIcall(self, method, parameters):
        """POST one API call to more.php and return the raw response body."""
        _ = self.sess.post(self.client_url + 'more.php?%s' % method, data=self._buildAPIcall(method, parameters))
        return _.content
    def _getResultsFromSearch(self, searchStr):
        """Raw search API call; returns the decoded JSON response dict."""
        params = '{"query":"%s","type":["Songs","Playlists","Albums"],"guts":0,"ppOverride":""}'
        return json.loads(self._doAPIcall('getResultsFromSearch', params % searchStr))
    def _getStreamKeyFromSongIDEx(self, songDict):
        """Ask for a mobile stream key for songDict['SongID']; returns the
        'result' payload (falsy when the song is unavailable)."""
        params = '{"prefetch":false,"mobile":true,"songID":%s,"country":%s}'
        return json.loads(self._doAPIcall('getStreamKeyFromSongIDEx', params % (songDict['SongID'], self.country)))['result']
    def _markSongDownloadedEx(self, streamDict):
        # streamKey, streamServerID, songID
        """Tell Grooveshark the song was downloaded (the HTML5 client does
        this before streaming); returns the API 'result' payload."""
        params = '{"streamKey":"%s","streamServerID":%s,"songID":%s}'
        return json.loads(self._doAPIcall('markSongDownloadedEx', params % (streamDict['streamKey'],streamDict['streamServerID'],streamDict['SongID'])))['result']
    def _markStreamKeyOver30Seconds(self, streamDict):
        """Tell Grooveshark the stream has played for 30+ seconds; returns
        the API 'result' payload."""
        params = '{"streamKey":"%s","streamServerID":%s,"songID":%s}'
        return json.loads(self._doAPIcall('markStreamKeyOver30Seconds', params % (streamDict['streamKey'],streamDict['streamServerID'],streamDict['SongID'])))['result']
# iDownloads
def sanitize(filename):
    """Return *filename* with every character outside a filesystem-safe
    allowlist replaced by an underscore.

    The allowlist is ASCII letters, digits, and ()-.,_+{}' — note that
    spaces and path separators are replaced too.
    """
    # BUG FIX: string.letters is locale-dependent in Python 2 (and removed
    # in Python 3); string.ascii_letters is stable across locales/versions.
    safe = string.ascii_letters + string.digits + "()-.,_+{}'"
    return ''.join([x if x in safe else '_' for x in filename])
def do_search_and_download(gsClient):
print "\nEnter your search:"
searchStr = raw_input('> ').strip()
if not searchStr:
print "* Cancelled."
return
print "* Searching ..."
results = gsClient.search(searchStr)
top8 = results[:8]
if not top8:
print "! No results found for:", searchStr
return
print "* Found, enter the number for download:"
for i,x in enumerate(top8):
print "%s) %s - %s - %s" % (i+1, x['SongName'], x['ArtistName'], x['AlbumName'])
print '0) Cancel'
choice = ''.join([x for x in raw_input('> ').strip() if x in '0123456789'])
if (choice in ['0','']):
print "* Cancelled."
return
else:
nChoice = int(choice) - 1
cDict = top8[nChoice]
print "*Downloading: %s" % cDict['SongName']
fname = "gs_dl/" + sanitize("%s - %s - %s.mp3" % (cDict['ArtistName'], cDict['AlbumName'],cDict['SongName']))
if not os.path.exists('gs_dl'):
os.makedirs('gs_dl')
if gsClient.download(cDict, fname) == False:
print "* Song not available (removed or not for mobile)."
return
# Prep webserver
global ready_to_stop, did_download
ready_to_stop = False
did_download = False
port = 8000
handler = SmarterHTTPRequestHandler
# Configure transfer settings
handler.file_name = fname
httpd = SmarterHTTPD(("", port), handler, False)
httpd.allow_reuse_address = True
httpd.server_bind()
httpd.server_activate()
download_url = 'http://127.0.0.1:8000/transfer'
download_url = download_url.replace('http://', 'iDownloads://')
print '* Transferring to browser ...'
webbrowser.open(download_url)
# print download_url
httpd.serve_limited(timeout=3,max_requests=8)
httpd.release()
if did_download:
print '* Transfer complete, deleting local copy.'
else:
print '* Transfer did not complete, deleting local copy.'
try:
os.remove(fname)
except:
_ = False
return
def _unzip(a_zip=None, path='.', altpath='unzipped'):
    """Extract a zip archive into `altpath` (created next to the archive),
    stripping any single leading directory common to all members.

    Returns the absolute extraction directory on success, None on any
    failure (missing file, not a zip, corrupt archive).

    NOTE(security): member names are not validated against '..' or
    absolute paths, so a hostile archive could write outside `altpath`
    (zip-slip). Acceptable here because the archive source is fixed.
    """
    if a_zip is None:
        return
    filename = os.path.abspath(a_zip)
    if not os.path.isfile(filename):
        return
    # PK magic marker check (file opened in binary mode, so compare bytes)
    f = open(filename, 'rb')
    try:
        pk_check = f.read(2)
    except Exception:
        pk_check = b''
    finally:
        f.close()
    if pk_check != b'PK':
        print("unzip: %s: does not appear to be a zip file" % a_zip)
        return
    altpath = os.path.join(os.path.dirname(filename), altpath)
    location = os.path.abspath(altpath)
    if not os.path.exists(location):
        os.makedirs(location)
    zipfp = open(filename, 'rb')
    # BUG FIX: close zipfp via finally so it cannot leak on an exception.
    try:
        zipf = zipfile.ZipFile(zipfp)
        # Find a leading directory common to all members so it can be stripped
        dirnames = [os.path.join(os.path.dirname(x), '') for x in zipf.namelist()]
        common_dir = os.path.commonprefix(dirnames or ['/'])
        # commonprefix is per-character: trim back to a whole path component
        # so e.g. 'requests-1/' and 'requests-2/' don't yield 'requests-'.
        if not common_dir.endswith('/'):
            common_dir = os.path.join(os.path.dirname(common_dir), '')
        for name in zipf.namelist():
            data = zipf.read(name)
            fn = name
            if common_dir:
                if fn.startswith(common_dir):
                    fn = fn.split(common_dir, 1)[-1]
                elif fn.startswith('/' + common_dir):
                    fn = fn.split('/' + common_dir, 1)[-1]
            fn = fn.lstrip('/')
            fn = os.path.join(location, fn)
            dirf = os.path.dirname(fn)
            if not os.path.exists(dirf):
                os.makedirs(dirf)
            if fn.endswith('/'):
                # A directory entry
                if not os.path.exists(fn):
                    os.makedirs(fn)
            else:
                fp = open(fn, 'wb')
                try:
                    fp.write(data)
                finally:
                    fp.close()
    except Exception:
        print("unzip: %s: zip file is corrupt" % a_zip)
        return
    finally:
        zipfp.close()
    return os.path.abspath(location)
def req12_setup():
    """Bootstrap helper: fetch requests v1.2.0 from GitHub and unpack it
    as a local package named requests_1_2 next to this script.

    Uses whatever `requests` module is already importable (Pythonista
    ships an older one) just to download the zip, then drops the
    reference. Works in the script's own directory and restores the
    caller's cwd when done.
    """
    import requests as old_req
    relative_dir = os.path.abspath(os.path.dirname(__file__))
    curdir = os.getcwd()
    os.chdir(relative_dir)
    print '!!! requests-1.2.0 not installed, downloading rev.d06908d ...'
    zip_url = 'https://github.com/kennethreitz/requests/archive/v1.2.0.zip'
    print ' * Downloading: requests_1.2.0.zip (565KB)...'
    f = open('requests_1.2.0.zip', 'wb')
    try:
        f.write(old_req.get(zip_url).content)
    except Exception:
        # Best-effort: a failed fetch leaves an empty/partial zip; _unzip
        # below will then report it as not-a-zip.
        sys.exc_clear()
    f.close()
    # Unload built-in requests module
    del old_req
    print "!!! zip downloaded, extracting ..."
    # Clear any leftover extraction dir from a previous failed run.
    try:
        shutil.rmtree('requests_zip', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    _ = _unzip('requests_1.2.0.zip', altpath='requests_zip')
    print "!!! Extraction complete, re-arranging ..."
    try:
        shutil.rmtree('requests_1_2', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    # Move the package subdirectory out of the extracted tree under the
    # import name main() expects.
    os.rename('requests_zip/requests', 'requests_1_2')
    print "!!! Re-arranging complete, cleaning up ..."
    try:
        os.remove('requests_1.2.0.zip')
        shutil.rmtree('requests_zip', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    os.chdir(curdir)
def main():
global requests
# Ensure that the requests v.1.2.0 module is available
requests = None
init_tries = 3
while init_tries > 0:
try:
import requests_1_2 as requests
init_tries = 0
except:
print "!!! Init failure %s of 3 ..." % (4 - init_tries)
sys.exc_clear()
req12_setup()
init_tries -= 1
if not requests:
print '!!! Please check your network connection and try again.'
return
try:
gsc = pyGroovClient()
except Exception:
sys.exc_clear()
print "* Error initializing GS client, make sure you're online."
return
console.clear()
print "* Client successfully initialized"
loop = True
while loop:
print "Enter a menu number choice:"
print "--------------------------"
print "1) Search and download"
print "0) Quit"
choice = ''.join([x for x in raw_input('> ').strip() if x in '0123456789'])
if (choice in ['0','']):
print "* Quit."
loop = False
elif (choice in ['1']):
do_search_and_download(gsc)
else:
print "* Unknown choice number, try again."
print ""
if __name__ == "__main__":
main()
@cclauss
Copy link

cclauss commented Oct 11, 2013

http://omz-software.com/pythonista/forums/discussion/293/grooveshark-downloader-enjoy-#Item_13 reports problems caused by changes to the GS web pages.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment