import re, hashlib, uuid, json, random, os, os.path, time, sys
import urllib2, SimpleHTTPServer, SocketServer, string
import console, webbrowser, shutil, zipfile
class SmarterHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    # One-shot file server: any request to /transfer streams file_name as an
    # attachment; everything else falls through to the stock directory handler.
    server_version = 'SimpleHTTP/0.6'
    file_name = ''
    def do_GET(self):
        if self.path.startswith('/transfer'):
            self.get_transfer()
        else:
            f = self.send_head()
            if f:
                self.copyfile(f, self.wfile)
                f.close()
    def get_transfer(self):
        global did_download
        try:
            # Perform the actual file download
            self.send_response(200)
            # Content-Disposition: attachment; filename="fname.ext"
            self.send_header('Content-Disposition', 'attachment; filename="%s"' % (self.file_name.split('/', 1)[-1]))
            self.send_header('Content-Length', '%s' % (os.path.getsize(self.file_name)))
            self.send_header('Content-Type', 'application/octet-stream')
            self.end_headers()
            f = open(self.file_name, 'rb')
            self.copyfile(f, self.wfile)
            f.close()
        except:
            sys.exc_clear()
        did_download = True
    def log_message(self, format, *args):
        # Silence per-request logging
        return
    def finish(self):
        # Fix for iDownloads when it terminates a transfer early to get file details
        if not self.wfile.closed:
            try:
                self.wfile.flush()
            except:
                sys.exc_clear()
        try:
            self.wfile.close()
        except:
            sys.exc_clear()
        try:
            self.rfile.close()
        except:
            sys.exc_clear()
class SmarterHTTPD(SocketServer.ThreadingTCPServer):
    keep_running = True
    requests_left = None
    did_timeout = False
    def serve_limited(self, timeout=None, max_requests=None):
        global ready_to_stop
        self.timeout = timeout
        if max_requests is None:
            self.requests_left = None
        else:
            self.requests_left = abs(int(max_requests))
        self.keep_running = True
        self.did_timeout = False
        while self.keep_running:
            self.handle_request()
            # print "Request handled."
            if self.requests_left is not None:
                self.requests_left -= 1
                if self.requests_left <= 0:
                    self.keep_running = False
                    # print "EXIT: HIT MAX REQUESTS"
                    continue
            if ready_to_stop:
                self.keep_running = False
                # print "EXIT: TOLD TO STOP"
                continue
    def handle_timeout(self):
        self.did_timeout = True
    def handle_error(self, request, client_address):
        # Overridden: we don't care to see any error messages
        return
    def release(self):
        try:
            self.server_close()
        except Exception:
            sys.exc_clear()
        try:
            self.socket.close()
        except Exception:
            sys.exc_clear()
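
# A minimal sketch of how serve_limited() above is meant to be driven (this is
# exactly what do_search_and_download() does later; the port is arbitrary).
# The loop ends after max_requests requests, or when the global ready_to_stop
# flag is set by outside code (defined but never set True in this script):
#
#   httpd = SmarterHTTPD(("", 8000), SmarterHTTPRequestHandler, False)
#   httpd.allow_reuse_address = True
#   httpd.server_bind()
#   httpd.server_activate()
#   httpd.serve_limited(timeout=3, max_requests=8)
#   httpd.release()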
class pyGroovClient:
    def __init__(self):
        self.client_url = 'http://html5.grooveshark.com/'
        self.s_client_url = self.client_url.replace('http', 'https')
        # This user agent is in the top 7% of user agents on the web
        self.user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31'
        # Trigger client setup
        self.setupClient()
    def _chunk_report(self, bytes_so_far, chunk_size, total_size):
        if total_size is not None:
            percent = float(bytes_so_far) / total_size
            percent = round(percent * 100, 2)
            print 'Downloaded %d of %d bytes (%0.2f%%)' % (bytes_so_far, total_size, percent)
            if bytes_so_far >= total_size:
                print ''
        else:
            print 'Downloaded %d bytes' % (bytes_so_far)
    def _chunk_read(self, response, chunk_size=32768, report_hook=None, filename=None, streamDict=None):
        # Delete old file if it's present
        if os.path.exists(filename):
            os.remove(filename)
        # Set up the file handler
        try:
            f = open(filename, 'wb')
        except Exception:
            print '! Error:', sys.exc_info()[1]
            raise
        start_time = time.time()
        did_hit_30 = False
        time_so_far = 0
        if response.info().has_key('Content-Length'):
            total_size = response.info().getheader('Content-Length').strip()
            total_size = int(total_size)
        else:
            # No size
            total_size = None
            if report_hook:
                print '* Warning: No total file size available.'
        bytes_so_far = 0
        i = 0
        while True:
            chunk = response.read(chunk_size)
            bytes_so_far += len(chunk)
            # Check time, notify when we pass the 30-second mark
            now_time = time.time()
            if not did_hit_30:
                if abs(now_time - start_time) >= 30:
                    did_hit_30 = True
                    print "* Notifying 30 seconds of play ..."
                    self._markStreamKeyOver30Seconds(streamDict)
                    # Push time_so_far past 30 so the caller neither sleeps nor notifies again
                    time_so_far = 31.0
            if not chunk:
                break
            else:
                f.write(chunk)
                if not i:
                    report_hook(bytes_so_far, chunk_size, total_size)
                i = (i + 1) % 5
        if not did_hit_30:
            # We didn't hit 30 seconds during the download, better let the parent method know.
            # For safety, since we know we didn't send the message and don't want a race condition,
            # assume no more than 29 seconds have elapsed.
            time_so_far = min(29.0, time.time() - start_time)
        try:
            f.close()
        except:
            sys.exc_clear()
        if bytes_so_far > 0:
            print '* Saved to:', filename
            return (os.path.abspath(filename), time_so_far)
        else:
            print '* Error: 0 bytes downloaded, not saved.'
            return (None, time_so_far)
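
    # Pacing example for the 30-second logic above (the 12 is hypothetical):
    # if the MP3 finishes downloading in 12 seconds, time_so_far is capped at
    # 29.0 here, so _download() below sleeps a further 31.0 - 12 = 19 seconds
    # before sending markStreamKeyOver30Seconds -- mimicking a client that
    # actually played the song for 30 seconds.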
    def _download(self, src_url, fname='download.mp3', streamDict=None):
        headers = {'User-Agent': self.user_agent, 'Cookie': 'PHPSESSID=%s' % self.session}
        print 'INFO: This download will take a *minimum* of 30 seconds, to keep Grooveshark from banning you.'
        print '* Downloading:', src_url
        req = urllib2.Request(src_url, headers=headers)
        response = urllib2.urlopen(req)
        filename, time_spent = self._chunk_read(response, report_hook=self._chunk_report, filename=fname, streamDict=streamDict)
        if filename:
            if time_spent < 30.0:
                # Need to sleep a little longer, then send the 30-second notification
                print "* Waiting remaining seconds to reach 30 ..."
                time.sleep(31.0 - time_spent)
                print "* Notifying 30 seconds of play."
                self._markStreamKeyOver30Seconds(streamDict)
            print "* Completed."
            return filename
        else:
            print "* Error, aborting."
            if os.path.exists(fname):
                os.remove(fname)
    def setupClient(self):
        # Generally only called by __init__, but can be called manually to create a new session
        # Create a single web browsing session to retain cookies, headers, etc.
        self.sess = requests.Session()
        # Fake our user agent
        self.sess.headers.update({'User-Agent': self.user_agent})
        # Load the initial page to get a PHP session cookie and a few other configuration settings
        _ = self.sess.get(self.client_url)
        self.base_html = _.content
        # Some of these help fake out Grooveshark so it doesn't know this is a software library.
        # Download but ignore /build/app.min.css?####
        _ = re.search(r'build/app\.min\.css\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        # Download but ignore /build/libs.min.js?####
        _ = re.search(r'build/libs\.min\.js\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        # Download and keep /build/app.min.js?####
        _ = re.search(r'build/app\.min\.js\?[0-9]+', self.base_html).group()
        _ = self.sess.get(self.client_url + _)
        self.app_js = _.content
        # Extract the app and base configuration blocks
        app_snip = re.search(r'SERVICE_CREATE_TOKEN_FAIL.+?(var .+?lastRandomizer.+?;)', self.app_js).groups()[0]
        base_snip = re.search(r'window\.GS\.config.+?(\{.+?\});', self.base_html).groups()[0]
        # From app determine: client, clientRevision, revToken
        self.client = re.search(r'client:[ ]*"(.+?)"', app_snip).groups()[0]
        self.clientRevision = re.search(r'clientRevision:[ ]*"(.+?)"', app_snip).groups()[0]
        self.revToken = re.search(r'="([^"]+?)"', app_snip).groups()[0]
        # From base determine: privacy, country
        self.privacy = re.search(r'"Privacy":[ ]*([0-9]+?)', base_snip).groups()[0]
        self.country = re.search(r'"country":[ ]*(\{.+?\})', base_snip).groups()[0]
        # From sess determine: session and secretKey (secretKey is present in base, but
        # we're using sess's cookies for future requests, so it should be the same)
        self.session = self.sess.cookies['PHPSESSID']
        # The "secret key" is just the MD5 hex digest of the PHP session id
        self.secretKey = hashlib.md5(self.session).hexdigest()
        # Generate a UUID
        self.uuid = str(uuid.uuid4()).upper()
        # Get our communication token
        self._getCommunicationToken()
    def search(self, searchStr):
        # Song search, in order of best match
        try:
            return self._getResultsFromSearch(searchStr.replace('"', '\\"'))['result']['result']['Songs']
        except:
            return []
    def download(self, songDict, filepath):
        stream_info = self._getStreamKeyFromSongIDEx(songDict)
        if not stream_info:
            # Song was either removed by Grooveshark or is not available to the HTML5/mobile/non-Flash client
            return False
        # A download is available - but mark it as downloaded first (the HTML5 client does this)
        _ = self._markSongDownloadedEx(stream_info)
        # Then download it :)
        download_url = 'http://%s/stream.php?streamKey=%s' % (stream_info['ip'], stream_info['streamKey'])
        return self._download(download_url, fname=filepath, streamDict=stream_info)
    def _getCommunicationToken(self):
        # Use the secure (https) client for the token
        gCT_json = '{"header":{"client":"%s","clientRevision":"%s","privacy":%s,"country":%s,"uuid":"%s","session":"%s"},"method":"getCommunicationToken","parameters":{"secretKey":"%s"}}'
        _ = self.sess.post(self.s_client_url + 'more.php?getCommunicationToken', data=gCT_json % (self.client, self.clientRevision, self.privacy, self.country, self.uuid, self.session, self.secretKey))
        # Use the json lib to future-proof against newer versions of requests
        self.token = json.loads(_.content)['result']
    def _prepToken(self, method):
        rnd = hashlib.md5(str(random.random())).hexdigest()[:6]
        return rnd + hashlib.sha1(':'.join([method, self.token, self.revToken, rnd])).hexdigest()
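
    # Worked example of the token scheme above (values hypothetical): with the
    # random salt rnd = 'a1b2c3' and method = 'getResultsFromSearch', the wire
    # token is
    #   'a1b2c3' + sha1('getResultsFromSearch:<commToken>:<revToken>:a1b2c3').hexdigest()
    # i.e. 6 hex chars of salt followed by 40 hex chars of SHA-1.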
    def _buildAPIcall(self, method, parameters):
        core_msg = '{"header":{"client":"%s","clientRevision":"%s","privacy":%s,"country":%s,"uuid":"%s","session":"%s","token":"%s"},"method":"%s","parameters":%s}'
        return core_msg % (self.client, self.clientRevision, self.privacy, self.country, self.uuid, self.session, self._prepToken(method), method, parameters)
    def _doAPIcall(self, method, parameters):
        _ = self.sess.post(self.client_url + 'more.php?%s' % method, data=self._buildAPIcall(method, parameters))
        return _.content
    def _getResultsFromSearch(self, searchStr):
        params = '{"query":"%s","type":["Songs","Playlists","Albums"],"guts":0,"ppOverride":""}'
        return json.loads(self._doAPIcall('getResultsFromSearch', params % searchStr))
    def _getStreamKeyFromSongIDEx(self, songDict):
        params = '{"prefetch":false,"mobile":true,"songID":%s,"country":%s}'
        return json.loads(self._doAPIcall('getStreamKeyFromSongIDEx', params % (songDict['SongID'], self.country)))['result']
    def _markSongDownloadedEx(self, streamDict):
        # streamKey, streamServerID, songID
        params = '{"streamKey":"%s","streamServerID":%s,"songID":%s}'
        return json.loads(self._doAPIcall('markSongDownloadedEx', params % (streamDict['streamKey'], streamDict['streamServerID'], streamDict['SongID'])))['result']
    def _markStreamKeyOver30Seconds(self, streamDict):
        params = '{"streamKey":"%s","streamServerID":%s,"songID":%s}'
        return json.loads(self._doAPIcall('markStreamKeyOver30Seconds', params % (streamDict['streamKey'], streamDict['streamServerID'], streamDict['SongID'])))['result']
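
# Minimal usage sketch for pyGroovClient (the song title is hypothetical, and
# this assumes the Grooveshark HTML5 pages still match the regexes scraped in
# setupClient above):
#
#   gsc = pyGroovClient()                  # scrapes config, gets comm token
#   hits = gsc.search('some song title')   # list of song dicts, best match first
#   if hits:
#       path = gsc.download(hits[0], 'gs_dl/song.mp3')
#       # path is the saved file on success; False/None if unavailable or failed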
# --- iDownloads transfer helpers ---
def sanitize(filename):
    safe = string.letters + string.digits + "()-.,_+{}'"
    return ''.join([x if x in safe else '_' for x in filename])
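
# e.g. sanitize("AC/DC - Back In Black.mp3") -> 'AC_DC_-_Back_In_Black.mp3'
# (spaces and '/' are not in the safe set above, so both map to '_')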
def do_search_and_download(gsClient):
    print "\nEnter your search:"
    searchStr = raw_input('> ').strip()
    if not searchStr:
        print "* Cancelled."
        return
    print "* Searching ..."
    results = gsClient.search(searchStr)
    top8 = results[:8]
    if not top8:
        print "! No results found for:", searchStr
        return
    print "* Found, enter the number for download:"
    for i, x in enumerate(top8):
        print "%s) %s - %s - %s" % (i + 1, x['SongName'], x['ArtistName'], x['AlbumName'])
    print '0) Cancel'
    choice = ''.join([x for x in raw_input('> ').strip() if x in '0123456789'])
    if choice in ['0', '']:
        print "* Cancelled."
        return
    else:
        nChoice = int(choice) - 1
        cDict = top8[nChoice]
    print "* Downloading: %s" % cDict['SongName']
    fname = "gs_dl/" + sanitize("%s - %s - %s.mp3" % (cDict['ArtistName'], cDict['AlbumName'], cDict['SongName']))
    if not os.path.exists('gs_dl'):
        os.makedirs('gs_dl')
    # download() returns the saved path, False (not available), or None (download failed)
    if not gsClient.download(cDict, fname):
        print "* Song not available (removed or not for mobile)."
        return
    # Prep webserver
    global ready_to_stop, did_download
    ready_to_stop = False
    did_download = False
    port = 8000
    handler = SmarterHTTPRequestHandler
    # Configure transfer settings
    handler.file_name = fname
    # bind_and_activate=False so we can set allow_reuse_address before binding
    httpd = SmarterHTTPD(("", port), handler, False)
    httpd.allow_reuse_address = True
    httpd.server_bind()
    httpd.server_activate()
    download_url = 'http://127.0.0.1:8000/transfer'
    # iDownloads registers the iDownloads:// URL scheme on iOS, so opening this
    # URL hands the transfer off to the iDownloads app instead of Safari
    download_url = download_url.replace('http://', 'iDownloads://')
    print '* Transferring to browser ...'
    webbrowser.open(download_url)
    # print download_url
    httpd.serve_limited(timeout=3, max_requests=8)
    httpd.release()
    if did_download:
        print '* Transfer complete, deleting local copy.'
    else:
        print '* Transfer did not complete, deleting local copy.'
    try:
        os.remove(fname)
    except:
        sys.exc_clear()
    return
def _unzip(a_zip=None, path='.', altpath='unzipped'):
    if a_zip is None:
        return
    filename = os.path.abspath(a_zip)
    if not os.path.isfile(filename):
        return
    # PK magic marker check
    f = open(filename, 'rb')
    try:
        pk_check = f.read(2)
    except Exception:
        pk_check = ''
    finally:
        f.close()
    if pk_check != 'PK':
        print "unzip: %s: does not appear to be a zip file" % a_zip
    else:
        altpath = os.path.join(os.path.dirname(filename), altpath)
        location = os.path.abspath(altpath)
        if not os.path.exists(location):
            os.makedirs(location)
        zipfp = open(filename, 'rb')
        try:
            zipf = zipfile.ZipFile(zipfp)
            # Check for a leading directory common to all files and remove it
            dirnames = [os.path.join(os.path.dirname(x), '') for x in zipf.namelist()]
            common_dir = os.path.commonprefix(dirnames or ['/'])
            # Make sure there aren't 2 or more subdirectories with the same prefix
            if not common_dir.endswith('/'):
                common_dir = os.path.join(os.path.dirname(common_dir), '')
            for name in zipf.namelist():
                data = zipf.read(name)
                fn = name
                if common_dir:
                    if fn.startswith(common_dir):
                        fn = fn.split(common_dir, 1)[-1]
                    elif fn.startswith('/' + common_dir):
                        fn = fn.split('/' + common_dir, 1)[-1]
                fn = fn.lstrip('/')
                fn = os.path.join(location, fn)
                dirf = os.path.dirname(fn)
                if not os.path.exists(dirf):
                    os.makedirs(dirf)
                if fn.endswith('/'):
                    # A directory
                    if not os.path.exists(fn):
                        os.makedirs(fn)
                else:
                    fp = open(fn, 'wb')
                    try:
                        fp.write(data)
                    finally:
                        fp.close()
        except Exception:
            zipfp.close()
            print "unzip: %s: zip file is corrupt" % a_zip
            return
        zipfp.close()
        return os.path.abspath(location)
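
# e.g. _unzip('requests_1.2.0.zip', altpath='requests_zip') extracts next to the
# zip file, strips any single leading directory common to all entries (here the
# 'requests-1.2.0/' wrapper in the GitHub archive), and returns the extraction
# path on success or None on failure.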
def req12_setup():
    # Use Pythonista's bundled (older) requests module just to fetch the zip
    import requests as old_req
    relative_dir = os.path.abspath(os.path.dirname(__file__))
    curdir = os.getcwd()
    os.chdir(relative_dir)
    print '!!! requests-1.2.0 not installed, downloading rev.d06908d ...'
    zip_url = 'https://github.com/kennethreitz/requests/archive/v1.2.0.zip'
    print ' * Downloading: requests_1.2.0.zip (565KB)...'
    f = open('requests_1.2.0.zip', 'wb')
    try:
        f.write(old_req.get(zip_url).content)
    except Exception:
        sys.exc_clear()
    f.close()
    # Unload the built-in requests module
    del old_req
    print "!!! zip downloaded, extracting ..."
    try:
        shutil.rmtree('requests_zip', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    _ = _unzip('requests_1.2.0.zip', altpath='requests_zip')
    print "!!! Extraction complete, re-arranging ..."
    try:
        shutil.rmtree('requests_1_2', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    os.rename('requests_zip/requests', 'requests_1_2')
    print "!!! Re-arranging complete, cleaning up ..."
    try:
        os.remove('requests_1.2.0.zip')
        shutil.rmtree('requests_zip', ignore_errors=True)
    except Exception:
        sys.exc_clear()
    os.chdir(curdir)
def main():
    global requests
    # Ensure that the requests v1.2.0 module is available
    requests = None
    init_tries = 3
    while init_tries > 0:
        try:
            import requests_1_2 as requests
            init_tries = 0
        except:
            print "!!! Init failure %s of 3 ..." % (4 - init_tries)
            sys.exc_clear()
            req12_setup()
            init_tries -= 1
    if not requests:
        print '!!! Please check your network connection and try again.'
        return
    try:
        gsc = pyGroovClient()
    except Exception:
        sys.exc_clear()
        print "* Error initializing GS client, make sure you're online."
        return
    console.clear()
    print "* Client successfully initialized"
    loop = True
    while loop:
        print "Enter a menu number choice:"
        print "--------------------------"
        print "1) Search and download"
        print "0) Quit"
        choice = ''.join([x for x in raw_input('> ').strip() if x in '0123456789'])
        if choice in ['0', '']:
            print "* Quit."
            loop = False
        elif choice == '1':
            do_search_and_download(gsc)
        else:
            print "* Unknown choice number, try again."
        print ""

if __name__ == "__main__":
    main()
See http://omz-software.com/pythonista/forums/discussion/293/grooveshark-downloader-enjoy-#Item_13 for reports of this script breaking after changes to the Grooveshark web pages.