Created
December 4, 2013 21:01
-
-
Save soap-DEIM/7795468 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import contextlib
import re
import urllib
from HTMLParser import HTMLParser
# ---- configuration ----
# Shows to look up, mapped to the episode numbers wanted for each one:
#   { 'name': [episode, ...] }
keyword_list = {
    'blacklist': [8],
    'Marvels Agents of S.H.I.E.L.D.': [6],
}

# Accepted release formats. Each entry should be one of:
#   "MP4", "WEB-DL", "HDTV", "1080P", "720P"
fmt_list = ['1080P']

# Resource page of each show. It is indexed with the keys of keyword_list,
# so every show listed above needs an entry here:
#   { 'name': 'resource_url' }
url_res = {
    'blacklist': 'http://www.yyets.com/resource/29964',
    'Marvels Agents of S.H.I.E.L.D.': 'http://www.yyets.com/resource/30675',
}
# ---- end configuration ----
class ThunderParser(HTMLParser):
    """Print the download links of the wanted episodes found on a resource page.

    An <li> element whose ``episode`` attribute equals one of the wanted
    episode numbers and whose ``format`` attribute equals one of the wanted
    formats is treated as a hit.  For every <a> inside such an <li>, the
    values of its ``title`` and ``thunderhref`` attributes are printed.

    Attributes:
        fmt: iterable of accepted format names (compared via ``str()``).
        episode: iterable of accepted episode numbers (compared via ``str()``).
        found_episode / found_fmt: whether the <li> currently being parsed
            matched an episode / a format; both are cleared on ``</li>``.
        found_sub: True while inside an <a class="subtitle"> element.
        url_sub: ``href`` of the last subtitle anchor seen, or None.
    """

    def __init__(self, fmt, episode):
        HTMLParser.__init__(self)
        self.found_episode = False
        self.found_fmt = False
        self.found_sub = False
        # Previously the subtitle link was written to a local variable and
        # thrown away; keep it on the instance so callers can read it.
        self.url_sub = None
        self.fmt = fmt
        self.episode = episode

    def handle_starttag(self, tag, attrs):
        """Track matching <li> elements and report the anchors inside them."""
        if tag == 'li':
            for name, value in attrs:
                if name == 'episode' and any(value == str(e) for e in self.episode):
                    self.found_episode = True
                elif name == 'format' and any(value == str(f) for f in self.fmt):
                    self.found_fmt = True
            return

        if tag != 'a' or not (self.found_episode and self.found_fmt):
            return

        for name, value in attrs:
            # A valueless attribute (e.g. <a title>) is reported as None;
            # there is nothing to print for it.
            if name in ('title', 'thunderhref') and value is not None:
                print(value + '\n')
            if name == 'class' and value == 'subtitle':
                self.found_sub = True
            # Attributes are visited in document order, so the href is only
            # picked up when it follows class="subtitle" within the tag.
            if self.found_sub and name == 'href':
                self.url_sub = value

    def handle_endtag(self, tag):
        """Clear the per-element state when an <li> or <a> closes."""
        if tag == 'li':
            self.found_episode = False
            self.found_fmt = False
        elif tag == 'a':
            self.found_sub = False
class SubParser(HTMLParser):
    """Collect subtitle download URLs from a search-result page.

    While inside <strong class="f14 list_title">, text that matches one of
    the wanted formats and one of the wanted episodes marks the current
    entry as a hit (and is printed).  While inside
    <div class="all_search_li2">, the last path segment of an anchor's
    ``href`` is remembered as the subtitle id.  When a </div> is reached
    with both conditions met, a download URL built from that id is stored.

    Args:
        fmt: iterable of format names, used as case-insensitive regex patterns.
        episode: iterable of episode numbers (or strings) to look for.
        results: optional list receiving the download URLs.  When omitted,
            URLs are appended to the module-level ``sub_list`` as before.
    """

    def __init__(self, fmt, episode, results=None):
        HTMLParser.__init__(self)
        self.found_name = False
        self.found_link = False
        self.found_fmt = False
        self.episode = episode
        self.fmt = fmt
        self.id = 0  # 0 means "no subtitle id captured for the current entry"
        self.results = results

    def _episode_in(self, e, data):
        """Return True when episode *e* is mentioned in *data*."""
        text = str(e)
        if text.isdigit():
            # A bare substring search would find episode 8 inside "1080P";
            # require that the number is not part of a longer digit run
            # (leading zeros, as in "E08", are still accepted).
            pattern = r'(?<!\d)0*' + text + r'(?!\d)'
        else:
            pattern = text
        return re.search(pattern, data) is not None

    def handle_starttag(self, tag, attrs):
        """Enter the title / link regions and capture the subtitle id."""
        if tag == 'strong':
            if ('class', 'f14 list_title') in attrs:
                self.found_name = True
        elif tag == 'div':
            if ('class', 'all_search_li2') in attrs:
                self.found_link = True
        elif tag == 'a' and self.found_link:
            for name, value in attrs:
                # value is None for a valueless href attribute.
                if name == 'href' and value:
                    self.id = value.split('/')[-1]

    def handle_data(self, data):
        """Flag (and print) title text that matches a format and an episode."""
        if not self.found_name:
            return
        for f in self.fmt:
            if re.search(f, data, re.IGNORECASE) is None:
                continue
            for e in self.episode:
                if self._episode_in(e, data):
                    self.found_fmt = True
                    print(data)

    def handle_endtag(self, tag):
        """Leave the title region, or finish an entry when a <div> closes."""
        if tag == 'strong':
            self.found_name = False
        elif tag == 'div':
            # Entries without a captured id are skipped: the original code
            # concatenated the initial integer 0 to a string and raised
            # TypeError in that case.
            if self.found_link and self.found_fmt and self.id:
                target = self.results if self.results is not None else sub_list
                target.append("http://www.yyets.com/subtitle/index/download?id=" + str(self.id))
            self.found_link = False
            self.found_fmt = False
            # Do not let this entry's id leak into the next one.
            self.id = 0
# sub downloader | |
def download(file_name, url):
    """Download *url* into *file_name*, printing progress as it goes.

    The data is copied in 8 KiB blocks.  After each block a status line with
    the byte count (and, when the total size is known, the percentage) is
    printed, followed by backspace characters.

    Args:
        file_name: path of the file to write (opened in binary mode).
        url: address to fetch.

    Both the connection and the output file are closed even when the
    transfer fails part-way.  A missing, malformed or zero Content-Length
    header no longer aborts the download; the percentage is simply omitted.
    """
    try:
        from urllib import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    block_sz = 8192
    with contextlib.closing(urlopen(url)) as u:
        try:
            file_size = int(u.info().get("Content-Length"))
        except (TypeError, ValueError):
            file_size = 0  # header absent or not a number
        print("Downloading: %s Bytes: %s" % (file_name, file_size or 'unknown'))

        file_size_dl = 0
        with open(file_name, 'wb') as f:
            while True:
                chunk = u.read(block_sz)
                if not chunk:
                    break
                file_size_dl += len(chunk)
                f.write(chunk)
                if file_size > 0:
                    status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                else:
                    status = r"%10d" % file_size_dl
                # Trailing backspaces move the cursor back over the status text.
                status = status + chr(8) * (len(status) + 1)
                print(status)
def _fetch(url):
    """Return the body of *url*, closing the connection afterwards."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


# For every configured show: print the download links of the wanted
# episodes, then list the matching subtitle download URLs.
for key in keyword_list:
    # SubParser appends its hits to this module-level list, so it is
    # rebound to a fresh list before each show is searched.
    sub_list = []
    print('Searching ' + key + '\n')

    # search for thunder download link
    print('[Thunder Links]')
    tp = ThunderParser(fmt_list, keyword_list[key])
    ht = _fetch(url_res[key])
    tp.feed(ht)

    # search for subtitle
    print('\n[Downloading Subtitle...]')
    sp = SubParser(fmt_list, keyword_list[key])
    ht = _fetch('http://www.yyets.com/search/index?keyword=' + key + '&order=uptime')
    sp.feed(ht)
    # NOTE: the subtitle links are only listed here; download() is not called.
    for i in sub_list:
        print(i)
    print('\n\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment