Skip to content

Instantly share code, notes, and snippets.

@soap-DEIM
Created December 4, 2013 21:01
Show Gist options
  • Save soap-DEIM/7795468 to your computer and use it in GitHub Desktop.
Save soap-DEIM/7795468 to your computer and use it in GitHub Desktop.
import re
import urllib
from HTMLParser import HTMLParser
# ---- configuration ----
# Shows to search for, each mapped to the episodes wanted:
# { 'name' : [episode_list] }
keyword_list = {
    'blacklist': [8],
    'Marvels Agents of S.H.I.E.L.D.': [6],
}
# Accepted release formats. Each entry should be one of the following:
# "MP4", "WEB-DL", "HDTV", "1080P", "720P"
fmt_list = ['1080P']
# Resource page of every show named in keyword_list:
# { 'name' : 'resource_url' }
url_res = {
    'blacklist': 'http://www.yyets.com/resource/29964',
    'Marvels Agents of S.H.I.E.L.D.': 'http://www.yyets.com/resource/30675',
}
# ---- end of configuration ----
class ThunderParser(HTMLParser):
    """Print Thunder download links for the wanted episodes and formats.

    The parser watches ``<li>`` tags for ``episode`` and ``format``
    attributes.  While it is inside an ``<li>`` that matched both, the
    ``title`` and ``thunderhref`` attribute values of every ``<a>`` tag are
    printed, and the ``href`` of an ``<a class="subtitle">`` tag is kept in
    ``url_sub``.
    """

    def __init__(self, fmt, episode):
        """Create a parser.

        fmt     -- iterable of accepted format names (compared as strings)
        episode -- iterable of wanted episode numbers (compared as strings)
        """
        HTMLParser.__init__(self)
        self.found_episode = False
        self.found_fmt = False
        self.found_sub = False
        self.fmt = fmt
        self.episode = episode
        # href of the last subtitle anchor seen inside a matching <li>,
        # or None when none has been seen yet.
        self.url_sub = None

    def handle_starttag(self, tag, attrs):
        """Track matching ``<li>`` tags and report the anchors inside them."""
        if tag == 'li':
            wanted_episodes = [str(e) for e in self.episode]
            wanted_formats = [str(f) for f in self.fmt]
            for name, value in attrs:
                if name == 'episode' and value in wanted_episodes:
                    self.found_episode = True
                if name == 'format' and value in wanted_formats:
                    self.found_fmt = True
        if self.found_episode and self.found_fmt and tag == 'a':
            for name, value in attrs:
                # HTMLParser reports a valueless attribute as None; skip it
                # rather than fail on the string concatenation.
                if name in ('title', 'thunderhref') and value is not None:
                    print(value + '\n')
            # Look the attributes up by name so the result does not depend
            # on whether ``class`` or ``href`` is written first in the tag.
            by_name = dict(attrs)
            if by_name.get('class') == 'subtitle':
                self.found_sub = True
            if self.found_sub and by_name.get('href') is not None:
                self.url_sub = by_name['href']

    def handle_endtag(self, tag):
        """Reset the per-``<li>`` and per-``<a>`` state when they close."""
        if tag == 'li':
            self.found_episode = False
            self.found_fmt = False
        if tag == 'a':
            self.found_sub = False
class SubParser(HTMLParser):
    """Collect subtitle download URLs from a search-result page.

    A result is recognised by a ``<div class="all_search_li2">`` element.
    The last path segment of an ``<a href>`` inside it is taken as the
    subtitle id, and the text of a ``<strong class="f14 list_title">``
    element is taken as its title.  When the title names a wanted format
    and a wanted episode, the download URL for the id is appended to the
    result list as the ``<div>`` closes.
    """

    def __init__(self, fmt, episode, results=None):
        """Create a parser.

        fmt     -- iterable of format patterns, searched case-insensitively
        episode -- iterable of wanted episode numbers
        results -- optional list that receives the download URLs; when it
                   is omitted the module-level ``sub_list`` is used
        """
        HTMLParser.__init__(self)
        self.found_name = False
        self.found_link = False
        self.found_fmt = False
        self.episode = episode
        self.fmt = fmt
        self.id = 0
        self.results = results

    def _has_wanted_episode(self, text):
        """Return True when *text* holds a wanted episode as its own number.

        The number may carry leading zeros ("E08" matches episode 8) but may
        not be part of a longer run of digits, so episode 8 no longer matches
        "1080P".  NOTE(review): a season number such as "S01" still matches
        episode 1; the title layout is not visible here, so this is not
        narrowed further.
        """
        for e in self.episode:
            pattern = r'(?<!\d)0*' + re.escape(str(e)) + r'(?!\d)'
            if re.search(pattern, text) is not None:
                return True
        return False

    def handle_starttag(self, tag, attrs):
        """Note when a title, a result block or a link inside one starts."""
        if tag == 'strong':
            for name, value in attrs:
                if name == 'class' and value == 'f14 list_title':
                    self.found_name = True
        if tag == 'div':
            for name, value in attrs:
                if name == 'class' and value == 'all_search_li2':
                    self.found_link = True
        if self.found_link and tag == 'a':
            for name, value in attrs:
                # A valueless href is reported as None; ignore it.
                if name == 'href' and value:
                    self.id = value.split('/')[-1]

    def handle_data(self, data):
        """Flag the current result when its title matches format and episode."""
        if not self.found_name:
            return
        fmt_hit = any(re.search(f, data, re.IGNORECASE) is not None
                      for f in self.fmt)
        if fmt_hit and self._has_wanted_episode(data):
            self.found_fmt = True
            # Printed once per matching title.
            print(data)

    def handle_endtag(self, tag):
        """Emit the download URL of a matching result and reset the state."""
        if tag == 'strong':
            self.found_name = False
        if tag == 'div':
            # self.id is still the initial 0 when no link was seen; there is
            # no URL to build in that case.
            if self.found_link and self.found_fmt and self.id:
                if self.results is not None:
                    target = self.results
                else:
                    target = sub_list
                target.append("http://www.yyets.com/subtitle/index/download?id=" + str(self.id))
            self.found_link = False
            self.found_fmt = False
# sub downloader
def download(file_name, url):
    """Download *url* into the file *file_name*, printing progress.

    file_name -- path of the file to write (opened in binary mode)
    url       -- address handed to ``urllib.urlopen``

    The total size is read from the ``Content-Length`` response header.
    When the header is missing, not a number, or zero, the percentage is
    left out of the progress line and only the byte count is printed.
    The output file and the connection are closed even if the transfer
    fails part-way.
    """
    u = urllib.urlopen(url)
    try:
        lengths = u.info().getheaders("Content-Length")
        try:
            file_size = int(lengths[0]) if lengths else 0
        except ValueError:
            file_size = 0
        print("Downloading: %s Bytes: %s" % (file_name, file_size or 'unknown'))
        file_size_dl = 0
        block_sz = 8192
        with open(file_name, 'wb') as f:
            while True:
                chunk = u.read(block_sz)
                if not chunk:
                    break
                file_size_dl += len(chunk)
                f.write(chunk)
                if file_size:
                    status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                else:
                    # Total unknown: a percentage cannot be computed.
                    status = r"%10d" % file_size_dl
                # Trailing backspaces (chr(8)) are appended after the text.
                status = status + chr(8)*(len(status)+1)
                print(status)
    finally:
        u.close()
def _fetch(url):
    """Return the body of *url*, closing the connection afterwards."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


for key in keyword_list:
    # SubParser appends the subtitle URLs it finds to this module-level
    # list, so it is reset for every show.
    sub_list = []
    print('Searching ' + key + '\n')
    # search for thunder download link
    print('[Thunder Links]')
    tp = ThunderParser(fmt_list, keyword_list[key])
    ht = _fetch(url_res[key])
    tp.feed(ht)
    # search for subtitle
    print('\n[Downloading Subtitle...]')
    sp = SubParser(fmt_list, keyword_list[key])
    # The show name may contain spaces or other reserved characters, so it
    # is encoded before being placed in the query string.
    ht = _fetch('http://www.yyets.com/search/index?keyword=' + urllib.quote_plus(key) + '&order=uptime')
    sp.feed(ht)
    # NOTE: the URLs are only listed here; download() is not called.
    for i in sub_list:
        print(i)
    print('\n\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment