# soap-DEIM/yyets.py

## yyets.py
import re
import urllib
from HTMLParser import HTMLParser

# ---- Configuration ----
# Edit the three tables below to choose what the script searches for.

# Show name -> list of episode numbers to look up.
keyword_list = {
    'blacklist': [8],
    'Marvels Agents of S.H.I.E.L.D.': [6],
}
# Wanted release formats; each entry should be one of:
# "MP4", "WEB-DL", "HDTV", "1080P", "720P"
fmt_list = ['1080P']
# Show name -> resource page URL (looked up with the keys of keyword_list).
url_res = {
    'blacklist': 'http://www.yyets.com/resource/29964',
    'Marvels Agents of S.H.I.E.L.D.': 'http://www.yyets.com/resource/30675',
}

# ---- End of configuration ----


class ThunderParser(HTMLParser):
    """Print Thunder download links for the wanted episodes and formats.

    The parser watches ``<li>`` start tags for ``episode`` and ``format``
    attributes.  Once both a wanted episode and a wanted format have been
    seen, the ``title`` and ``thunderhref`` attributes of every ``<a>`` start
    tag are printed until the closing ``</li>`` clears the match.  The
    ``href`` of an ``<a class="subtitle">`` seen in that state is kept in
    ``url_sub``.
    """

    def __init__(self, fmt, episode):
        """Create a parser.

        fmt     -- iterable of wanted format names (compared as strings)
        episode -- iterable of wanted episode numbers (compared as strings)
        """
        HTMLParser.__init__(self)
        self.found_episode = False
        self.found_fmt = False
        self.found_sub = False
        self.fmt = fmt
        self.episode = episode
        # href of the most recent subtitle link found while an <li> matched,
        # or None when no such link has been seen yet.
        self.url_sub = None

    def handle_starttag(self, tag, attrs):
        """Track matching ``<li>`` tags and report the links inside them."""
        if tag == 'li':
            wanted_episodes = [str(e) for e in self.episode]
            wanted_formats = [str(f) for f in self.fmt]
            for name, value in attrs:
                if name == 'episode' and value in wanted_episodes:
                    self.found_episode = True
                if name == 'format' and value in wanted_formats:
                    self.found_fmt = True
        if self.found_episode and self.found_fmt and tag == 'a':
            for name, value in attrs:
                # A valueless attribute is reported as None; nothing to print.
                if name in ('title', 'thunderhref') and value is not None:
                    print(value + '\n')
                if name == 'class' and value == 'subtitle':
                    self.found_sub = True
            if self.found_sub:
                # Looked up in a second pass so the href is captured whether
                # it is written before or after the class attribute.
                for name, value in attrs:
                    if name == 'href':
                        self.url_sub = value

    def handle_endtag(self, tag):
        """Clear the per-``<li>`` and per-``<a>`` match state."""
        if tag == 'li':
            self.found_episode = False
            self.found_fmt = False
        if tag == 'a':
            self.found_sub = False


class SubParser(HTMLParser):
    """Collect subtitle download URLs from a search-result page.

    Text inside ``<strong class="f14 list_title">`` is treated as a result
    title.  When a title names both a wanted format and a wanted episode, and
    an ``<a href=...>`` was seen after ``<div class="all_search_li2">``
    opened, a download URL built from the last path segment of that href is
    appended to the result list when a ``</div>`` is reached.
    """

    _DOWNLOAD_URL = "http://www.yyets.com/subtitle/index/download?id="

    def __init__(self, fmt, episode, results=None):
        """Create a parser.

        fmt     -- iterable of wanted format names (matched case-insensitively)
        episode -- iterable of wanted episode numbers
        results -- optional list that receives the download URLs; when it is
                   omitted the module-level ``sub_list`` is appended to
        """
        HTMLParser.__init__(self)
        self.found_name = False
        self.found_link = False
        self.found_fmt = False
        self.episode = episode
        self.fmt = fmt
        self.id = 0
        self.results = results

    def _title_matches(self, title):
        """Return True when *title* names a wanted format and episode."""
        has_format = any(
            re.search(re.escape(str(f)), title, re.IGNORECASE)
            for f in self.fmt)
        if not has_format:
            return False
        # The episode number must stand alone (leading zeros allowed).  A bare
        # substring search would find episode 8 inside "1080P" or "E18".
        return any(
            re.search(r'(?<!\d)0*' + re.escape(str(e)) + r'(?!\d)', title)
            for e in self.episode)

    def handle_starttag(self, tag, attrs):
        """Track title/link containers and remember the subtitle id."""
        if tag == 'strong':
            for name, value in attrs:
                if name == 'class' and value == 'f14 list_title':
                    self.found_name = True
        if tag == 'div':
            for name, value in attrs:
                if name == 'class' and value == 'all_search_li2':
                    self.found_link = True
        if self.found_link and tag == 'a':
            for name, value in attrs:
                if name == 'href' and value is not None:
                    self.id = value.split('/')[-1]

    def handle_data(self, data):
        """Flag (and print) a result title that matches the search."""
        if self.found_name and self._title_matches(data):
            self.found_fmt = True
            print(data)

    def handle_endtag(self, tag):
        """Emit the download URL of a matched result and reset the state."""
        if tag == 'strong':
            self.found_name = False
        if tag == 'div':
            # self.id stays 0 until an href has been seen; skip the result
            # rather than build a URL without an id.
            if self.found_link and self.found_fmt and self.id:
                if self.results is not None:
                    target = self.results
                else:
                    target = sub_list
                target.append(self._DOWNLOAD_URL + str(self.id))
            self.found_link = False
            self.found_fmt = False
            # Do not let this result's id leak into the next one.
            self.id = 0


# sub downloader
def download(file_name, url):
    """Download *url* into *file_name*, printing progress after each block.

    file_name -- path of the local file to write (opened in binary mode)
    url       -- URL fetched with ``urllib.urlopen``

    The total size is taken from the Content-Length response header.  When
    the header is missing, not a number or zero, the percentage is left out
    of the progress line instead of failing.
    """
    u = urllib.urlopen(url)
    try:
        lengths = u.info().getheaders("Content-Length")
        try:
            file_size = int(lengths[0])
        except (IndexError, ValueError):
            file_size = 0
        print("Downloading: %s Bytes: %s" % (file_name, file_size or 'unknown'))

        file_size_dl = 0
        block_sz = 8192

        # The with-block closes the file even when a read or write fails.
        with open(file_name, 'wb') as f:
            while True:
                chunk = u.read(block_sz)
                if not chunk:
                    break

                file_size_dl += len(chunk)
                f.write(chunk)
                if file_size:
                    status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                else:
                    status = r"%10d" % file_size_dl
                # Trailing backspaces (chr(8)) are appended to each status line.
                status = status + chr(8)*(len(status)+1)
                print(status)
    finally:
        u.close()


# Script body: for every configured show, print the matching Thunder links
# and then the subtitle download URLs found through the site search.
for key in keyword_list:

    # SubParser.handle_endtag appends to this module-level list, so it is
    # rebound to a fresh list before each search.
    sub_list = []

    print('Searching ' + key + '\n')
    # search for thunder download link
    print('[Thunder Links]')
    tp = ThunderParser(fmt_list, keyword_list[key])
    ht = urllib.urlopen(url_res[key]).read()
    tp.feed(ht)

    # search for subtitle
    print('\n[Downloading Subtitle...]')
    sp = SubParser(fmt_list, keyword_list[key])
    # Percent-encode the show name so spaces, '&' or '#' in it cannot break
    # the query string.
    search_url = ('http://www.yyets.com/search/index?keyword='
                  + urllib.quote(key) + '&order=uptime')
    ht = urllib.urlopen(search_url).read()
    sp.feed(ht)
    # The URLs are only listed here; download(file_name, url) is not called.
    for i in sub_list:
        print(i)

    print('\n\n')
	import re
	import urllib
	from HTMLParser import HTMLParser

	# ---- Configuration ----
	# Edit the three tables below to choose what the script searches for.

	# Show name -> list of episode numbers to look up.
	keyword_list = {
	    'blacklist': [8],
	    'Marvels Agents of S.H.I.E.L.D.': [6],
	}
	# Wanted release formats; each entry should be one of:
	# "MP4", "WEB-DL", "HDTV", "1080P", "720P"
	fmt_list = ['1080P']
	# Show name -> resource page URL (looked up with the keys of keyword_list).
	url_res = {
	    'blacklist': 'http://www.yyets.com/resource/29964',
	    'Marvels Agents of S.H.I.E.L.D.': 'http://www.yyets.com/resource/30675',
	}

	# ---- End of configuration ----


	class ThunderParser(HTMLParser):
	    """Print Thunder download links for the wanted episodes and formats.

	    The parser watches ``<li>`` start tags for ``episode`` and ``format``
	    attributes.  Once both a wanted episode and a wanted format have been
	    seen, the ``title`` and ``thunderhref`` attributes of every ``<a>``
	    start tag are printed until the closing ``</li>`` clears the match.
	    The ``href`` of an ``<a class="subtitle">`` seen in that state is kept
	    in ``url_sub``.
	    """

	    def __init__(self, fmt, episode):
	        """Create a parser.

	        fmt     -- iterable of wanted format names (compared as strings)
	        episode -- iterable of wanted episode numbers (compared as strings)
	        """
	        HTMLParser.__init__(self)
	        self.found_episode = False
	        self.found_fmt = False
	        self.found_sub = False
	        self.fmt = fmt
	        self.episode = episode
	        # href of the most recent subtitle link found while an <li>
	        # matched, or None when no such link has been seen yet.
	        self.url_sub = None

	    def handle_starttag(self, tag, attrs):
	        """Track matching ``<li>`` tags and report the links inside them."""
	        if tag == 'li':
	            wanted_episodes = [str(e) for e in self.episode]
	            wanted_formats = [str(f) for f in self.fmt]
	            for name, value in attrs:
	                if name == 'episode' and value in wanted_episodes:
	                    self.found_episode = True
	                if name == 'format' and value in wanted_formats:
	                    self.found_fmt = True
	        if self.found_episode and self.found_fmt and tag == 'a':
	            for name, value in attrs:
	                # A valueless attribute is reported as None; nothing to print.
	                if name in ('title', 'thunderhref') and value is not None:
	                    print(value + '\n')
	                if name == 'class' and value == 'subtitle':
	                    self.found_sub = True
	            if self.found_sub:
	                # Looked up in a second pass so the href is captured whether
	                # it is written before or after the class attribute.
	                for name, value in attrs:
	                    if name == 'href':
	                        self.url_sub = value

	    def handle_endtag(self, tag):
	        """Clear the per-``<li>`` and per-``<a>`` match state."""
	        if tag == 'li':
	            self.found_episode = False
	            self.found_fmt = False
	        if tag == 'a':
	            self.found_sub = False


	class SubParser(HTMLParser):
	    """Collect subtitle download URLs from a search-result page.

	    Text inside ``<strong class="f14 list_title">`` is treated as a result
	    title.  When a title names both a wanted format and a wanted episode,
	    and an ``<a href=...>`` was seen after ``<div class="all_search_li2">``
	    opened, a download URL built from the last path segment of that href
	    is appended to the result list when a ``</div>`` is reached.
	    """

	    _DOWNLOAD_URL = "http://www.yyets.com/subtitle/index/download?id="

	    def __init__(self, fmt, episode, results=None):
	        """Create a parser.

	        fmt     -- iterable of wanted format names (matched case-insensitively)
	        episode -- iterable of wanted episode numbers
	        results -- optional list that receives the download URLs; when it
	                   is omitted the module-level ``sub_list`` is appended to
	        """
	        HTMLParser.__init__(self)
	        self.found_name = False
	        self.found_link = False
	        self.found_fmt = False
	        self.episode = episode
	        self.fmt = fmt
	        self.id = 0
	        self.results = results

	    def _title_matches(self, title):
	        """Return True when *title* names a wanted format and episode."""
	        has_format = any(
	            re.search(re.escape(str(f)), title, re.IGNORECASE)
	            for f in self.fmt)
	        if not has_format:
	            return False
	        # The episode number must stand alone (leading zeros allowed).  A
	        # bare substring search would find episode 8 inside "1080P" or "E18".
	        return any(
	            re.search(r'(?<!\d)0*' + re.escape(str(e)) + r'(?!\d)', title)
	            for e in self.episode)

	    def handle_starttag(self, tag, attrs):
	        """Track title/link containers and remember the subtitle id."""
	        if tag == 'strong':
	            for name, value in attrs:
	                if name == 'class' and value == 'f14 list_title':
	                    self.found_name = True
	        if tag == 'div':
	            for name, value in attrs:
	                if name == 'class' and value == 'all_search_li2':
	                    self.found_link = True
	        if self.found_link and tag == 'a':
	            for name, value in attrs:
	                if name == 'href' and value is not None:
	                    self.id = value.split('/')[-1]

	    def handle_data(self, data):
	        """Flag (and print) a result title that matches the search."""
	        if self.found_name and self._title_matches(data):
	            self.found_fmt = True
	            print(data)

	    def handle_endtag(self, tag):
	        """Emit the download URL of a matched result and reset the state."""
	        if tag == 'strong':
	            self.found_name = False
	        if tag == 'div':
	            # self.id stays 0 until an href has been seen; skip the result
	            # rather than build a URL without an id.
	            if self.found_link and self.found_fmt and self.id:
	                if self.results is not None:
	                    target = self.results
	                else:
	                    target = sub_list
	                target.append(self._DOWNLOAD_URL + str(self.id))
	            self.found_link = False
	            self.found_fmt = False
	            # Do not let this result's id leak into the next one.
	            self.id = 0


	# sub downloader
	def download(file_name, url):
	    """Download *url* into *file_name*, printing progress after each block.

	    file_name -- path of the local file to write (opened in binary mode)
	    url       -- URL fetched with ``urllib.urlopen``

	    The total size is taken from the Content-Length response header.  When
	    the header is missing, not a number or zero, the percentage is left
	    out of the progress line instead of failing.
	    """
	    u = urllib.urlopen(url)
	    try:
	        lengths = u.info().getheaders("Content-Length")
	        try:
	            file_size = int(lengths[0])
	        except (IndexError, ValueError):
	            file_size = 0
	        print("Downloading: %s Bytes: %s" % (file_name, file_size or 'unknown'))

	        file_size_dl = 0
	        block_sz = 8192

	        # The with-block closes the file even when a read or write fails.
	        with open(file_name, 'wb') as f:
	            while True:
	                chunk = u.read(block_sz)
	                if not chunk:
	                    break

	                file_size_dl += len(chunk)
	                f.write(chunk)
	                if file_size:
	                    status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
	                else:
	                    status = r"%10d" % file_size_dl
	                # Trailing backspaces (chr(8)) are appended to each status line.
	                status = status + chr(8)*(len(status)+1)
	                print(status)
	    finally:
	        u.close()



	# Script body: for every configured show, print the matching Thunder links
	# and then the subtitle download URLs found through the site search.
	for key in keyword_list:

	    # SubParser.handle_endtag appends to this module-level list, so it is
	    # rebound to a fresh list before each search.
	    sub_list = []

	    print('Searching ' + key + '\n')
	    # search for thunder download link
	    print('[Thunder Links]')
	    tp = ThunderParser(fmt_list, keyword_list[key])
	    ht = urllib.urlopen(url_res[key]).read()
	    tp.feed(ht)

	    # search for subtitle
	    print('\n[Downloading Subtitle...]')
	    sp = SubParser(fmt_list, keyword_list[key])
	    # Percent-encode the show name so spaces, '&' or '#' in it cannot break
	    # the query string.
	    search_url = ('http://www.yyets.com/search/index?keyword='
	                  + urllib.quote(key) + '&order=uptime')
	    ht = urllib.urlopen(search_url).read()
	    sp.feed(ht)
	    # The URLs are only listed here; download(file_name, url) is not called.
	    for i in sub_list:
	        print(i)

	    print('\n\n')