soap-DEIM/subtitle_checker.py

## subtitle_checker.py
import re
import urllib
from HTMLParser import HTMLParser

# Configuration
# keyword_list: search keywords; one search request is made per entry.
# fmt_list: format patterns a subtitle title is checked against.

keyword_list = [
    'blacklist',
    'Marvels Agents of S.H.I.E.L.D.',
]
fmt_list = [
    'WEB-DL',
    '720P',
    '1080P',
]

# End of configuration

# fmt is a list of format patterns, e.g. "WEB-DL", "HDTV" or "720P"
class Parser(HTMLParser):
    """Print text found in ``<strong class="f14 list_title">`` elements.

    Only text that matches at least one of the configured format patterns
    is printed.  Presumably these elements hold the subtitle titles of the
    search-results page -- confirm against the site's markup.
    """

    def __init__(self, fmt):
        """Create the parser.

        fmt -- iterable of patterns; each one is handed to ``re.search``
               and matched case-insensitively against the element text.
        """
        HTMLParser.__init__(self)
        # True while the parser is inside a matching <strong> element.
        self.name = False
        self.fmt = fmt

    def handle_starttag(self, tag, attrs):
        """Start collecting when a ``<strong class="f14 list_title">`` opens."""
        # attrs is a list of (name, value) pairs, so a tuple membership test
        # is the same check as looping over the pairs.
        if tag == 'strong' and ('class', 'f14 list_title') in attrs:
            self.name = True

    def handle_data(self, data):
        """Print *data* once if it is element text matching any pattern."""
        if not self.name:
            return
        # any() stops at the first hit, so text that matches several
        # patterns (e.g. both "720P" and "WEB-DL") is printed a single time
        # instead of once per matching pattern.
        if any(re.search(f, data, re.IGNORECASE) is not None for f in self.fmt):
            # Single-argument print(...) behaves the same as the Python 2
            # print statement and as the Python 3 function.
            print(data)

    def handle_endtag(self, tag):
        """Stop collecting when a ``<strong>`` element closes."""
        if tag == 'strong':
            self.name = False

# Run one search per configured keyword and print the matching text,
# followed by a separator line after each keyword.
for k in keyword_list:
    # A fresh parser per page: a truncated or malformed response can leave
    # buffered markup or a set "inside element" flag behind, which would
    # otherwise carry over into the next page.
    p = Parser(fmt_list)
    k = urllib.quote_plus(k)
    url = 'http://www.yyets.com/search/index?keyword=' + k + '&type=subtitle&order=uptime'
    response = urllib.urlopen(url)
    try:
        ht = response.read()
    finally:
        # Close the response even when reading the body fails.
        response.close()
    p.feed(ht)
    print('-----------------------')
	import re
	import urllib
	from HTMLParser import HTMLParser

	# Configuration
	# keyword_list: search keywords; one search request is made per entry.
	# fmt_list: format patterns a subtitle title is checked against.

	keyword_list = [
	    'blacklist',
	    'Marvels Agents of S.H.I.E.L.D.',
	]
	fmt_list = [
	    'WEB-DL',
	    '720P',
	    '1080P',
	]

	# End of configuration

	# fmt is a list of format patterns, e.g. "WEB-DL", "HDTV" or "720P"
	class Parser(HTMLParser):
	    """Print text found in ``<strong class="f14 list_title">`` elements.

	    Only text that matches at least one of the configured format patterns
	    is printed.  Presumably these elements hold the subtitle titles of the
	    search-results page -- confirm against the site's markup.
	    """

	    def __init__(self, fmt):
	        """Create the parser.

	        fmt -- iterable of patterns; each one is handed to ``re.search``
	               and matched case-insensitively against the element text.
	        """
	        HTMLParser.__init__(self)
	        # True while the parser is inside a matching <strong> element.
	        self.name = False
	        self.fmt = fmt

	    def handle_starttag(self, tag, attrs):
	        """Start collecting when a ``<strong class="f14 list_title">`` opens."""
	        # attrs is a list of (name, value) pairs, so a tuple membership
	        # test is the same check as looping over the pairs.
	        if tag == 'strong' and ('class', 'f14 list_title') in attrs:
	            self.name = True

	    def handle_data(self, data):
	        """Print *data* once if it is element text matching any pattern."""
	        if not self.name:
	            return
	        # any() stops at the first hit, so text that matches several
	        # patterns (e.g. both "720P" and "WEB-DL") is printed a single
	        # time instead of once per matching pattern.
	        if any(re.search(f, data, re.IGNORECASE) is not None for f in self.fmt):
	            # Single-argument print(...) behaves the same as the Python 2
	            # print statement and as the Python 3 function.
	            print(data)

	    def handle_endtag(self, tag):
	        """Stop collecting when a ``<strong>`` element closes."""
	        if tag == 'strong':
	            self.name = False

	# Run one search per configured keyword and print the matching text,
	# followed by a separator line after each keyword.
	for k in keyword_list:
	    # A fresh parser per page: a truncated or malformed response can
	    # leave buffered markup or a set "inside element" flag behind, which
	    # would otherwise carry over into the next page.
	    p = Parser(fmt_list)
	    k = urllib.quote_plus(k)
	    url = 'http://www.yyets.com/search/index?keyword=' + k + '&type=subtitle&order=uptime'
	    response = urllib.urlopen(url)
	    try:
	        ht = response.read()
	    finally:
	        # Close the response even when reading the body fails.
	        response.close()
	    p.feed(ht)
	    print('-----------------------')