Skip to content

Instantly share code, notes, and snippets.

@6LYTH3
Last active August 29, 2015 14:06
Show Gist options
  • Save 6LYTH3/a40d51ad5e5aeebf396b to your computer and use it in GitHub Desktop.
Save 6LYTH3/a40d51ad5e5aeebf396b to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import urllib2
from HTMLParser import HTMLParser
class Find(HTMLParser):
    """Collect the text of selected ``<td>`` cells and group it into rows.

    Text chunks found inside a ``<td>`` whose ``class`` attribute is exactly
    ``tableDataLight`` or ``tableData`` are recorded.  Every six recorded
    chunks are bundled into one list and appended to ``container``.  A
    trailing group of fewer than six chunks is never added.

    Attributes:
        container: list of completed rows, each a list of six text chunks.
    """

    # ``class`` attribute values that mark a cell whose text is recorded.
    _CELL_CLASSES = ('tableDataLight', 'tableData')
    # Number of recorded text chunks that make up one row.
    _CELLS_PER_ROW = 6

    def __init__(self):
        HTMLParser.__init__(self)
        self.container = []
        # Parsing state is created per instance.  Keeping the row buffer as a
        # class attribute would share one list between every parser, so cells
        # seen by one instance would show up in the rows of the next.
        # The double underscore (name mangling) keeps these names from
        # colliding with attributes of the HTMLParser base class.
        self.__data = []          # chunks of the row currently being built
        self.__dataTable = 0      # number of chunks in the current row
        self.__recording = False  # True while inside a matching <td>

    def handle_starttag(self, tag, attrs):
        """Start recording when a ``<td>`` with a matching class opens."""
        if tag != 'td':
            return
        for name, value in attrs:
            if name == 'class' and value in self._CELL_CLASSES:
                self.__recording = True

    def handle_endtag(self, tag):
        """Stop recording when any ``</td>`` is reached."""
        if tag == 'td':
            self.__recording = False

    def handle_data(self, data):
        """Store ``data`` if recording; emit a row once it is complete."""
        if not self.__recording:
            return
        self.__data.append(data)
        self.__dataTable += 1
        if self.__dataTable == self._CELLS_PER_ROW:
            self.container.append(self.__data)
            # Rebind (do not clear) so the row just stored stays intact.
            self.__data = []
            self.__dataTable = 0
### Module-level helper functions ###
def getRawdata(site, family, container):
    """Return copies of the rows that contain both ``site`` and ``family``.

    Args:
        site: value that must be present in a row.
        family: value that must also be present in the same row.
        container: iterable of rows (each row a sequence of cell values).

    Returns:
        A new list holding a shallow copy of every matching row, in the
        order they appear in ``container``.  Empty rows never match.
    """
    # The membership tests do not depend on the individual cell, so evaluate
    # them once per row instead of once per element.
    return [list(data) for data in container
            if site in data and family in data]
def isSame(raw):
    """Print ``Same`` once for each pair of adjacent rows with equal fifth cells.

    Args:
        raw: sequence of rows; every row must have at least five items,
            because the item at index 4 is compared.

    Nothing is printed when ``raw`` has fewer than two rows.
    """
    # Walk the rows pairwise: (row0, row1), (row1, row2), ...
    for current, following in zip(raw, raw[1:]):
        if current[4] == following[4]:
            # Call form prints the same text on Python 2 and Python 3.
            print('Same')
if __name__ == '__main__':
    # Script entry point: parse a local HTML file, keep the table rows that
    # mention both targets, and report adjacent rows whose fifth cell matches.
    siteTarget = 'TTK'
    familyTarget = '1A9'

    response = urllib2.urlopen('file:///Users/apple/Code/Python/learn/urllib-test/test.html')
    try:
        html = response.read()
    finally:
        # Release the handle even if reading fails.
        response.close()

    p = Find()
    p.feed(html)
    # close() forces the parser to process any buffered input, so it has to
    # run before the collected rows are read.
    p.close()

    # Get Raw Data
    raw = getRawdata(siteTarget, familyTarget, p.container)
    # Adjacent rows can only be compared when there are at least two; use a
    # value comparison rather than an identity test on integers.
    if len(raw) > 1:
        isSame(raw)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment