Skip to content

Instantly share code, notes, and snippets.

@hzqtc
Last active December 16, 2015 08:59
Show Gist options
  • Save hzqtc/5409775 to your computer and use it in GitHub Desktop.
Save hzqtc/5409775 to your computer and use it in GitHub Desktop.
Download Jiandan OOXX gallery. http://jandan.net/ooxx
#!/usr/bin/env python
# Example:
# curl -s http://jandan.net/ooxx | ./jdooxx.py -o 5 -r 2.0 -u | wget -nv -i -
# This will download every picture on the latest page of Jiandan OOXX that has
# oo >= 5 and an oo/xx ratio >= 2.0.
import getopt
import sys

try:
    import HTMLParser  # Python 2 module name
except ImportError:
    # Python 3 moved the same HTMLParser class into html.parser.
    import html.parser as HTMLParser
class OOXXImage(object):
    """One picture entry together with its vote counts.

    The fields are plain public attributes that the parser fills in:
    ``oo`` and ``xx`` are integer vote counts and ``url`` is the image
    address (an empty string until one has been seen).
    """

    def __init__(self):
        self.oo = 0
        self.xx = 0
        self.url = ""

    def test(self, ooMin, xxMax, ratio):
        """Return True when the image passes all three vote filters.

        Args:
            ooMin: minimum accepted ``oo`` count (inclusive).
            xxMax: maximum accepted ``xx`` count (inclusive).
            ratio: minimum accepted ``oo / xx`` quotient (inclusive).

        An image with no ``xx`` votes is rated as if it had exactly one, so
        the quotient is always defined.  The stored ``xx`` count itself is
        left untouched: the check is a pure query, which keeps later reports
        showing the real count and lets ``xxMax=0`` match such images.
        """
        divisor = self.xx if self.xx != 0 else 1
        return (self.oo >= ooMin
                and self.xx <= xxMax
                and float(self.oo) / float(divisor) >= ratio)
class JDOOXXParser(HTMLParser.HTMLParser):
    """Collect one OOXXImage per ``<li id="comment-...">`` element.

    While inside such an ``<li>`` the parser records:
      * the ``src`` of an ``<img>`` that carries no ``class`` attribute,
      * the number inside ``<span id="cos_support-...">`` as ``oo``,
      * the number inside ``<span id="cos_unsupport-...">`` as ``xx``.

    When the ``<li>`` closes, the image is appended to ``imageList`` and a
    fresh one is started.  Malformed markup (missing ``src``, valueless
    ``id``, non-numeric vote text) is skipped instead of raising.
    """

    def __init__(self):
        # Explicit base-class call: Python 2's HTMLParser is an old-style
        # class, so super() is not usable there.
        HTMLParser.HTMLParser.__init__(self)
        self.withinPostLi = False   # True between <li id="comment-..."> and </li>
        self.fetchData = None       # "oo" / "xx" while a vote count is awaited
        self.currentImage = OOXXImage()
        self.imageList = []

    def handle_starttag(self, tag, attrs):
        """Track entry into a post and note which value the next text holds."""
        attrMap = dict(attrs)
        # A valueless attribute (e.g. <li id>) is reported as None.
        elemId = attrMap.get("id") or ""
        if self.withinPostLi:
            if tag == "img" and "class" not in attrMap:
                src = attrMap.get("src")
                if src:
                    self.currentImage.url = src
            elif tag == "span" and elemId.startswith("cos_support-"):
                self.fetchData = "oo"
            elif tag == "span" and elemId.startswith("cos_unsupport-"):
                self.fetchData = "xx"
        # Deliberately a separate check: the opening <li> itself is never
        # treated as post content.
        if tag == "li" and elemId.startswith("comment-"):
            self.withinPostLi = True

    def handle_endtag(self, tag):
        """Finish the current post on ``</li>``; drop a stale fetch on ``</span>``."""
        if tag == "span":
            # An empty vote span must not make unrelated later text be
            # parsed as a count.
            self.fetchData = None
        elif tag == "li" and self.withinPostLi:
            self.withinPostLi = False
            self.fetchData = None
            self.imageList.append(self.currentImage)
            self.currentImage = OOXXImage()

    def handle_data(self, data):
        """Store the awaited vote count, ignoring text that is not a number."""
        if self.fetchData is None:
            return
        text = data.strip()
        if not text:
            # Only whitespace so far; keep waiting for the number.
            return
        try:
            count = int(text)
        except ValueError:
            count = None
        if count is not None:
            if self.fetchData == "oo":
                self.currentImage.oo = count
            elif self.fetchData == "xx":
                self.currentImage.xx = count
        self.fetchData = None
def usage():
    """Print the command-line help text to standard output."""
    helpLines = (
        "A parser for Jiandan (http://jandan.net/ooxx) MM Gallery.",
        "Read from standard input and print image information.",
        " -h, --help Print this information.",
        " -o, --oomin=INT Set the minimum OO value; images with a lower OO value will be excluded.",
        " -x, --xxmax=INT Set the maximum XX value; images with a greater XX value will be excluded.",
        " -r, --ratio=FLOAT Set the OO/XX ratio; images with lower ratio will be excluded",
        " -u, --urlonly Only print the URLs; both OO and XX value will be omitted.",
    )
    # A single parenthesised argument prints identically whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("\n".join(helpLines))
# Script entry point: read an HTML page from standard input, parse it, and
# print the images that pass the filters chosen on the command line.
if __name__ == "__main__":
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:x:r:u", ["help", "oomin=", "xxmax=", "ratio=", "urlonly"])
    except getopt.GetoptError as err:
        # Standard output is usually piped into a downloader, so the error
        # itself is reported on standard error.
        sys.stderr.write(str(err) + "\n")
        usage()
        sys.exit(2)

    # Defaults accept every image: no OO floor, a generous XX ceiling and a
    # ratio threshold below any possible quotient.
    ooMin = 0
    xxMax = 1000
    ratio = -1
    urlOnly = False
    try:
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            elif o in ("-o", "--oomin"):
                ooMin = int(a)
            elif o in ("-x", "--xxmax"):
                xxMax = int(a)
            elif o in ("-r", "--ratio"):
                ratio = float(a)
            elif o in ("-u", "--urlonly"):
                urlOnly = True
    except ValueError as err:
        # A non-numeric option value is a usage error, not a crash.
        sys.stderr.write("invalid option value: %s\n" % err)
        usage()
        sys.exit(2)

    page = sys.stdin.read()
    # Python 2 yields bytes here and Python 3 already-decoded text; only the
    # former needs decoding.
    if isinstance(page, bytes):
        page = page.decode("utf-8")

    parser = JDOOXXParser()
    parser.feed(page)
    # Flush any text the parser is still buffering at end of input.
    parser.close()

    for img in parser.imageList:
        if img.test(ooMin, xxMax, ratio):
            if urlOnly:
                print(img.url)
            else:
                print("oo = %3d, xx = %3d, %s" % (img.oo, img.xx, img.url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment