Skip to content

Instantly share code, notes, and snippets.

@angelworm
Last active December 16, 2015 18:09
Show Gist options
  • Save angelworm/5475900 to your computer and use it in GitHub Desktop.
Save angelworm/5475900 to your computer and use it in GitHub Desktop.
searchTag::String->[Thumbnail] getLargeImage::Id->URL
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import urllib
import urlparse
import re
import lxml.html
class Thumbnail:
    """One pixiv thumbnail together with its id, title, author and URLs."""

    def __init__(self, id_, title_, author_, pageURL_, imgURL_):
        """Store the given values on ``id``, ``title``, ``author``, ``pageURL`` and ``imgURL``."""
        self.id, self.title, self.author = id_, title_, author_
        self.pageURL, self.imgURL = pageURL_, imgURL_

    def _summary(self):
        """Return the ``id``/``img`` prefix shared by both textual forms."""
        return "id: " + str(self.id) + ", img: " + self.imgURL

    def __str__(self):
        """Return the short form: id and image URL only."""
        return self._summary()

    def description(self):
        """Return the long form: id, image URL, bracketed title and author."""
        title_part = ", title: "
        return self._summary() + title_part + u"「" + self.title + u"」 / " + self.author
def getPage(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    The request is sent with a ``Referer`` of ``http://www.pixiv.net`` and
    an ``Accept-Language`` of ``ja``.

    Args:
        url: The address to fetch.

    Returns:
        The response body as a ``unicode`` string.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen``.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib2.Request(url)
    req.add_header('Referer', 'http://www.pixiv.net')
    req.add_header('Accept-Language', 'ja')
    response = urllib2.urlopen(req)
    try:
        body = response.read()
    finally:
        # Release the connection even when read() fails; previously the
        # response object was never closed.
        response.close()
    return unicode(body, "utf-8")
def getLargeImage(id_):
    """Return a one-element list holding the image URL found on an illustration page.

    The page for *id_* is fetched in ``mode=medium`` and the ``src`` of the
    first ``<img border="0">`` element is returned, wrapped in a list.
    An ``IndexError`` is raised when the page has no such element.
    """
    page_url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id="+str(id_)
    document = lxml.html.fromstring(getPage(page_url))
    matches = document.xpath('//img[@border="0"]')
    first_img = matches[0]
    return [first_img.attrib['src']]
def searchTag(word, full=True):
    """Search pixiv by tag and return the thumbnails listed on the result page.

    Args:
        word: The tag to search for. A ``unicode`` value is encoded as
            UTF-8 before quoting; a byte string is quoted as-is (it is
            assumed to be UTF-8 already).
        full: When true the query uses ``s_mode=s_tag_full``; otherwise it
            uses ``s_mode=s_tag`` (labelled "partial match" in the original
            note).

    Returns:
        A list of ``Thumbnail`` objects, one per ``<li class="image-item">``
        element of the result page.

    Raises:
        IndexError: If a result item lacks the expected ``h1``, ``img`` or
            second ``a`` element.
        AttributeError: If an item's link does not contain an illustration id.
    """
    # Compiled once per search instead of once per result item; the raw
    # string keeps the backslash escapes intact for the regex engine.
    illust_id_re = re.compile(r'.*/member_illust\.php\?mode=medium&illust_id=(\d+)')

    def makeImageData(item):
        """Build a Thumbnail from a single result-list element."""
        links = item.xpath(".//a")
        title = item.xpath(".//h1")[0].text
        imgURL = item.xpath(".//img[1]/@src")[0]
        # The first link is the illustration page; urljoin resolves its
        # href against the site root.
        pageURL = urlparse.urljoin("http://www.pixiv.net/", links[0].attrib['href'])
        id_ = int(illust_id_re.match(pageURL).group(1))
        # The text of the second link is used as the author name.
        author = links[1].text
        return Thumbnail(id_, title, author, pageURL, imgURL)

    # Only unicode needs encoding: calling .encode() on a non-ASCII byte
    # string triggers an implicit ASCII decode and raises UnicodeDecodeError.
    if isinstance(word, unicode):
        word = word.encode('utf-8')
    qword = urllib.quote_plus(word)

    mode = "s_tag_full" if full else "s_tag"
    url = "http://www.pixiv.net/search.php?s_mode=" + mode + "&word=" + qword
    dom = lxml.html.fromstring(getPage(url))
    return [makeImageData(item) for item in dom.xpath('//li[@class="image-item"]')]
def test():
    """Run a sample search with a fixed tag and return what searchTag returns."""
    sample_tag = u"雀"
    return searchTag(sample_tag)
def pr(img):
    """Print *img* on its own line on standard output."""
    print(img)
if __name__ == '__main__':
    # Smoke run: look up one hard-coded illustration id and print the
    # resulting image URL list.
    print("loading")
    large_image = getLargeImage("22818522")
    print(large_image)
    # Alternative smoke run, left disabled: map(pr, test())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment