Last active
December 16, 2015 18:09
-
-
Save angelworm/5475900 to your computer and use it in GitHub Desktop.
searchTag::String->[Thumbnail]
getLargeImage::Id->URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import urllib2 | |
import urllib | |
import urlparse | |
import re | |
import lxml.html | |
class Thumbnail(object):
    """A pixiv thumbnail plus some information about the illustration.

    Instances are plain value holders; the constructor stores its
    arguments unchanged on the attributes ``id``, ``title``, ``author``,
    ``pageURL`` and ``imgURL``.
    """

    def __init__(self, id_, title_, author_, pageURL_, imgURL_):
        """Store the given values on the instance without validation."""
        self.id = id_
        self.title = title_
        self.author = author_
        self.imgURL = imgURL_
        self.pageURL = pageURL_

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return "Thumbnail(%r, %r, %r, %r, %r)" % (
            self.id, self.title, self.author, self.pageURL, self.imgURL)

    def __str__(self):
        """Return a short summary containing the id and the image URL."""
        return "id: " + str(self.id) + ", img: " + self.imgURL

    def description(self):
        """Return the summary extended with the title and the author.

        The title is wrapped in Japanese corner brackets; because those
        are unicode literals the result is a unicode string on Python 2.
        """
        return ("id: " + str(self.id) + ", img: " + self.imgURL
                + ", title: " + u"「" + self.title + u"」 / " + self.author)
def getPage(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a ``Referer`` header of
    ``http://www.pixiv.net`` and an ``Accept-Language`` header of ``ja``.

    Errors from ``urllib2.urlopen`` propagate unchanged, as does
    ``UnicodeDecodeError`` when the body is not valid UTF-8.
    """
    # Function-local import so this block does not depend on a new
    # file-level import.
    from contextlib import closing

    req = urllib2.Request(url)
    req.add_header('Referer', 'http://www.pixiv.net')
    req.add_header('Accept-Language', 'ja')
    # closing() releases the connection even if read() raises; the
    # original never closed the response object.
    with closing(urllib2.urlopen(req)) as response:
        body = response.read()
    return unicode(body, "utf-8")
def getLargeImage(id_):
    """Return the image URL found on the illustration page for *id_*.

    Loads the ``mode=medium`` illustration page through ``getPage`` and
    reads the ``src`` attribute of the first ``<img border="0">`` element.

    Returns:
        A one-element list holding that ``src`` value (a list rather than
        a bare string, exactly as before, so existing callers keep working).

    Raises:
        IndexError: if the page contains no ``<img border="0">`` element.
        KeyError: if the matched element has no ``src`` attribute.
    """
    url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=" + str(id_)
    candidates = lxml.html.fromstring(getPage(url)).xpath('//img[@border="0"]')
    if not candidates:
        # Same exception type as the original bare [0] lookup, but with a
        # message that says what was actually missing.
        raise IndexError(
            'no <img border="0"> element found for illust_id=%s' % (id_,))
    return [candidates[0].attrib['src']]
def searchTag(word, full=True):
    """Search pixiv by tag and return a list of ``Thumbnail`` objects.

    Args:
        word: the tag to search for. Text is encoded as UTF-8 before being
            URL-quoted; a byte string is quoted as is.
        full: when true (the default) the search URL uses
            ``s_mode=s_tag_full``, otherwise ``s_mode=s_tag``.
            NOTE(review): the original comment reads "partial match",
            presumably describing the ``s_tag`` mode -- confirm.

    Returns:
        One ``Thumbnail`` per ``<li class="image-item">`` element of the
        result page, in document order.

    Raises:
        IndexError: if an item lacks the expected ``<a>``, ``<h1>`` or
            ``<img>`` children.
        AttributeError: if an item's first link does not match the
            ``member_illust.php?mode=medium&illust_id=N`` pattern.
    """
    # Raw string (same pattern value as before) compiled once per call
    # instead of once per result item.
    illust_id_pattern = re.compile(
        r'.*/member_illust\.php\?mode=medium&illust_id=(\d+)')

    def makeImageData(item):
        """Build a Thumbnail from a single result-list element."""
        anchors = item.xpath(".//a")
        title = item.xpath(".//h1")[0].text
        imgURL = item.xpath(".//img[1]/@src")[0]
        # The first link points at the illustration page and may be relative.
        pageURL = urlparse.urljoin("http://www.pixiv.net/",
                                   anchors[0].attrib['href'])
        id_ = int(illust_id_pattern.match(pageURL).group(1))
        # The second link's text is used as the author name.
        author = anchors[1].text
        return Thumbnail(id_, title, author, pageURL, imgURL)

    # Only encode text. On Python 2 ``bytes`` is ``str``, and calling
    # .encode() on a non-ASCII byte string would first attempt an implicit
    # ASCII decode and fail.
    encoded = word if isinstance(word, bytes) else word.encode('utf-8')
    qword = urllib.quote_plus(encoded)
    if full:
        url = "http://www.pixiv.net/search.php?s_mode=s_tag_full&word=" + qword
    else:
        # Partial match (translated from the original comment).
        url = "http://www.pixiv.net/search.php?s_mode=s_tag&word=" + qword
    dom = lxml.html.fromstring(getPage(url))
    return [makeImageData(item)
            for item in dom.xpath('//li[@class="image-item"]')]
def test():
    """Run a sample tag search and return whatever ``searchTag`` returns.

    The sample tag is a single kanji (the character for "sparrow") and the
    search uses ``searchTag``'s default ``full`` setting.
    """
    sample_tag = u"雀"
    return searchTag(sample_tag)
def pr(img):
    """Print *img* to standard output, followed by a newline."""
    # Single-argument parenthesized form: prints the same text under the
    # Python 2 print statement and is also valid Python 3.
    print(img)
if __name__ == '__main__':
    # Demo run: show the image URL list for one hard-coded illustration id.
    # The single-argument parenthesized print form prints the same text on
    # Python 2 and is also valid Python 3 syntax.
    print("loading")
    print(getLargeImage("22818522"))
    # Alternative demo (disabled): print every thumbnail of the sample search.
    #map(pr, test())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment