Skip to content

Instantly share code, notes, and snippets.

@angelworm
Created October 17, 2012 09:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save angelworm/3904744 to your computer and use it in GitHub Desktop.
Save angelworm/3904744 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util
import datetime
from google.appengine.ext import db
from google.appengine.api import users
import BeautifulSoup
import urllib2
import urllib
#import utf8
import sys
import re
import oauth2 as oauth
import mimetypes
import base64
import logging
import random
# Root URL of this application, with a trailing slash.  postToTumblr appends
# "img/<base64 of the image URL><extension>" to it to build the image link
# that is sent to Tumblr as the post's 'source'.
BASEURL = "http://pixivtotumblr2.appspot.com/"
# Alternative value pointing at a server on localhost port 8083 (disabled):
#BASEURL = "http://localhost:8083/"
class IDdata(db.Model):
    """Datastore entity pairing a search word with an illustration id.

    getLatestID reads these entities and writeLatestID creates or updates
    them, looking them up by ``word``.
    """

    # Search keyword the entity belongs to (mandatory).
    word = db.StringProperty(required=True)

    # Illustration id stored for that keyword (mandatory).
    id = db.IntegerProperty(required=True)
class Image(object):
    """A pixiv search-result entry and the URL of its medium-sized image.

    Attributes:
        id: Illustration id (int) parsed from the href of the first link.
        title: Caption text of the form u"「<h2 text>」 / <second link text>".
        imgURL: The thumbnail URL rewritten to its "m" (medium) variant.
    """

    # Compiled once for all instances.  The separator in front of
    # "illust_id" may be a raw "&" or the entity "&amp;", depending on
    # whether the HTML parser decoded entities inside attribute values.
    _ILLUST_HREF = re.compile(
        r'/member_illust\.php\?mode=medium&(?:amp;)?illust_id=(\d+)')

    def __init__(self, tag):
        """Extract id, title and image URL from one search-result element.

        Args:
            tag: Parsed element exposing ``findAll(name)``.  It must contain
                at least two <a> elements, one <h2> and one <img>.

        Raises:
            ValueError: The first link's href is not a medium-size
                illustration link.
            IndexError: One of the expected child elements is missing.
        """
        links = tag.findAll("a")
        heading = tag.findAll("h2")[0].string
        thumbnail = tag.findAll("img")[0]['src']

        href = links[0]['href']
        # search() rather than match(): also accept absolute URLs.
        found = self._ILLUST_HREF.search(href)
        if found is None:
            raise ValueError("not a pixiv illustration link: %r" % (href,))

        self.id = int(found.group(1))
        self.title = u"「" + heading + u"」 / " + links[1].string
        # Drop the trailing five characters (e.g. "s.jpg") and append "m."
        # plus the last three characters, so this only works for thumbnail
        # URLs ending in a size letter, a dot and a 3-letter extension.
        self.imgURL = thumbnail[:-5] + "m." + thumbnail[-3:]

    def __str__(self):
        """Return a short "id: ..., img: ..." description."""
        return "id: " + str(self.id) + ", img: " + self.imgURL
def getLatestID(word):
    """Return the illustration id stored for *word*, or 0 if none is stored.

    The value comes from the first IDdata entity whose ``word`` equals
    *word*, with the query ordered by ``id``.
    """
    record = IDdata.all().filter("word =", word).order('id').get()
    return 0 if record is None else record.id
def writeLatestID(word, idnum):
    """Store *idnum* as the illustration id recorded for *word*.

    The first IDdata entity for *word* (query ordered by ``id``) is updated
    in place; when no such entity exists a new one is created.  Either way
    the entity is saved with put().
    """
    query = IDdata.all().filter("word =", word).order('id')
    record = query.get()
    if record is not None:
        record.id = idnum
    else:
        record = IDdata(word=word, id=idnum)
    record.put()
def searchPixiv(word):
    """Fetch pixiv's tag page for *word* and return the images listed on it.

    Args:
        word: Search tag as a unicode string.  It is UTF-8 encoded and
            URL-quoted before being placed in the query string.

    Returns:
        A list of Image objects, one for each element with class "image"
        inside the element with id "search-result", in page order.  The
        list is empty when the page has no "search-result" element.
    """
    url = ("http://www.pixiv.net/tags.php?tag="
           + urllib.quote_plus(word.encode('utf8')))

    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    dom = BeautifulSoup.BeautifulSoup(html)
    containers = dom.findAll(attrs={'id': "search-result"})
    if not containers:
        # Without this guard a page lacking the container raised IndexError
        # and aborted the caller's run for every remaining keyword.
        logging.warning("searchPixiv: no search-result element for %r", word)
        return []

    entries = containers[0].findAll(attrs={'class': "image"})
    return [Image(entry) for entry in entries]
def downloadPixivImage(url):
    """Download *url* and return the raw response body.

    The request is sent with a "Referer: http://www.pixiv.net" header
    (presumably because pixiv's image servers refuse requests without it;
    confirm before removing).

    Args:
        url: Address of the image to fetch.

    Returns:
        The response body as a byte string.
    """
    req = urllib2.Request(url)
    req.add_header('Referer', 'http://www.pixiv.net')
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Close the connection whether or not the read succeeded.
        response.close()
def postToTumblr(img):
    """Publish *img* as a photo post on the angelworm.tumblr.com blog.

    Args:
        img: Object providing ``id``, ``title`` and ``imgURL`` (an Image).

    Returns:
        Whatever the OAuth client's request() call returns for the POST.
    """
    # OAuth credentials (left blank here).  The two access-token lists are
    # parallel: entry i of each belongs to the same account.
    consumer_key = ""
    consumer_secret = ""
    access_tokens = ["", "", ""]
    access_token_secrets = ["", "", ""]

    endpoint = "http://api.tumblr.com/v2/blog/angelworm.tumblr.com/post"
    illust_page = ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id="
                   + str(img.id))
    caption_html = '<a href="' + illust_page + '">' + img.title + '</a>'

    # Choose one of the configured accounts at random for this post.
    chosen = random.randint(0, len(access_tokens) - 1)
    client = oauth.Client(
        oauth.Consumer(key=consumer_key, secret=consumer_secret),
        oauth.Token(access_tokens[chosen], access_token_secrets[chosen]))

    # The image is referenced through this app's /img/ route: the pixiv URL
    # is base64-encoded into the path and its last four characters (the
    # file extension) are appended.
    form_fields = {
        'type': 'photo',
        'link': illust_page,
        'caption': caption_html.encode('utf-8'),
        'source': BASEURL + "img/" + base64.b64encode(img.imgURL) + img.imgURL[-4:]
    }
    client.set_signature_method(oauth.SignatureMethod_HMAC_SHA1())
    return client.request(endpoint, "POST", urllib.urlencode(form_fields))
def main_():
    """Post every not-yet-seen pixiv image for each keyword to Tumblr.

    For each keyword the stored id is read, the search page is scraped, and
    images whose id is greater than the stored one are posted, highest id
    first.  The highest new id is stored before posting starts.

    Returns:
        str() of a list holding the stringified Tumblr response of every
        successful post, across all keywords.
    """
    searchList = [u"スライスチーズ", u"アボカド"]
    all_results = []
    for key in searchList:
        latestID = getLatestID(key)
        # Sort with key=/reverse=: the former cmp lambda returned a bool
        # (never a negative number), so it did not define an ordering and
        # element 0 was not guaranteed to carry the highest id.
        imageList = sorted(
            (image for image in searchPixiv(key) if image.id > latestID),
            key=lambda image: image.id,
            reverse=True)
        if not imageList:
            logging.info(key + ": nothing to post.")
            continue

        # Stored before posting, so an image whose post fails below is not
        # retried on the next run.
        writeLatestID(key, imageList[0].id)

        posted = []
        for image in imageList:
            try:
                res = str(postToTumblr(image))
            except Exception:
                # One failed post must not stop the rest; exceptions that
                # are not Exception subclasses (SystemExit,
                # KeyboardInterrupt) now propagate.
                logging.error("error occoured:" + str(sys.exc_info()))
            else:
                posted.append(res)
                logging.info(res)
        logging.info(key + ": posted " + str(len(posted)) + " posts.")
        # Keep results of every keyword instead of only the last one.
        all_results.extend(posted)
    return str(all_results)
class MainHandler(webapp.RequestHandler):
    """Request handler that answers GET with a fixed greeting."""

    def get(self):
        """Write the greeting text to the response body."""
        body = self.response.out
        body.write('Hello world!')
class ImageHandler(webapp.RequestHandler):
    """Image proxy: downloads the URL encoded in the request path."""

    def get(self, id):
        """Serve the image whose URL is base64-encoded in *id*.

        Args:
            id: Captured path segment: the base64-encoded image URL followed
                by a four-character suffix such as ".jpg".

        Responds with status 400 when *id* is not valid base64 or the
        decoded URL is not an http(s) URL.
        """
        try:
            url = base64.b64decode(urllib.unquote(id[:-4]))
        except (TypeError, ValueError):
            # Malformed base64 in the path is a client error, not a crash.
            self.error(400)
            return

        # The path comes straight from the client and urllib2 also opens
        # other schemes (file://, ftp://), so only plain web URLs are let
        # through.
        # NOTE(review): any http(s) host is still accepted; consider an
        # allow-list of pixiv image hosts.
        if not url.lower().startswith(('http://', 'https://')):
            self.error(400)
            return

        # guess_type() yields None for unknown suffixes; fall back to a
        # generic binary type rather than assigning None to the header.
        content_type = mimetypes.guess_type(id)[0] or 'application/octet-stream'
        self.response.headers["Content-Type"] = content_type
        self.response.out.write(downloadPixivImage(url))
class CronHandler(webapp.RequestHandler):
    """Request handler that runs the posting job on GET."""

    def get(self):
        """Run main_() and write its return value to the response body."""
        summary = main_()
        self.response.out.write(summary)
def main():
    """Build the WSGI application from the route table and run it."""
    # (URL pattern, handler class) pairs; the group captured by the /img/
    # pattern is passed to ImageHandler.get as its argument.
    routes = [
        ('/', MainHandler),
        ('/img/(.*)', ImageHandler),
        ('/cron', CronHandler),
    ]
    application = webapp.WSGIApplication(routes, debug=True)
    util.run_wsgi_app(application)


# Run the application when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment