angelworm/gist:3904744

## gistfile1.txt
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util
import datetime
from google.appengine.ext import db
from google.appengine.api import users
import BeautifulSoup
import urllib2
import urllib
#import utf8
import sys
import re
import oauth2 as oauth
import mimetypes
import base64
import logging
import random

# Public base URL of this application; postToTumblr() prefixes it to the
# "img/" path it passes to Tumblr as the photo source.
BASEURL = "http://pixivtotumblr2.appspot.com/"
# Alternative base URL for a local run (disabled).
#BASEURL = "http://localhost:8083/"

class IDdata(db.Model):
    """Datastore record pairing a search word with an illustration id.

    getLatestID() reads the record and writeLatestID() creates or updates it.
    """
    # Search keyword the stored id belongs to.
    word = db.StringProperty(required=True)
    # Illustration id saved for that keyword by writeLatestID().
    id = db.IntegerProperty(required=True)

class Image(object):
    """One pixiv search result: illustration id, caption title and image URL.

    Attributes:
        id: illustration id (int) parsed from the first link's href.
        title: text of the entry's first <h2> in corner brackets, followed
            by " / " and the text of the second link.
        imgURL: the first <img> src with its last five characters replaced
            by "m." plus the last three characters
            (e.g. "..._s.jpg" becomes "..._m.jpg").
    """

    # Raw string so the backslashes reach the regex engine untouched;
    # compiled once for the class instead of on every instantiation.
    _ILLUST_HREF = re.compile(r'/member_illust\.php\?mode=medium&illust_id=(\d+)')

    def __init__(self, tag):
        """Extract the fields from one search-result element.

        Args:
            tag: element exposing findAll(); it must contain at least two
                <a> elements, one <h2> and one <img>.

        Raises:
            ValueError: if the first link's href does not start with the
                medium-illustration path (previously this surfaced as an
                AttributeError on None).
        """
        links = tag.findAll("a")
        heading = tag.findAll("h2")[0].string
        thumb = tag.findAll("img")[0]['src']

        href = links[0]['href']
        found = self._ILLUST_HREF.match(href)
        if found is None:
            raise ValueError("unexpected illustration link: %r" % (href,))

        self.id = int(found.group(1))
        # NOTE(review): the second ".string" is kept from the original code;
        # presumably the parser's string type returns itself here - confirm.
        self.title = u"「" + heading.string + u"」 / " + links[1].string
        self.imgURL = thumb[0:-5] + "m." + thumb[-3:]

    def __str__(self):
        """Return a short "id: ..., img: ..." description."""
        return "id: " + str(self.id) + ", img: " + self.imgURL

def getLatestID(word):
    """Return the illustration id stored for *word*, or 0 if none is stored.

    Args:
        word: search keyword to look up in IDdata.

    Returns:
        The ``id`` of the first IDdata record matching *word* (ordered by
        id), or 0 when the query finds nothing.
    """
    record = IDdata.all().filter("word =", word).order('id').get()
    # Identity test: "no result" is the None singleton, not something equal to it.
    if record is None:
        return 0
    return record.id

def writeLatestID(word, idnum):
    """Store *idnum* as the id for *word*, creating the record if needed.

    Args:
        word: search keyword the id belongs to.
        idnum: illustration id to store.
    """
    record = IDdata.all().filter("word =", word).order('id').get()
    # Identity test: "no result" is the None singleton, not something equal to it.
    if record is None:
        record = IDdata(word=word, id=idnum)
    else:
        record.id = idnum
    record.put()

def searchPixiv(word):
    """Fetch pixiv's tag page for *word* and return its entries as Images.

    Args:
        word: unicode search keyword; it is UTF-8 encoded and URL-quoted
            into the ``tag`` query parameter.

    Returns:
        A list with one Image per element of class "image" found inside
        the first element whose id is "search-result".
    """
    query = urllib.quote_plus(word.encode('utf8'))
    html = urllib2.urlopen("http://www.pixiv.net/tags.php?tag=" + query).read()
    soup = BeautifulSoup.BeautifulSoup(html)
    container = soup.findAll(attrs={'id' : "search-result"})[0]
    entries = container.findAll(attrs={'class' : "image"})
    return [Image(entry) for entry in entries]

def downloadPixivImage(url):
    """Download *url* with a pixiv Referer header and return the body.

    Args:
        url: address of the image to fetch.

    Returns:
        The raw response body as read from the connection.
    """
    # NOTE(review): the Referer is presumably required by pixiv's image
    # servers - confirm.
    request = urllib2.Request(url, headers={'Referer': 'http://www.pixiv.net'})
    response = urllib2.urlopen(request)
    return response.read()

def postToTumblr(img):
    """Post *img* to the angelworm.tumblr.com blog as a photo post.

    Args:
        img: object with ``id``, ``title`` and ``imgURL`` attributes
            (an Image).

    Returns:
        Whatever the OAuth client's request() call returns for the POST.
    """
    # OAuth credentials; they are blank in this copy of the source.
    consumerKey = ""
    consumerSecret = ""
    accessTokens = ["", "", ""]
    accessTokenSecrets = ["", "", ""]

    pageURL = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=" + str(img.id)
    caption = '<a href="' + pageURL + '">' + img.title + '</a>'

    # One of the configured accounts is picked at random for each post.
    chosen = random.randint(0, len(accessTokens) - 1)
    client = oauth.Client(
        oauth.Consumer(key=consumerKey, secret=consumerSecret),
        oauth.Token(accessTokens[chosen], accessTokenSecrets[chosen]))

    # The photo source points at this app's "img/" path: the base64 of the
    # image URL followed by the URL's last four characters.
    proxiedURL = BASEURL + "img/" + base64.b64encode(img.imgURL) + img.imgURL[-4:]
    fields = {
        'type': 'photo',
        'link': pageURL,
        'caption': caption.encode('utf-8'),
        'source': proxiedURL,
    }

    client.set_signature_method(oauth.SignatureMethod_HMAC_SHA1())
    return client.request(
        "http://api.tumblr.com/v2/blog/angelworm.tumblr.com/post",
        "POST",
        urllib.urlencode(fields))

def main_():
    """Search pixiv for each keyword and post the unseen images to Tumblr.

    For every keyword, the images whose id is greater than the id returned
    by getLatestID() are posted newest first, and the highest of those ids
    is stored with writeLatestID().

    Returns:
        str() of a list holding the stringified postToTumblr() result of
        every post made during this run, across all keywords.
    """
    searchList = [u"スライスチーズ", u"アボカド"]
    posted = []

    for key in searchList:
        latestID = getLatestID(key)
        # Sort by id, highest first, so element 0 really is the newest id.
        # The previous comparator returned a bool (never a negative value),
        # which is not a valid cmp function, so the order was not guaranteed.
        newImages = sorted(
            (image for image in searchPixiv(key) if image.id > latestID),
            key=lambda image: image.id,
            reverse=True)

        if not newImages:
            logging.info(key + ": nothing to post.")
            continue

        # Stored before posting, so a post that fails below is only logged
        # and is not retried on the next run.
        writeLatestID(key, newImages[0].id)

        count = 0
        for image in newImages:
            try:
                res = str(postToTumblr(image))
            except Exception:
                # Best effort: one failed post must not stop the others.
                logging.error("error occoured:" + str(sys.exc_info()))
            else:
                posted.append(res)
                count += 1
                logging.info(res)

        logging.info(key + ": posted " + str(count) + " posts.")

    return str(posted)

class MainHandler(webapp.RequestHandler):
    """Handler mapped to '/' in main(); answers with a fixed greeting."""
    def get(self):
        """Write 'Hello world!' to the response."""
        self.response.out.write('Hello world!')

class ImageHandler(webapp.RequestHandler):
    """Handler mapped to '/img/(.*)' in main(); relays a remote image."""
    def get(self,id):
        """Fetch the image named by *id* and write it to the response.

        Args:
            id: the path captured after "/img/": a URL-quoted base64 string
                followed by four trailing characters, which are stripped
                before decoding (postToTumblr() appends the image URL's
                last four characters there).
        """
        # NOTE(review): the decoded URL comes straight from the request path
        # and is fetched without validation, so any client can make this app
        # fetch an arbitrary URL - consider restricting the allowed hosts.
        url = base64.b64decode(urllib.unquote(id[:-4]))
        # guess_type() yields None for an unrecognised extension; fall back
        # to a generic binary type instead of setting the header to None.
        contentType = mimetypes.guess_type(id)[0] or "application/octet-stream"
        self.response.headers["Content-Type"] = contentType
        self.response.out.write(downloadPixivImage(url))

class CronHandler(webapp.RequestHandler):
    """Handler mapped to '/cron' in main(); triggers one search-and-post run."""
    def get(self):
        """Run main_() and write its return value to the response."""
        self.response.out.write(main_())

def main():
    """Build the WSGI application from its three routes and run it."""
    routes = [
        ('/', MainHandler),
        ('/img/(.*)', ImageHandler),
        ('/cron', CronHandler),
    ]
    util.run_wsgi_app(webapp.WSGIApplication(routes, debug=True))


# Run the application when this file is executed as a script.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	#
	# Copyright 2007 Google Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	from google.appengine.ext import webapp
	from google.appengine.ext.webapp import util
	import datetime
	from google.appengine.ext import db
	from google.appengine.api import users
	import BeautifulSoup
	import urllib2
	import urllib
	#import utf8
	import sys
	import re
	import oauth2 as oauth
	import mimetypes
	import base64
	import logging
	import random

	# Public base URL of this application; postToTumblr() prefixes it to the
	# "img/" path it passes to Tumblr as the photo source.
	BASEURL = "http://pixivtotumblr2.appspot.com/"
	# Alternative base URL for a local run (disabled).
	#BASEURL = "http://localhost:8083/"

	class IDdata(db.Model):
	    """Datastore record pairing a search word with an illustration id.

	    getLatestID() reads the record and writeLatestID() creates or updates it.
	    """
	    # Search keyword the stored id belongs to.
	    word = db.StringProperty(required=True)
	    # Illustration id saved for that keyword by writeLatestID().
	    id = db.IntegerProperty(required=True)

	class Image(object):
	    """One pixiv search result: illustration id, caption title and image URL.

	    Attributes:
	        id: illustration id (int) parsed from the first link's href.
	        title: text of the entry's first <h2> in corner brackets, followed
	            by " / " and the text of the second link.
	        imgURL: the first <img> src with its last five characters replaced
	            by "m." plus the last three characters
	            (e.g. "..._s.jpg" becomes "..._m.jpg").
	    """

	    # Raw string so the backslashes reach the regex engine untouched;
	    # compiled once for the class instead of on every instantiation.
	    _ILLUST_HREF = re.compile(r'/member_illust\.php\?mode=medium&illust_id=(\d+)')

	    def __init__(self, tag):
	        """Extract the fields from one search-result element.

	        Args:
	            tag: element exposing findAll(); it must contain at least two
	                <a> elements, one <h2> and one <img>.

	        Raises:
	            ValueError: if the first link's href does not start with the
	                medium-illustration path.
	        """
	        links = tag.findAll("a")
	        heading = tag.findAll("h2")[0].string
	        thumb = tag.findAll("img")[0]['src']

	        href = links[0]['href']
	        found = self._ILLUST_HREF.match(href)
	        if found is None:
	            raise ValueError("unexpected illustration link: %r" % (href,))

	        self.id = int(found.group(1))
	        # NOTE(review): the second ".string" is kept from the original code;
	        # presumably the parser's string type returns itself here - confirm.
	        self.title = u"「" + heading.string + u"」 / " + links[1].string
	        self.imgURL = thumb[0:-5] + "m." + thumb[-3:]

	    def __str__(self):
	        """Return a short "id: ..., img: ..." description."""
	        return "id: " + str(self.id) + ", img: " + self.imgURL

	def getLatestID(word):
	    """Return the illustration id stored for *word*, or 0 if none is stored.

	    Args:
	        word: search keyword to look up in IDdata.

	    Returns:
	        The ``id`` of the first IDdata record matching *word* (ordered by
	        id), or 0 when the query finds nothing.
	    """
	    record = IDdata.all().filter("word =", word).order('id').get()
	    # Identity test: "no result" is the None singleton, not something equal to it.
	    if record is None:
	        return 0
	    return record.id

	def writeLatestID(word, idnum):
	    """Store *idnum* as the id for *word*, creating the record if needed.

	    Args:
	        word: search keyword the id belongs to.
	        idnum: illustration id to store.
	    """
	    record = IDdata.all().filter("word =", word).order('id').get()
	    # Identity test: "no result" is the None singleton, not something equal to it.
	    if record is None:
	        record = IDdata(word=word, id=idnum)
	    else:
	        record.id = idnum
	    record.put()

	def searchPixiv(word):
	    """Fetch pixiv's tag page for *word* and return its entries as Images.

	    Args:
	        word: unicode search keyword; it is UTF-8 encoded and URL-quoted
	            into the ``tag`` query parameter.

	    Returns:
	        A list with one Image per element of class "image" found inside
	        the first element whose id is "search-result".
	    """
	    query = urllib.quote_plus(word.encode('utf8'))
	    html = urllib2.urlopen("http://www.pixiv.net/tags.php?tag=" + query).read()
	    soup = BeautifulSoup.BeautifulSoup(html)
	    container = soup.findAll(attrs={'id' : "search-result"})[0]
	    entries = container.findAll(attrs={'class' : "image"})
	    return [Image(entry) for entry in entries]

	def downloadPixivImage(url):
	    """Download *url* with a pixiv Referer header and return the body.

	    Args:
	        url: address of the image to fetch.

	    Returns:
	        The raw response body as read from the connection.
	    """
	    # NOTE(review): the Referer is presumably required by pixiv's image
	    # servers - confirm.
	    req = urllib2.Request(url)
	    req.add_header('Referer', 'http://www.pixiv.net')
	    return urllib2.urlopen(req).read()

	def postToTumblr(img):
	    """Post *img* to the angelworm.tumblr.com blog as a photo post.

	    Args:
	        img: object with ``id``, ``title`` and ``imgURL`` attributes
	            (an Image).

	    Returns:
	        Whatever the OAuth client's request() call returns for the POST.
	    """
	    # OAuth credentials; they are blank in this copy of the source.
	    tumblrConsumerKey = ""
	    tumblrConsumerSecretKey = ""
	    tumblrAccessToken = ["",
	                         "",
	                         ""]
	    tumblrAccessTokenSecret = ["",
	                               "",
	                               ""]

	    postURL = "http://api.tumblr.com/v2/blog/angelworm.tumblr.com/post"
	    pageURL = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id="+str(img.id)
	    caption = '<a href=\"'+(pageURL)+'\">'+img.title+'</a>'

	    # One of the configured accounts is picked at random for each post.
	    account = random.randint(0,len(tumblrAccessToken)-1)

	    consumer = oauth.Consumer(key=tumblrConsumerKey, secret=tumblrConsumerSecretKey)
	    token = oauth.Token(tumblrAccessToken[account], tumblrAccessTokenSecret[account])
	    client = oauth.Client(consumer, token)

	    params = {
	        'type': 'photo',
	        'link': pageURL,
	        'caption':caption.encode('utf-8'),
	        # This app's "img/" path: base64 of the image URL followed by the
	        # URL's last four characters.
	        'source': BASEURL + "img/" + base64.b64encode(img.imgURL) + img.imgURL[-4:]
	        }

	    client.set_signature_method(oauth.SignatureMethod_HMAC_SHA1())
	    response = client.request(postURL, "POST", urllib.urlencode(params))
	    return response

	def main_():
	    """Search pixiv for each keyword and post the unseen images to Tumblr.

	    For every keyword, the images whose id is greater than the id returned
	    by getLatestID() are posted newest first, and the highest of those ids
	    is stored with writeLatestID().

	    Returns:
	        str() of a list holding the stringified postToTumblr() result of
	        every post made during this run, across all keywords.
	    """
	    searchList = [u"スライスチーズ", u"アボカド"]
	    posted = []

	    for key in searchList:
	        latestID = getLatestID(key)
	        # Sort by id, highest first, so element 0 really is the newest id.
	        # The previous comparator returned a bool (never a negative value),
	        # which is not a valid cmp function, so the order was not guaranteed.
	        newImages = sorted(
	            (image for image in searchPixiv(key) if image.id > latestID),
	            key=lambda image: image.id,
	            reverse=True)

	        if not newImages:
	            logging.info(key + ": nothing to post.")
	            continue

	        # Stored before posting, so a post that fails below is only logged
	        # and is not retried on the next run.
	        writeLatestID(key, newImages[0].id)

	        count = 0
	        for image in newImages:
	            try:
	                res = str(postToTumblr(image))
	            except Exception:
	                # Best effort: one failed post must not stop the others.
	                logging.error("error occoured:" + str(sys.exc_info()))
	            else:
	                posted.append(res)
	                count += 1
	                logging.info(res)

	        logging.info(key + ": posted " + str(count) + " posts.")

	    return str(posted)

	class MainHandler(webapp.RequestHandler):
	    """Handler mapped to '/' in main(); answers with a fixed greeting."""
	    def get(self):
	        """Write 'Hello world!' to the response."""
	        self.response.out.write('Hello world!')

	class ImageHandler(webapp.RequestHandler):
	    """Handler mapped to '/img/(.*)' in main(); relays a remote image."""
	    def get(self,id):
	        """Fetch the image named by *id* and write it to the response.

	        Args:
	            id: the path captured after "/img/": a URL-quoted base64 string
	                followed by four trailing characters, which are stripped
	                before decoding (postToTumblr() appends the image URL's
	                last four characters there).
	        """
	        # NOTE(review): the decoded URL comes straight from the request path
	        # and is fetched without validation, so any client can make this app
	        # fetch an arbitrary URL - consider restricting the allowed hosts.
	        url = base64.b64decode(urllib.unquote(id[:-4]))
	        # guess_type() yields None for an unrecognised extension; fall back
	        # to a generic binary type instead of setting the header to None.
	        contentType = mimetypes.guess_type(id)[0] or "application/octet-stream"
	        self.response.headers["Content-Type"] = contentType
	        self.response.out.write(downloadPixivImage(url))

	class CronHandler(webapp.RequestHandler):
	    """Handler mapped to '/cron' in main(); triggers one search-and-post run."""
	    def get(self):
	        """Run main_() and write its return value to the response."""
	        self.response.out.write(main_())

	def main():
	    """Build the WSGI application from its three routes and run it."""
	    application = webapp.WSGIApplication([('/', MainHandler),
	                                          ('/img/(.*)', ImageHandler),
	                                          ('/cron', CronHandler)],
	                                         debug=True)
	    util.run_wsgi_app(application)


	# Run the application when this file is executed as a script.
	if __name__ == '__main__':
	    main()