Skip to content

Instantly share code, notes, and snippets.

@srikanthlogic
Created August 11, 2011 17:43
Show Gist options
  • Save srikanthlogic/1140266 to your computer and use it in GitHub Desktop.
Save srikanthlogic/1140266 to your computer and use it in GitHub Desktop.
App Engine handler code to perform interwiki-based URL redirection
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import cgi
import datetime
import logging
import re
import urllib
import urllib2
import wsgiref.handlers

from BeautifulSoup import BeautifulSoup
from google.appengine.ext import webapp
class MainPage(webapp.RequestHandler):
    """Landing page that explains how to use the redirect service."""

    def get(self):
        """Write the static HTML description of the service to the response."""
        emit = self.response.out.write
        # The fragments are written one at a time, in order, so the response
        # body is the plain concatenation of these strings.
        fragments = (
            '<html><title>Interwiki Redirect Service</title><body>',
            '<h1><center>Interwiki Redirect Service.</center></h1><br/><br/>',
            '<ul><li>Service to provide redirects to Language Wikipedias using Interwiki links existing in English Wikipedia</li>',
            '<li>Use URL structure http://interwikiredir.appspot.com/[langcode]/[EnWikiArticleName] to redirect to [langcode].wikipedia.org/wiki/[langWikiArticleName]</li>',
            '<li>Ex:- http://interwikiredir.appspot.com/ta/Chennai redirects to Chennai article on Tamil wikipedia </li> ',
            '<li>Powers tawp.in -- Tamil Wiki URL shortner </li></ul> ',
            '<br/><br/><center>Contact : <a href="http://twitter.com/logic">@logic</a> OR <a href="http://en.wikipedia.org/wiki/User:Logicwiki">en:User:Logicwiki</a></center> ',
            '</body></html>',
        )
        for fragment in fragments:
            emit(fragment)
class EnToLangInterWiki(webapp.RequestHandler):
    """Redirect an English Wikipedia title to the same article in another language.

    The English article page is fetched and its interwiki list item for the
    requested language (``<li class="interwiki-<langcode>">``, optionally
    suffixed with " FA" or " GA") supplies the redirect target.
    """

    # The language code becomes part of the redirect hostname, so only
    # hostname-label characters are accepted. Without this check a path such
    # as "/evil.example/x/Title" would produce a redirect to a foreign host.
    _LANGCODE_RE = re.compile(r'\A[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*\Z')

    # Characters left untouched when escaping the article title. '%' is kept
    # so that a title which arrives already percent-encoded is not encoded a
    # second time.
    # NOTE(review): confirm whether webapp hands route groups over decoded.
    _SAFE_TITLE_CHARS = "/%:@!$&'()*+,;=~"

    def get(self, langcode, article):
        """Redirect to ``article``'s counterpart on the ``langcode`` Wikipedia.

        Args:
            langcode: Wikipedia language subdomain taken from the URL path.
            article: English Wikipedia article title taken from the URL path.

        Responds with 404 when ``langcode`` is not a plausible subdomain.
        Otherwise redirects to the interwiki link, or to the
        ``ARTICLEDOESNOTEXIST`` page on the target wiki when the English page
        cannot be fetched or carries no link for that language.
        """
        if not self._LANGCODE_RE.match(langcode):
            self.error(404)
            return

        error_page = 'http://' + langcode + '.wikipedia.org/wiki/ARTICLEDOESNOTEXIST'

        # urllib.quote cannot handle non-ASCII unicode objects in Python 2,
        # so normalise to UTF-8 bytes first.
        if isinstance(article, unicode):
            article = article.encode('utf-8')
        url = 'http://en.wikipedia.org/wiki/' + urllib.quote(
            article, safe=self._SAFE_TITLE_CHARS)

        request = urllib2.Request(url)
        request.add_header('User-Agent', 'Srikanth Logic 1.0')
        try:
            response = urllib2.build_opener().open(request)
            try:
                soup = BeautifulSoup(response.read())
            finally:
                response.close()
        except Exception:
            # Best effort: any fetch or parse failure falls back to the error
            # page. Unlike a bare ``except:``, this lets SystemExit and
            # KeyboardInterrupt propagate, and the failure is logged.
            logging.exception('Interwiki lookup failed for %r', url)
            self.redirect(error_page)
            return

        # Plain entry first, then the featured-article and good-article
        # variants of the class attribute.
        base_class = 'interwiki-' + langcode
        item = None
        for css_class in (base_class, base_class + ' FA', base_class + ' GA'):
            item = soup.find('li', attrs={'class': css_class})
            if item is not None:
                break

        anchor = item.a if item is not None else None
        href = anchor.get('href') if anchor is not None else None
        if href:
            self.redirect(href)
        else:
            self.redirect(error_page)
class EnToTaInterWiki(webapp.RequestHandler):
    """Redirect an English Wikipedia title to the same article on Tamil Wikipedia.

    The English article page is fetched and its ``<li class="interwiki-ta">``
    entry (optionally suffixed with " FA" or " GA") supplies the redirect
    target.
    """

    # Fallback target used whenever no Tamil link can be determined.
    _ERROR_PAGE = 'http://defn.me/r/ta/articledoesnotexist'

    # Characters left untouched when escaping the article title. '%' is kept
    # so that a title which arrives already percent-encoded is not encoded a
    # second time.
    # NOTE(review): confirm whether webapp hands route groups over decoded.
    _SAFE_TITLE_CHARS = "/%:@!$&'()*+,;=~"

    def get(self, article):
        """Redirect to the Tamil counterpart of the English ``article``.

        Args:
            article: English Wikipedia article title taken from the URL path.

        Redirects to the interwiki link when one is found, otherwise to the
        fixed "article does not exist" URL. The fallback is also used when
        the English page cannot be fetched or parsed.
        """
        # urllib.quote cannot handle non-ASCII unicode objects in Python 2,
        # so normalise to UTF-8 bytes first.
        if isinstance(article, unicode):
            article = article.encode('utf-8')
        url = 'http://en.wikipedia.org/wiki/' + urllib.quote(
            article, safe=self._SAFE_TITLE_CHARS)

        request = urllib2.Request(url)
        request.add_header('User-Agent', 'Srikanth Logic 1.0')
        try:
            response = urllib2.build_opener().open(request)
            try:
                soup = BeautifulSoup(response.read())
            finally:
                response.close()
        except Exception:
            # Best effort: any fetch or parse failure falls back to the error
            # page. Unlike a bare ``except:``, this lets SystemExit and
            # KeyboardInterrupt propagate, and the failure is logged.
            logging.exception('Interwiki lookup failed for %r', url)
            self.redirect(self._ERROR_PAGE)
            return

        # Plain entry first, then the featured-article and good-article
        # variants of the class attribute.
        base_class = 'interwiki-ta'
        item = None
        for css_class in (base_class, base_class + ' FA', base_class + ' GA'):
            item = soup.find('li', attrs={'class': css_class})
            if item is not None:
                break

        anchor = item.a if item is not None else None
        href = anchor.get('href') if anchor is not None else None
        if href:
            self.redirect(href)
        else:
            self.redirect(self._ERROR_PAGE)
# URL routing table for the service.
# NOTE(review): the '/tawp/' route is listed ahead of the generic two-segment
# pattern, which would also match it; presumably webapp tries routes in the
# listed order -- confirm before reordering.
# NOTE(review): debug=True is passed through unchanged; check the webapp docs
# for what it exposes in error responses before relying on it in production.
application = webapp.WSGIApplication(
    [
        ('/', MainPage),
        ('/tawp/(.*)', EnToTaInterWiki),
        ('/(.*)/(.*)', EnToLangInterWiki),
    ],
    debug=True
)
def main():
    """Run the module-level ``application`` through wsgiref's CGI handler."""
    cgi_handler = wsgiref.handlers.CGIHandler()
    cgi_handler.run(application)


# Only serve when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment