-
-
Save srikanthlogic/b37a1d07888597f7c9ac to your computer and use it in GitHub Desktop.
Stats collection bot code using Pywikipeda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# | |
# Statistics Bot - Inserting statistics about the number of new edits, new
# articles, registered users, deletions and protections in the last
# 24 hours.
# | |
# Copyright (C) 2009, 2011 Osama Khalid | |
# | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
# | |
# Please report bugs or help improve this program by contacting
# <osamak@gnu.org>.
# Translated to English by srik.lak@gmail.com | |
# | |
import cPickle | |
import datetime | |
import time | |
import simplejson | |
import wikipedia | |
class StatisticsBot(object):
    """Collect site statistics and append them to a monthly wiki page.

    A run (see run()) fetches the site-wide counters plus the number of
    deletion and protection log events in the current day's window,
    compares them with the values stored by the previous run, stores the
    new values locally and inserts a formatted template call into the
    statistics page of the current month.
    """

    # Seconds to wait before repeating a failed or unusable API request,
    # so that a struggling server is not hammered in a tight loop.
    retry_delay = 5

    # Local pickle file holding the statistics of the previous run.  It is
    # only ever written by save_stats(); do not point it at untrusted data,
    # because unpickling can execute arbitrary code.
    stats_db_path = './statistic.db'

    # Spelled out instead of using strftime("%B"), whose output depends on
    # the process locale.
    MONTH_NAMES = (u"January", u"February", u"March", u"April",
                   u"May", u"June", u"July", u"August",
                   u"September", u"October", u"November", u"December")

    # Counters for which a difference to the previous run is reported.
    TRACKED_KEYS = ('pages', 'articles', 'edits', 'users', 'images',
                    'deletions', 'protections')

    # Placeholder in the statistics page in front of which rows are added.
    INSERTION_MARKER = u"<!---Place new stats here--->"

    # Template call written for every run; filled with %-formatting.
    STATS_ROW_TEMPLATE = (
        u"{{User:Logicwiki/Stats/template\n"
        u"|Time={{subst:CURRENTTIME}}، {{subst:CURRENTDAY}} {{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}\n"
        u"|Pages = %(pages)s\n"
        u"|Diff in Pages = %(old_pages)s\n"
        u"|Articles = %(articles)s\n"
        u"|Diff in Articles = %(old_articles)s\n"
        u"|Edits= %(edits)s\n"
        u"|Diff in Edits = %(old_edits)s\n"
        u"|Users= %(users)s\n"
        u"|Diff in Users = %(old_users)s\n"
        u"|Images = %(images)s\n"
        u"|Diff in Images = %(old_images)s\n"
        u"|Deletions = %(deletions)s\n"
        u"|Diff in Deletion = %(old_deletions)s\n"
        u"|Protections = %(protections)s\n"
        u"|Diff in Protection = %(old_protections)s\n"
        u"}}"
    )

    def __init__(self):
        """Select the target site and compute today's query window."""
        # Although the bot can be used directly on other wikis, the 'ta'
        # Wikipedia is the default option.
        self.site = wikipedia.getSite(fam='wikipedia', code='ta')

        # Needed dates in MediaWiki format.  The timestamps carry a "Z"
        # (UTC) suffix, so they are derived from UTC, not from local time.
        now = datetime.datetime.utcnow()
        midnight_format = "%Y-%m-%dT00:00:00Z"
        self.today_date = now.strftime(midnight_format)
        self.tomorrow_date = (
            now + datetime.timedelta(days=1)).strftime(midnight_format)

        month = self.MONTH_NAMES[now.month - 1]  # Current month
        self.stats_title = u"User:Logicwiki/Stats/%s %d" % (month, now.year)

    def run(self):
        """Gather, store and publish the statistics of one run."""
        stats = self.get_meta_stats()
        stats['deletions'] = self.get_deletion_stats()
        stats['protections'] = self.get_protection_stats()

        # The difference must be computed before the stored values are
        # overwritten with the current ones.
        old_stats_diff = self.get_old_stats_diff(stats)
        self.save_stats(stats)

        formatted_stats = self.format_stats(stats, old_stats_diff)
        self.put_stats(formatted_stats)

    def get_api(self, predata):  # (C) 2008 Betacommand, MIT License
        """POST *predata* to the site's API and return the decoded JSON.

        The request is repeated, after a pause of ``retry_delay`` seconds,
        for as long as it raises ``wikipedia.ServerError``.
        """
        while True:
            try:
                _response, body = self.site.postForm(self.site.apipath(),
                                                     predata)
            except wikipedia.ServerError as error:
                wikipedia.output(u'Warning! %s: %s' % (self.site, error))
                time.sleep(self.retry_delay)
                continue
            return simplejson.loads(body)

    def _fetch(self, predata, require_query=False):
        """Repeat get_api() until it returns a usable answer.

        An answer is usable when it is not None and, if *require_query* is
        true, contains a 'query' key.
        """
        while True:
            data = self.get_api(predata)
            if data is not None and (not require_query or 'query' in data):
                return data
            time.sleep(self.retry_delay)

    def _log_events_request(self, log_type):
        """Return the API parameters listing today's *log_type* log events."""
        # Example: api.php?action=query&list=logevents&leprop=title
        #   &letype=delete&leend=2009-07-01T00:00:00Z
        #   &lestart=2009-07-02T00:00:00Z&lelimit=5000&format=jsonfm
        return {
            'action': 'query',
            'list': 'logevents',
            'leprop': 'title',
            'letype': log_type,
            # 'lestart' is the later timestamp and 'leend' the earlier one.
            'leend': self.today_date,
            'lestart': self.tomorrow_date,
            # Only this single request is made, so at most this many events
            # are counted.  NOTE(review): the API may cap the limit lower
            # for accounts without higher limits -- confirm.
            'lelimit': '5000',
            'format': 'json',
        }

    def get_meta_stats(self):
        """Return the site's statistics dictionary from the siteinfo query."""
        print("Getting meta statistics")
        # api.php?action=query&meta=siteinfo&siprop=statistics&format=jsonfm
        predata = {
            'action': 'query',
            'meta': 'siteinfo',
            'siprop': 'statistics',
            'format': 'json',
        }
        return self._fetch(predata)['query']['statistics']

    def get_deletion_stats(self):
        """Return the number of deletion log events in today's window.

        Raises:
            KeyError: if the API answer has no 'query'/'logevents' entry;
                the answer is printed first to help debugging.
        """
        print("Getting deletion statistics")
        deletion_stats = self._fetch(self._log_events_request('delete'))
        try:
            return len(deletion_stats['query']['logevents'])
        except KeyError:
            print(deletion_stats)  # for debugging
            # Re-raise the original error so the missing key and the
            # traceback are preserved.
            raise

    def get_protection_stats(self):
        """Return the number of protection log events in today's window.

        Unlike get_deletion_stats(), an answer without a 'query' key is
        not an error here: the request is repeated until one arrives.
        """
        print("Getting protection statistics")
        protection_stats = self._fetch(self._log_events_request('protect'),
                                       require_query=True)
        return len(protection_stats['query']['logevents'])

    def get_old_stats_diff(self, stats):
        """Return how much each tracked counter grew since the last run.

        The result maps 'old_<counter>' to ``stats[counter]`` minus the
        value stored by the previous run.  When there is no usable stored
        data (file missing, truncated or malformed) every value is the
        empty string, so the page simply shows no difference.
        """
        no_diff = dict(('old_' + key, "") for key in self.TRACKED_KEYS)
        try:
            # Binary mode matches save_stats(); the handle is closed even
            # when unpickling fails.
            with open(self.stats_db_path, 'rb') as stats_database:
                previous = cPickle.load(stats_database)
            return dict(('old_' + key, stats[key] - previous[key])
                        for key in self.TRACKED_KEYS)
        except IOError:
            # First run: nothing has been stored yet.
            return no_diff
        except (EOFError, KeyError, TypeError, ValueError,
                cPickle.UnpicklingError) as error:
            # A damaged database must not stop the bot: run() would never
            # reach save_stats() and the file would stay damaged forever.
            print("Ignoring unusable previous statistics: %r" % (error,))
            return no_diff

    def save_stats(self, stats):
        """Store *stats* locally for comparison during the next run."""
        with open(self.stats_db_path, 'wb') as stats_database:
            cPickle.dump(stats, stats_database)

    def format_stats(self, stats, old_stats_diff):
        """Return the template call describing *stats* and their differences.

        Neither argument is modified.
        """
        # Merge into a copy so the caller's dictionary keeps containing the
        # plain statistics only.
        values = dict(stats)
        values.update(old_stats_diff)
        return self.STATS_ROW_TEMPLATE % values

    def put_stats(self, formatted_stats):
        """Insert *formatted_stats* into this month's statistics page.

        A page that is missing, empty or lacks the insertion marker is
        replaced by a fresh table header before the row is inserted.
        """
        marker = self.INSERTION_MARKER
        stats_page = wikipedia.Page(self.site, self.stats_title)
        try:
            stats_text = stats_page.get()
        except wikipedia.NoPage:
            stats_text = ""

        if stats_text == "" or marker not in stats_text:
            stats_text = u"{{User:Logicwiki/Statistics/template}}\n\n"\
                         u"{| class=\"wikitable sortable\" style=\"width:90%\""\
                         u"\n|-\n! Date(Time)\n! Pages\n! Articles\n"\
                         u"! Edits\n! Users\n! Files\n! Deletion\n"\
                         u"! Protection\n<!---Place new stats here--->\n|}"

        # Keep the marker after the new row so later runs append below it.
        stats_text = stats_text.replace(marker,
                                        formatted_stats + u"\n" + marker)
        stats_page.put(stats_text,
                       comment=u"Bot adding today's statistics")
if __name__ == '__main__':
    # Script entry point: build the bot and perform one run.  The
    # wikipedia.stopme() call sits in a finally clause so that it is
    # executed whether the run succeeds or raises.
    try:
        StatisticsBot().run()
    finally:
        wikipedia.stopme()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment