Skip to content

Instantly share code, notes, and snippets.

@srikanthlogic
Created December 17, 2011 21:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save srikanthlogic/b37a1d07888597f7c9ac to your computer and use it in GitHub Desktop.
Save srikanthlogic/b37a1d07888597f7c9ac to your computer and use it in GitHub Desktop.
Stats collection bot code using Pywikipedia
# -*- coding: utf-8 -*-
#
# Statistics Bot - Inserting statistics about the number of new edits, new
# articles, registered users, deletions and protections in the last
# 24 hours.
#
# Copyright (C) 2009, 2011 Osama Khalid
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Please report bugs or help improve this program by contacting
# <osamak@gnu.org>.
# Translated to English by srik.lak@gmail.com
#
import cPickle
import datetime
import time
import simplejson
import wikipedia
class StatisticsBot:
    """Collect site statistics and append them to a monthly wiki page.

    A run fetches the site-wide counters plus the number of deletion and
    protection log entries for the current UTC day, computes the change
    against the counters stored by the previous run in ./statistic.db,
    stores the new counters there, and inserts a formatted template call
    into the page named by ``self.stats_title``.
    """

    # Seconds to wait before repeating a failed API request, so that a
    # persistent failure does not become a tight request loop.
    _RETRY_DELAY = 5

    # Marker comment on the stats page; new entries go directly above it.
    _PLACEHOLDER = u"<!---Place new stats here--->"

    # Counters for which a difference against the previous run is reported.
    _DIFF_KEYS = ('pages', 'articles', 'edits', 'users', 'images',
                  'deletions', 'protections')

    def __init__(self):
        """Select the target wiki and compute the dates used by the run."""
        # Although it can be used directly on other wikis, this is the
        # default option.
        self.site = wikipedia.getSite(fam='wikipedia', code='ta')

        # Needed dates in MediaWiki format. The trailing 'Z' marks the
        # timestamps as UTC, so they must be derived from the UTC clock;
        # using local time would shift the queried window by the host's
        # UTC offset.
        now = datetime.datetime.utcnow()
        day_format = "%Y-%m-%dT00:00:00Z"
        self.today_date = now.strftime(day_format)
        self.tomorrow_date = (now + datetime.timedelta(days=1)).strftime(day_format)

        # English month names are spelled out here, so the page title does
        # not depend on strftime's month-name output.
        months = [u"January", u"February", u"March", u"April",
                  u"May", u"June", u"July", u"August",
                  u"September", u"October", u"November", u"December"]
        month = months[now.month - 1]  # Current month
        self.stats_title = u"User:Logicwiki/Stats/" + month + " " + str(now.year)

    def run(self):
        """Fetch, diff, store, format and publish the statistics."""
        stats = self.get_meta_stats()
        stats['deletions'] = self.get_deletion_stats()
        stats['protections'] = self.get_protection_stats()

        # The diff must be computed before save_stats() overwrites the
        # previous run's counters.
        old_stats_diff = self.get_old_stats_diff(stats)
        self.save_stats(stats)

        formatted_stats = self.format_stats(stats, old_stats_diff)
        self.put_stats(formatted_stats)

    def get_api(self, predata):  # (C) 2008 Betacommand, MIT License
        """POST ``predata`` to the site's API and return the decoded JSON.

        A wikipedia.ServerError is reported and the request is repeated
        after ``_RETRY_DELAY`` seconds, without an upper bound on the
        number of attempts.
        """
        while True:
            try:
                _response, json = self.site.postForm(self.site.apipath(), predata)
            except wikipedia.ServerError as e:
                wikipedia.output(u'Warning! %s: %s' % (self.site, e))
                time.sleep(self._RETRY_DELAY)
                continue
            return simplejson.loads(json)

    def _query_until_valid(self, predata, require_query=False):
        """Repeat an API request until it returns a usable response.

        A response is usable when it is not None and, if ``require_query``
        is true, contains a 'query' key.
        """
        while True:
            response = self.get_api(predata)
            if response is not None and (not require_query or 'query' in response):
                return response
            time.sleep(self._RETRY_DELAY)

    def _log_events_request(self, letype):
        """Build the logevents query for ``letype`` over the current day."""
        # api.php?action=query&list=logevents&leprop=title&letype=<letype>&leend=2009-07-01T00:00:00Z&lestart=2009-07-02T00:00:00Z&lelimit=5000&format=jsonfm
        # NOTE(review): no continuation is followed, so the count cannot
        # exceed whatever limit the API grants this account -- confirm
        # that is acceptable.
        return {
            'action': 'query',
            'list': 'logevents',
            'leprop': 'title',
            'letype': letype,
            # lestart is the later timestamp and leend the earlier one.
            'leend': self.today_date,
            'lestart': self.tomorrow_date,
            'lelimit': '5000',
            'format': 'json',
        }

    def get_meta_stats(self):
        """Return the 'statistics' dict of the site's siteinfo query."""
        print("Getting meta statistics")
        predata = {  # api.php?action=query&meta=siteinfo&siprop=statistics&format=jsonfm
            'action': 'query',
            'meta': 'siteinfo',
            'siprop': 'statistics',
            'format': 'json',
        }
        meta_stats = self._query_until_valid(predata)
        return meta_stats['query']['statistics']

    def get_deletion_stats(self):
        """Return the number of deletion log entries in the queried window.

        Raises:
            KeyError: if the response has no ['query']['logevents']; the
                response is printed first to help debugging.
        """
        print("Getting deletion statistics")
        deletion_stats = self._query_until_valid(self._log_events_request('delete'))
        try:
            return len(deletion_stats['query']['logevents'])
        except KeyError:
            print(deletion_stats)  # for debugging
            # Bare raise keeps the missing key and the original traceback.
            raise

    def get_protection_stats(self):
        """Return the number of protection log entries in the queried window.

        Unlike get_deletion_stats(), a response without a 'query' key is
        not an error here: the request is repeated until one arrives.
        """
        print("Getting protection statistics")
        protection_stats = self._query_until_valid(
            self._log_events_request('protect'), require_query=True)
        return len(protection_stats['query']['logevents'])

    def get_old_stats_diff(self, stats):
        """Return the change of each counter since the previous run.

        The result maps 'old_<counter>' to ``stats[counter]`` minus the
        value stored in ./statistic.db. When that file cannot be opened,
        or is empty or truncated, every value is the empty string.
        """
        try:
            # NOTE: unpickling executes whatever the file encodes; this
            # file must only ever be written by save_stats().
            with open('./statistic.db', 'r') as stats_database:
                old_stats = cPickle.load(stats_database)
        except (IOError, EOFError):
            return dict(('old_' + key, "") for key in self._DIFF_KEYS)
        return dict(('old_' + key, stats[key] - old_stats[key])
                    for key in self._DIFF_KEYS)

    def save_stats(self, stats):
        """Store ``stats`` in ./statistic.db for the next run's diff."""
        # Text mode matches the mode get_old_stats_diff() reads with.
        with open('./statistic.db', 'w') as stats_database:
            cPickle.dump(stats, stats_database)

    def format_stats(self, stats, old_stats_diff):
        """Return the wikitext template call for one statistics entry.

        ``stats`` supplies the counters and ``old_stats_diff`` the
        'old_*' differences. Neither argument is modified.
        """
        stats_template = u"""{{User:Logicwiki/Stats/template
|Time={{subst:CURRENTTIME}}، {{subst:CURRENTDAY}} {{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}
|Pages = %(pages)s
|Diff in Pages = %(old_pages)s
|Articles = %(articles)s
|Diff in Articles = %(old_articles)s
|Edits= %(edits)s
|Diff in Edits = %(old_edits)s
|Users= %(users)s
|Diff in Users = %(old_users)s
|Images = %(images)s
|Diff in Images = %(old_images)s
|Deletions = %(deletions)s
|Diff in Deletion = %(old_deletions)s
|Protections = %(protections)s
|Diff in Protection = %(old_protections)s
}}"""
        # Merge into a copy so the caller's dict is left untouched.
        values = dict(stats)
        values.update(old_stats_diff)
        return stats_template % values

    def put_stats(self, formatted_stats):
        """Insert ``formatted_stats`` above the placeholder on the stats page.

        If the page does not exist, is empty, or lacks the placeholder,
        its text is replaced by a fresh table skeleton first.
        """
        stats_page = wikipedia.Page(self.site, self.stats_title)
        try:
            stats_text = stats_page.get()
        except wikipedia.NoPage:
            stats_text = ""

        # An empty page cannot contain the placeholder, so one membership
        # test covers both the "empty" and the "marker missing" cases.
        if self._PLACEHOLDER not in stats_text:
            stats_text = (u"{{User:Logicwiki/Statistics/template}}\n\n"
                          u"{| class=\"wikitable sortable\" style=\"width:90%\""
                          u"\n|-\n! Date(Time)\n! Pages\n! Articles\n"
                          u"! Edits\n! Users\n! Files\n! Deletion\n"
                          u"! Protection\n" + self._PLACEHOLDER + u"\n|}")

        # Re-emit the placeholder after the new entry so later runs can
        # append below it.
        stats_text = stats_text.replace(
            self._PLACEHOLDER,
            formatted_stats + u"\n" + self._PLACEHOLDER)
        stats_page.put(stats_text,
                       comment=u"Bot adding today's statistics")
if __name__ == '__main__':
    # Script entry point: build the bot and perform one statistics run.
    try:
        StatisticsBot().run()
    finally:
        # Runs even when construction or the run raises.
        # NOTE(review): presumably tells pywikipedia the bot has finished
        # -- confirm against the framework documentation.
        wikipedia.stopme()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment