Instantly share code, notes, and snippets.

anonymous /pyofficialwebsite Secret
Created Dec 29, 2014

Embed
What would you like to do?
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Prototype for bot from User:Avono https://en.wikipedia.org/wiki/User:Avono
__version__ = '$Id$'
#
import json
import random
import re

import mwparserfromhell as mwp
import pywikibot
from pywikibot import i18n
from pywikibot import pagegenerators
# NOTE(fix): the original also had a bare `import pagegenerators` AFTER the
# pywikibot import; that rebinds the name and fails with ImportError on
# pywikibot-core, where pagegenerators is not a top-level module. Removed.

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
def processtemplate(template):
    """Strip every parameter from *template* and return its text form.

    @param template: an mwparserfromhell Template node (anything exposing
        a ``params`` list and a ``remove(param)`` method).
    @return: the template rendered as text after all parameters were
        removed, e.g. ``{{Official website}}``.
    """
    # Iterate over a snapshot: the original looped over template.params
    # directly while remove() mutated that same list, which silently
    # skipped every other parameter.
    for param in list(template.params):
        template.remove(param)
    try:
        return unicode(template)  # Python 2 (script's native environment)
    except NameError:
        return str(template)      # Python 3 fallback
class BasicBot:
    """Bot that strips parameters from {{Official website}} templates.

    Consumes a generator of Wikidata item pages that carry property P856
    (official website), resolves each item's enwiki sitelink, and removes
    all parameters from any {{Official website}} template on that article.
    """

    # Edit summary message that should be used is placed on /i18n subdirectory.
    # The file containing these messages should have the same name as the caller
    # script (i.e. basic.py in this case)

    def __init__(self, generator, dry=True):
        """
        Constructor.

        @param generator: The page generator that determines on which pages
            to work.
        @type generator: generator.
        @param dry: If True, doesn't do any real changes, but only shows
            what would have been changed.
        @type dry: boolean.
        """
        self.generator = generator
        self.dry = dry
        # Articles edited by this bot live on en.wikipedia.
        self.site = pywikibot.Site("en", "wikipedia")
        # Edit summary; None until set by the caller.
        self.summary = None

    def run(self):
        """Process up to 100 pages from the generator."""
        for step in range(100):  # Only go through 100 entries
            try:
                page = next(self.generator)
            except StopIteration:
                # Fix: the original crashed here when the generator yielded
                # fewer than 100 pages.
                break
            self.treat(page)

    def treat(self, page):
        """Load the enwiki article linked from *page*, clean it, save it.

        @param page: a Wikidata item page whose text is the item's JSON.
        """
        item = json.loads(page.text)  # content of the Wikidata item
        try:
            # Title of the linked article on en.wikipedia.org.
            title = item.get("sitelinks").get("enwiki").get("title")
        except AttributeError:
            # Item has no enwiki sitelink (.get() returned None above).
            return
        if title is None:
            return
        wikipage = pywikibot.Page(self.site, title)
        try:
            code = mwp.parse(wikipage.get())  # wiki markup parser
            # Keep only the {{Official website}} templates.
            websites = [temp for temp in code.filter_templates()
                        if temp.name == "Official website"]
            if websites:
                for web in websites:
                    processtemplate(web)
                self.save(unicode(code), wikipage)
        except Exception:
            # Deliberate best effort: skip pages that fail to load, parse,
            # or save (redirects, deleted pages, edit conflicts, ...).
            pass

    def load(self, page):
        """Return the text of *page*, or None if it cannot be loaded."""
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """Save *text* to *page* after showing a diff and confirming.

        @return: True if the page was saved, False otherwise.
        """
        # Only save if something was changed.
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # Show what was changed.
            pywikibot.showDiff(page.get(), text)
            pywikibot.output(u'Comment: %s' % comment)
            if not self.dry:
                if pywikibot.input_yn(
                        u'Do you want to accept these changes?',
                        default=False, automatic_quit=False):
                    try:
                        page.text = text
                        # Save the page.
                        # Fix: the original fell back to self.comment, an
                        # attribute that was never set (AttributeError);
                        # __init__ defines self.summary.
                        page.save(comment=comment or self.summary,
                                  minor=minorEdit, botflag=botflag)
                    except pywikibot.LockedPage:
                        pywikibot.output(u"Page %s is locked; skipping."
                                         % page.title(asLink=True))
                    except pywikibot.EditConflict:
                        pywikibot.output(
                            u'Skipping %s because of edit conflict'
                            % (page.title()))
                    except pywikibot.SpamfilterError as error:
                        pywikibot.output(
                            u'Cannot change %s because of spam blacklist entry %s'
                            % (page.title(), error.url))
                    else:
                        return True
        return False
def main(*args):
    """Build the Wikidata page generator and run the bot over it."""
    repo = pywikibot.Site("en", "wikipedia").data_repository()
    factory = pagegenerators.GeneratorFactory(repo)
    # All Wikidata pages referencing Property:P856 ("official website").
    factory.handleArg("-ref Property:P856")
    # Restrict to items whose sitelinks are mainspace articles.
    factory.namespaces = [0]
    generator = factory.getCombinedGenerator()
    BasicBot(generator).run()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment