Skip to content

Instantly share code, notes, and snippets.

@vssun
Last active November 8, 2017 02:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vssun/6c0bb7acc6c7e7a4d789ca8e80ef95cc to your computer and use it in GitHub Desktop.
Save vssun/6c0bb7acc6c7e7a4d789ca8e80ef95cc to your computer and use it in GitHub Desktop.
For downloading the subpages of a wiki page as individual files. (Based on the sample basic bot script, basic.py.)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
An incomplete sample script.
This is not a complete bot; rather, it is a template from which simple
bots can be made. You can rename it to mybot.py, then edit it in
whatever way you want.
The following parameters are supported:
&params;
-dry If given, doesn't do any real changes, but only shows
what would have been changed.
"""
#
# (C) Pywikibot team, 2006-2014
#
# Distributed under the terms of the MIT license.
#
from __future__ import unicode_literals
__version__ = '$Id: 23dac2badba93914592c50e95d72c53d7d2d7ea7 $'
#
import codecs
import os

import pywikibot
from pywikibot import i18n
from pywikibot import pagegenerators
# Needed for the text shown when this script is run with the -help
# parameter; the key matches the '&params;' placeholder used in the
# module docstring.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
class BasicBot:

    """
    Bot that stores the text of every subpage it visits in a local file.

    For each page delivered by the generator the text is loaded, offered to
    save() (a no-op as long as treat() leaves the text unmodified), and, when
    the title contains a "/", written UTF-8 encoded to a file inside
    output_dir.
    """

    # Edit summary message that should be used is placed on /i18n subdirectory.
    # The file containing these messages should have the same name as the
    # caller script (i.e. basic.py in this case)

    # Directory, relative to the current working directory, that receives
    # one file per subpage. It is created on demand.
    output_dir = "output"

    def __init__(self, generator, dry):
        """
        Constructor.

        @param generator: The page generator that determines on which pages
            to work.
        @type generator: generator.
        @param dry: If True, doesn't do any real changes, but only shows
            what would have been changed.
        @type dry: boolean.
        """
        self.generator = generator
        self.dry = dry
        # Set the edit summary message
        site = pywikibot.Site()
        self.summary = i18n.twtranslate(site, 'basic-changing')

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    @staticmethod
    def _subpage_filename(title):
        """
        Derive the local file name for a page title.

        The part after the first "/" is used. Any further "/" is replaced
        by "_" so that the result is always a plain file name and never a
        path into a (possibly missing) sub-directory.

        @param title: full page title, e.g. "Main/Sub".
        @return: the file name, or None if the title has no non-empty
            subpage part.
        """
        _, separator, subpage = title.partition("/")
        if not separator or not subpage:
            return None
        return subpage.replace("/", "_")

    def _write_subpage(self, filename, text):
        """Write text, UTF-8 encoded, to filename inside output_dir."""
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)
        path = os.path.join(self.output_dir, filename)
        # The context manager closes the file even if write() raises.
        with codecs.open(path, "w", "utf-8") as handle:
            handle.write(text)

    def treat(self, page):
        """
        Load the given page, offer it for saving and dump subpages to disk.

        Pages that cannot be loaded or that are empty are skipped. The file
        is written regardless of the result of save().
        """
        text = self.load(page)
        if not text:
            return

        ################################################################
        # NOTE: Here you can modify the text in whatever way you want. #
        ################################################################

        # If you find out that you do not want to edit this page, just return.
        # Example: This puts the text 'Test' at the beginning of the page.
        # text = 'Test ' + text

        if not self.save(text, page, self.summary):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

        title = page.title()
        pywikibot.output(title)
        filename = self._subpage_filename(title)
        if filename is None:
            # Not a subpage: nothing to write.
            return
        pywikibot.output(filename)
        self._write_subpage(filename, text)
        pywikibot.output(u'File created')

    def load(self, page):
        """
        Load the text of the given page.

        @return: the page text, or None if the page does not exist or is a
            redirect (a message is printed in both cases).
        """
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """
        Update the given page with new text.

        Nothing is done if text equals the current page text. Otherwise the
        diff is shown and, unless the bot runs in dry mode, the user is asked
        to confirm before the page is saved.

        @param text: the new page text.
        @param page: the page to update.
        @param comment: edit summary; falls back to self.summary when empty.
        @param minorEdit: passed to page.save() as minor.
        @param botflag: passed to page.save() as botflag.
        @return: True if the page was saved, False otherwise.
        """
        # only save if something was changed
        if text == page.get():
            return False

        # The original fell back to the non-existent attribute self.comment.
        summary = comment or self.summary

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # show what was changed
        pywikibot.showDiff(page.get(), text)
        pywikibot.output(u'Comment: %s' % summary)

        if self.dry:
            return False
        if not pywikibot.input_yn(u'Do you want to accept these changes?',
                                  default=False, automatic_quit=False):
            return False

        try:
            page.text = text
            # Save the page
            page.save(summary=summary, minor=minorEdit, botflag=botflag)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False
def main(*args):
    """
    Parse the command line and start the bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Global arguments (e.g. the desired site) are handled first; the
    # remaining ones are specific to this script.
    remaining = pywikibot.handle_args(args)

    # The factory processes the arguments shared with other scripts that
    # determine which pages to work on.
    factory = pagegenerators.GeneratorFactory()

    # With dry set, no real changes are made; they are only shown.
    dry = False
    for option in remaining:
        if option.startswith("-dry"):
            dry = True
            continue
        factory.handleArg(option)

    pages = factory.getCombinedGenerator()
    if not pages:
        # No page selection was given: print the usage text instead.
        pywikibot.showHelp()
        return

    # The preloading generator downloads multiple pages from the wiki
    # simultaneously.
    preloaded = pagegenerators.PreloadingGenerator(pages)
    BasicBot(preloaded, dry).run()
# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment