Skip to content

Instantly share code, notes, and snippets.

@Hydriz Hydriz/README.md
Created Apr 22, 2017

Embed
What would you like to do?
Bot for removing interlanguage links on Wiktionaries

Removeiw bot

This bot is for removing interlanguage links on pages and was created due to the deployment of the Cognate extension to Wiktionaries (see T150182 on Wikimedia's Phabricator).

To install, download the "setup.sh" and "removeiw.py" files to the same local directory, edit the PYWIKIPEDIA variable in "setup.sh" to point to your pywikibot installation, and then run "setup.sh" from that directory.

To run, use the following command in your pywikibot installation:

python pwb.py removeiw -always -family:wiktionary -lang:xxx -start:!

The -always parameter ensures that the bot runs without any user confirmation. Also, change -lang:xxx to the specific wiki that you wish to work on.

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Script to remove interlanguage links.
This script removes all interlanguage links on pages.
These command line parameters can be used to specify which pages to work on:
&params;
Furthermore, the following command line parameters are supported:
-always: Always save the page without any user confirmation.
-summary: Use your own edit summary for cleaning the page.
"""
from __future__ import unicode_literals, absolute_import
__version__ = '$Id$'
import pywikibot
from pywikibot import pagegenerators, output
from pywikibot.bot import ExistingPageBot, SingleSiteBot, suggest_help
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
'&params;': pagegenerators.parameterHelp,
}
# Namespace ids that IWBot.treat_page will process; pages in any other
# namespace are skipped unless the 'ignore_ns' option is set.
namespaces = [0]
class IWBot(ExistingPageBot, SingleSiteBot):

    """Bot that strips interlanguage links from the text of existing pages."""

    def __init__(self, **kwargs):
        """
        Register the supported options and resolve the edit summary.

        When no 'summary' option is supplied (or it is empty), the translated
        'interwikidata-clean-summary' message for the bot's site is used.
        """
        self.availableOptions.update(
            always=False,
            summary=None,
            ignore_ns=False,  # used by interwikidata_tests only
        )
        super(IWBot, self).__init__(**kwargs)
        if self.getOption('summary'):
            return
        self.options['summary'] = pywikibot.i18n.twtranslate(
            self.site, 'interwikidata-clean-summary')

    def treat_page(self):
        """
        Inspect the current page and clean it when it has language links.

        Returns False when the page is skipped, either because it is outside
        the allowed namespaces (and 'ignore_ns' is not set) or because it
        carries no interlanguage links. Otherwise hands over to clean_page().
        """
        page = self.current_page
        # The 'ignore_ns' option is only consulted for pages outside the
        # allowed namespaces.
        if not (page.namespace() in namespaces or
                self.getOption('ignore_ns')):
            link = page.title(asLink=True)
            output('{page} is not in allowed namespaces, skipping'
                   .format(page=link))
            return False

        # Stored on the instance because clean_page() checks it again.
        self.iwlangs = pywikibot.textlib.getLanguageLinks(
            page.text, insite=page.site)
        if self.iwlangs:
            self.clean_page()
            return
        output('No interlanguagelinks on {page}'.format(
            page=page.title(asLink=True)))
        return False

    def clean_page(self):
        """Remove all interlanguage links from the current page and save it."""
        if self.iwlangs:
            output('Cleaning up the page')
            page = self.current_page
            stripped = pywikibot.textlib.removeLanguageLinks(
                page.text, site=page.site)
            self.put_current(stripped, summary=self.getOption('summary'))
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments consumed by the page generator factory select the pages to work
    on. Of the remaining ones, '-summary:<text>' sets the edit summary and any
    other '-name' argument is passed to the bot as a boolean option set to
    True. Arguments that do not start with '-' are reported as unknown
    parameters instead of being forwarded to the bot.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when unknown parameters were given or no page generator
        was specified; None after the bot has run
    """
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    options = {}
    unknown = []
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if not arg.startswith('-'):
            # Previously such arguments were stored under the key None, which
            # made IWBot(**options) fail with a TypeError.
            unknown.append(arg)
            continue
        option, _, value = arg[1:].partition(':')
        if option == 'summary':
            options[option] = value
        else:
            options[option] = True

    if unknown:
        suggest_help(unknown_parameters=unknown)
        return False

    site = pywikibot.Site()
    generator = genFactory.getCombinedGenerator()
    if generator:
        generator = pagegenerators.PreloadingGenerator(generator)
        bot = IWBot(generator=generator, site=site, **options)
        bot.run()
    else:
        suggest_help(missing_generator=True)
        return False
if __name__ == "__main__":
    # Start the bot only when this file is executed as a script.
    main()
# Script for setting up the necessary environment for removeiw.py to work.
#
# This script assumes that you have already downloaded "removeiw.py" in the
# same directory as this script, and that it is run from that directory.
#
# Configure $PYWIKIPEDIA to point to your pywikibot installation.

# Abort on the first failing command or unset variable so that a failed
# "cd" can never cause the copy below to run in the wrong directory.
set -eu

PYWIKIPEDIA="/home/user/pywikibot"

# Check everything up front so that removeiw.py is not moved (or renamed to
# a file called "scripts") when the installation path is wrong.
if [ ! -f removeiw.py ]; then
    echo "setup: removeiw.py not found in the current directory" >&2
    exit 1
fi
if [ ! -d "$PYWIKIPEDIA/scripts/i18n/interwikidata" ]; then
    echo "setup: $PYWIKIPEDIA/scripts/i18n/interwikidata not found;" \
         "check the PYWIKIPEDIA variable" >&2
    exit 1
fi

mv -- removeiw.py "$PYWIKIPEDIA/scripts/"
cd -- "$PYWIKIPEDIA/scripts/i18n"

# "cp -R src dst" copies *into* dst when dst already exists, which would
# create removeiw/interwikidata on a second run; only copy when missing.
if [ ! -e removeiw ]; then
    cp -R interwikidata removeiw
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.