Skip to content

Instantly share code, notes, and snippets.

@Hydriz
Created April 22, 2017 03:22
Show Gist options
  • Save Hydriz/62b8decc1d0c90446643585b2eeb8faf to your computer and use it in GitHub Desktop.
Save Hydriz/62b8decc1d0c90446643585b2eeb8faf to your computer and use it in GitHub Desktop.
Bot for removing interlanguage links on Wiktionaries

Removeiw bot

This bot is for removing interlanguage links on pages and was created due to the deployment of the Cognate extension to Wiktionaries (see T150182 on Wikimedia's Phabricator).

To install, download the "setup.sh" and "removeiw.py" files into the same local directory, edit the PYWIKIPEDIA variable in "setup.sh" so that it points to your pywikibot installation, and then run "setup.sh".

To run, use the following command in your pywikibot installation:

python pwb.py removeiw -always -family:wiktionary -lang:xxx -start:!

The -always parameter ensures that the bot runs without asking for any user confirmation. Also, replace xxx in -lang:xxx with the language code of the specific wiki that you wish to work on.

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Script to remove interlanguage links.

This script removes all interlanguage links on pages.

These command line parameters can be used to specify which pages to work on:

&params;

Furthermore, the following command line parameters are supported:

-always:  Always save the page without any user confirmation.

-summary: Use your own edit summary for cleaning the page.
"""
from __future__ import unicode_literals, absolute_import
__version__ = '$Id$'
import pywikibot
from pywikibot import pagegenerators, output
from pywikibot.bot import ExistingPageBot, SingleSiteBot, suggest_help
# This is required for the text that is shown when you run this script
# with the parameter -help.
# Maps the '&params;' placeholder that appears in the module docstring to
# pagegenerators.parameterHelp.
docuReplacements = {
'&params;': pagegenerators.parameterHelp,
}
# Namespace numbers the bot may work on: IWBot.treat_page skips every page
# whose namespace is not listed here, unless the 'ignore_ns' option is set.
namespaces = [0]
class IWBot(ExistingPageBot, SingleSiteBot):

    """Bot that strips interlanguage links from the pages it is given."""

    def __init__(self, **kwargs):
        """Register this script's options and settle on an edit summary.

        All keyword arguments are handed on to the parent bot classes.
        When no (or an empty) 'summary' option was supplied, the translated
        'interwikidata-clean-summary' message is used instead.
        """
        script_options = {
            'always': False,
            'summary': None,
            'ignore_ns': False,  # used by interwikidata_tests only
        }
        self.availableOptions.update(script_options)
        super(IWBot, self).__init__(**kwargs)
        if self.getOption('summary'):
            return
        self.options['summary'] = pywikibot.i18n.twtranslate(
            self.site, 'interwikidata-clean-summary')

    def treat_page(self):
        """Skip pages that need no work and clean the remaining ones.

        Returns False when the page is skipped, either because its
        namespace is not listed in the module-level ``namespaces`` (and
        'ignore_ns' is not set) or because it has no interlanguage links.
        Otherwise the page is passed to clean_page() and None is returned.
        """
        page = self.current_page
        allowed = (page.namespace() in namespaces
                   or self.getOption('ignore_ns'))
        if not allowed:
            message = '{page} is not in allowed namespaces, skipping'
            output(message.format(page=page.title(asLink=True)))
            return False

        # Kept on the instance because clean_page() checks it again.
        self.iwlangs = pywikibot.textlib.getLanguageLinks(
            page.text, insite=page.site)
        if not self.iwlangs:
            message = 'No interlanguagelinks on {page}'
            output(message.format(page=page.title(asLink=True)))
            return False

        self.clean_page()

    def clean_page(self):
        """Remove the interlanguage links from the current page text.

        Does nothing when treat_page() found no links. Otherwise the
        stripped text is handed to put_current() together with the
        configured edit summary.
        """
        if not self.iwlangs:
            return
        output('Cleaning up the page')
        page = self.current_page
        stripped_text = pywikibot.textlib.removeLanguageLinks(
            page.text, site=page.site)
        self.put_current(stripped_text, summary=self.getOption('summary'))
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments that are consumed neither by pywikibot.handle_args nor by the
    page generator factory are treated as bot options: "-summary:TEXT"
    stores TEXT as the edit summary, and any other "-name" argument is
    stored as a boolean flag. Arguments that do not start with "-" are
    reported with a warning and ignored.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if no page generator was specified, otherwise None
    """
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    options = {}
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if not arg.startswith('-'):
            # Such an argument has no option name. Storing it under the key
            # None would make IWBot(**options) raise a TypeError, because
            # keyword names must be strings.
            pywikibot.warning(
                'Ignoring unrecognized argument "{0}"'.format(arg))
            continue
        option, _, value = arg[1:].partition(':')
        if option == 'summary':
            options[option] = value
        else:
            options[option] = True

    site = pywikibot.Site()
    generator = genFactory.getCombinedGenerator()
    if not generator:
        suggest_help(missing_generator=True)
        return False

    generator = pagegenerators.PreloadingGenerator(generator)
    bot = IWBot(generator=generator, site=site, **options)
    bot.run()
# Run the bot only when this file is executed as a script.
if __name__ == "__main__":
    main()
#!/bin/sh
# Script for setting up the necessary environment for removeiw.py to work.
#
# This script assumes that you have already downloaded "removeiw.py" in the
# same directory as this script.
#
# Abort on the first failing command or unset variable, so that a wrong
# PYWIKIPEDIA path cannot cause files to be moved or copied somewhere else.
set -eu

# Configure $PYWIKIPEDIA to point to your pywikibot installation.
PYWIKIPEDIA="/home/user/pywikibot"

# The trailing slash makes mv fail, instead of renaming removeiw.py to
# "scripts", when the scripts directory does not exist. The quotes keep
# paths containing spaces intact.
mv removeiw.py "$PYWIKIPEDIA/scripts/"

# Copy the interwikidata i18n directory to a new directory named removeiw.
cd "$PYWIKIPEDIA/scripts/i18n"
cp -R interwikidata removeiw
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment