@AugurNZ
Created August 19, 2013 22:20
This is my user-config.py for Pywikipediabot. See the discussion at http://meta.wikimedia.org/wiki/User_talk:AugurNZ (if it hasn't already been archived) for more details.
# -*- coding: utf-8 -*-
# This is an automatically generated file. You can find more configuration
# parameters in the 'config.py' file.
# The family of sites we are working on. wikipedia.py will import
# families/xxx_family.py so if you want to change this variable,
# you need to write such a file.
family = 'thetestwiki'
# The language code of the site we're working on.
mylang = 'en'
# The dictionary usernames should contain a username for each site where you
# have a bot account.
usernames['thetestwiki']['en'] = u'Augurnz-AbotTOIR'
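# Illustration only (hypothetical account name): each further site gets its own
# entry following the same usernames[family][lang] pattern, e.g.
# usernames['wikipedia']['en'] = u'ExampleBot'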
############## LOGFILE SETTINGS ##############
# Defines for which scripts a logfile should be enabled. Logfiles will be
# saved in the 'logs' subdirectory.
# Example:
# log = ['interwiki', 'weblinkchecker', 'table2wiki']
# It is also possible to enable logging for all scripts, using this line:
# log = ['*']
# To disable all logging, use this:
# log = []
# By default, logging of interwiki.py is enabled because its logfiles can
# be used to generate so-called warnfiles.
# This setting can be overridden by the -log or -nolog command-line arguments.
log = ['interwiki']
# Maximal size of a logfile in kilobytes. When a logfile reaches that limit,
# it is renamed (if logfilecount is not 0) and logging continues in a fresh
# file under the original name. logfilesize must be an integer value.
logfilesize = 1024
# Number of rotated logfiles to keep. The older files get the higher
# numbers. If logfilecount is 0, no logfile is archived; instead the current
# logfile is overwritten once it reaches the logfilesize above.
# If logfilecount is -1 there is no fixed set of rotating logfiles; full
# logfiles are simply renamed, and the newest file gets the highest number
# until some logfiles are deleted.
logfilecount = 5
# Logging handler to use. You can choose between 'TRFH'
# (TimedRotatingFileHandler) and 'RFH' (RotatingFileHandler); more might come.
loghandler = 'TRFH'
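# Worked example from the values above: logfilesize = 1024 KB is about 1 MB per
# file, so with logfilecount = 5 each logged script can accumulate roughly five
# rotated logfiles plus the current one, i.e. around 6 MB of logs on disk.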
############## INTERWIKI SETTINGS ##############
# Should interwiki.py report warnings for missing links between foreign
# languages?
interwiki_backlink = True
# Should interwiki.py display every new link it discovers?
interwiki_shownew = True
# Should interwiki.py output a graph PNG file on conflicts?
# You need pydot for this: http://dkbza.org/pydot.html
interwiki_graph = False
# The robot keeps (at least) this many subjects in progress at a time, and
# only starts loading new pages in the original language when the total
# falls below this number. The default is to process (at least) 100 subjects
# at once.
interwiki_min_subjects = 100
# If interwiki graphs are enabled, which format(s) should be used?
# Supported formats include png, jpg, ps, and svg. See:
# http://www.graphviz.org/doc/info/output.html
# If you want to also dump the dot files, you can use this in your
# user-config.py:
# interwiki_graph_formats = ['dot', 'png']
# If you need a PNG image with an HTML image map, use this:
# interwiki_graph_formats = ['png', 'cmap']
# If you only need SVG images, use:
# interwiki_graph_formats = ['svg']
interwiki_graph_formats = ['png']
# You can post the contents of your autonomous_problems.dat to the wiki,
# e.g. to http://de.wikipedia.org/wiki/Wikipedia:Interwiki-Konflikte .
# This allows others to assist you in resolving interwiki problems.
# To help these people, you can upload the interwiki graphs to your
# webspace somewhere. Set the base URL here, e.g.:
# 'http://www.example.org/~yourname/interwiki-graphs/'
interwiki_graph_url = None
# Save a file listing local articles that have no interwikis.
without_interwiki = False
# Experimental feature:
# Store the page contents on disk (/cache/ directory) instead of loading
# them in RAM.
interwiki_contents_on_disk = False
############## SETTINGS TO AVOID SERVER OVERLOAD ##############
#
# Slow down the robot such that it never requests a second page within
# 'minthrottle' seconds. This delay can be lengthened if the server is slow,
# but never beyond 'maxthrottle' seconds. However, if you are running more
# than one bot in parallel, the times are lengthened.
minthrottle = 10
maxthrottle = 60
# Slow down the robot such that it never makes a second page edit within
# 'put_throttle' seconds.
put_throttle = 10
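# Worked example from the values above: put_throttle = 10 allows at most one
# page save every 10 seconds (roughly 6 edits per minute), while page requests
# are spaced by at least minthrottle (10 s) and at most maxthrottle (60 s).
# As noted above, these delays grow when several bots run in parallel.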
############## SOLVE_DISAMBIGUATION SETTINGS ############
#
# Set disambiguation_comment[FAMILY][LANG] to a non-empty string to override
# the default edit comment for the solve_disambiguation bot.
# Use %s to represent the name of the disambiguation page being treated.
# Example:
#
# disambiguation_comment['wikipedia']['en'] = \
# "Robot-assisted disambiguation ([[WP:DPL|you can help!]]): %s"
disambiguation_comment = {}
sort_ignore_case = False
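# Illustration only (hypothetical edit summary), applying the pattern from the
# commented example above to the family/language configured in this file:
# disambiguation_comment['thetestwiki']['en'] = \
#     u"Robot-assisted disambiguation: %s"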
############## IMAGE RELATED SETTINGS ##############
# If you set this to True, images will be uploaded to Wikimedia
# Commons by default.
upload_to_commons = False
############## TABLE CONVERSION BOT SETTINGS ##############
# Split long paragraphs to make the wikitext source easier to read.
# Only table2wiki.py uses this for now.
splitLongParagraphs = False
# HTML tables are sometimes indented to make their source easier to read,
# but that indentation can produce very ugly results in wikitext.
deIndentTables = True
# table2wiki.py works quite stably, so you might switch this to True.
table2wikiAskOnlyWarnings = True
table2wikiSkipWarnings = False
############## WEBLINK CHECKER SETTINGS ##############
# How many external links should weblinkchecker.py check at the same time?
# If you have a fast connection, you might want to increase this number so
# that slow servers won't slow you down.
max_external_links = 50
report_dead_links_on_talk = False
############## DATABASE SETTINGS ##############
db_hostname = 'localhost'
db_username = 'wikiuser'
db_password = ''
############## SEARCH ENGINE SETTINGS ##############
# Some scripts allow querying Google via the Google Web API. To use this feature,
# you must install the pyGoogle module from http://pygoogle.sf.net/ and have a
# Google Web API license key. Note that Google doesn't give out license keys
# anymore.
# --------------------
# The Google Web API has been obsolete for a long time; the Google AJAX
# Search API can be used instead. You can sign up for an API key at
# http://code.google.com/apis/ajaxsearch/signup.html.
google_key = ''
# The Google AJAX Search API requires a referer website; this variable holds
# the referer web address you gave when you signed up for the key.
google_api_refer = ''
# Some scripts allow using the Yahoo! Search Web Services. To use this feature,
# you must install the pYsearch module from http://pysearch.sourceforge.net/
# and get a Yahoo AppID from http://developer.yahoo.com
yahoo_appid = ''
# To use Windows Live Search web service you must get an AppID from
# http://search.msn.com/developer
msn_appid = ''
# Using the Flickr API
flickr = {
    'api_key': u'',     # Provide your key!
    'api_secret': u'',  # API secret of your key (optional)
    'review': False,    # Do we want our uploads to be reviewed automatically?
    'reviewer': u'',    # If so, under which reviewer name?
}
# Using the Panoramio API
panoramio = {
    'review': False,    # Do we want our uploads to be reviewed automatically?
    'reviewer': u'',    # If so, under which reviewer name?
}
# Proxy settings for all connections.
# To use a proxy, proxy['host'] has to support HTTP and must include the port
# number (e.g. localhost:8080).
# If the proxy server needs authentication, set proxy['auth'] to
# ('ID', 'PASSWORD').
proxy = {
    'host': None,
    'auth': None,
}
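# Illustration only (hypothetical values), following the format described in
# the comments above:
# proxy = {
#     'host': 'localhost:8080',
#     'auth': ('ID', 'PASSWORD'),
# }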
############## COPYRIGHT SETTINGS ##############
# Enable/disable the search engines used by the copyright.py script.
copyright_google = True
copyright_yahoo = True
copyright_msn = False
# Perform a deep check, loading URLs to search whether 'Wikipedia' is present.
# This may be useful to increase the number of correct results. If you don't
# have a fast connection, you might want to keep these disabled.
copyright_check_in_source_google = False
copyright_check_in_source_yahoo = False
copyright_check_in_source_msn = False
# Web pages may contain Wikipedia text without the word 'Wikipedia' but with
# the typical '[edit]' tag, as a result of a copy & paste procedure. You
# usually don't want such URLs reported, even if they are copyright
# violations. However, when this is enabled, these URLs are logged in a file.
copyright_check_in_source_section_names = False
# Limit the number of queries per page.
copyright_max_query_for_page = 25
# Skip a specified number of queries
copyright_skip_query = 0
# Number of attempts on connection error.
copyright_connection_tries = 10
# Behavior if a 'queries exceeded' error occurs.
#
# Possibilities:
#
# 0 = None
# 1 = Disable search engine
# 2 = Sleep (default)
# 3 = Stop
copyright_exceeded_in_queries = 2
copyright_exceeded_in_queries_sleep_hours = 6
# Append last modified date of URL to script result
copyright_show_date = True
# Append length of URL to script result
copyright_show_length = True
# By default the script tries to identify and skip text that contains a large
# comma-separated list or only numbers. However, sometimes that may be the
# only unmodified part of a slightly edited and otherwise unreported
# copyright violation. You can disable this feature to try to increase the
# number of results.
copyright_economize_query = True
############## FURTHER SETTINGS ##############
# Use the experimental disk cache to prevent huge memory usage
use_diskcache = False
# Retry loading a page on failure (back off 1 minute, 2 minutes, 4 minutes
# up to 30 minutes)
retry_on_fail = True
# Defines which actions the bots are NOT allowed to do (e.g. 'edit') on the
# wiki servers. This allows simulation runs of bots to be carried out without
# changing any page on the server side. This setting may be overridden in
# user-config.py.
actions_to_block = ['edit', 'watch', 'move', 'delete', 'undelete', 'protect',
                    'emailuser',
                    'submit',                            # for non-API
                    'wbcreateclaim', 'wbsetclaimvalue',  # for Wikidata
                    'wbsetclaim', 'wbeditentity',
                    'wbsetlabel', 'wbsetdescription',
                    'wbsetaliases', 'wbsetsitelink',
                    'wbsetreference', 'wblinktitles',
                    'wbremoveclaims', 'wbremovereferences']
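# Illustration only: with 'edit' and 'submit' in the list above, the bot runs
# in simulation mode and never saves pages. Removing 'edit' (and 'submit' for
# non-API saves) would allow real page saves again, still subject to the
# put_throttle settings above.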
# How many pages should be put into the queue in asynchronous mode.
# If maxsize is <= 0, the queue size is infinite.
# Increasing this value will increase memory usage but could speed up
# processing; the higher the value, the smaller this effect becomes.
max_queue_size = 64
# Define the line separator. Pages retrieved via the API have "\n", whereas
# pages fetched from the screen (mostly) have "\r\n". Interwiki and category
# separator settings in family files should use multiples of this.
# LS is a shortcut alias.
line_separator = LS = u'\r\n'
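# Illustration only: a script building page text could join lines with the LS
# alias defined above, e.g.
# new_text = u'== Heading ==' + LS + u'Some text'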
# Added by Robert Frittmann -- how much time (in seconds) should elapse
# between each "put" command. Slows down the bot's write access. Note that
# this assignment overrides the put_throttle value set earlier in this file.
put_throttle = 5
# End of configuration section