Created August 19, 2013 22:20
This is my user-config.py for Pywikipediabot. See the discussion at http://meta.wikimedia.org/wiki/User_talk:AugurNZ (if it hasn't already been archived) for more details.
# -*- coding: utf-8 -*-
# This is an automatically generated file. You can find more configuration
# parameters in the 'config.py' file.

# The family of sites we are working on. wikipedia.py will import
# families/xxx_family.py so if you want to change this variable,
# you need to write such a file.
family = 'thetestwiki'

# The language code of the site we're working on.
mylang = 'en'

# The dictionary usernames should contain a username for each site where you
# have a bot account.
usernames['thetestwiki']['en'] = u'Augurnz-AbotTOIR'
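# Illustrative only (hypothetical account names, not part of the original
# file): bot accounts on additional families/languages would be listed the
# same way, e.g.
# usernames['wikipedia']['en'] = u'ExampleBot'
# usernames['wikipedia']['de'] = u'ExampleBot'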

############## LOGFILE SETTINGS ##############
# Defines for which scripts a logfile should be enabled. Logfiles will be
# saved in the 'logs' subdirectory.
# Example:
# log = ['interwiki', 'weblinkchecker', 'table2wiki']
# It is also possible to enable logging for all scripts, using this line:
# log = ['*']
# To disable all logging, use this:
# log = []
# By default, logging of interwiki.py is enabled because its logfiles can
# be used to generate so-called warnfiles.
# This setting can be overridden by the -log or -nolog command-line arguments.
log = ['interwiki']

# Maximal size of a logfile in kilobytes. If the size reaches that limit, the
# logfile will be renamed (if logfilecount is not 0) and a new file is started.
# logfilesize must be an integer value.
logfilesize = 1024

# Number of rotating logfiles to create. Older files get higher numbers. If
# logfilecount is 0, no logfile will be archived; the current logfile will be
# overwritten once it reaches the logfilesize above. If logfilecount is -1,
# there is no fixed set of rotating logfiles; instead, files are renamed
# whenever the current logfile is full, and the newest file gets the highest
# number until some logfiles are deleted.
logfilecount = 5
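# Taken together, the two settings above mean each logfile is rotated once it
# reaches roughly 1 MB (1024 KB), and up to five archived copies are kept.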

# Logging handler to use. You can choose between 'TRFH' (TimedRotatingFile-
# Handler) and 'RFH' (RotatingFileHandler); more might come.
loghandler = 'TRFH'

############## INTERWIKI SETTINGS ##############
# Should interwiki.py report warnings for missing links between foreign
# languages?
interwiki_backlink = True

# Should interwiki.py display every new link it discovers?
interwiki_shownew = True

# Should interwiki.py output a graph PNG file on conflicts?
# You need pydot for this: http://dkbza.org/pydot.html
interwiki_graph = False

# The robot will process this many subjects at a time, only starting to load
# new pages in the original language when the total falls below this number.
# Default is to process (at least) 100 subjects at once.
interwiki_min_subjects = 100

# If interwiki graphs are enabled, which format(s) should be used?
# Supported formats include png, jpg, ps, and svg. See:
# http://www.graphviz.org/doc/info/output.html
# If you want to also dump the dot files, you can use this in your
# user-config.py:
# interwiki_graph_formats = ['dot', 'png']
# If you need a PNG image with an HTML image map, use this:
# interwiki_graph_formats = ['png', 'cmap']
# If you only need SVG images, use:
# interwiki_graph_formats = ['svg']
interwiki_graph_formats = ['png']

# You can post the contents of your autonomous_problems.dat to the wiki,
# e.g. to http://de.wikipedia.org/wiki/Wikipedia:Interwiki-Konflikte .
# This allows others to assist you in resolving interwiki problems.
# To help these people, you can upload the interwiki graphs to your
# webspace somewhere. Set the base URL here, e.g.:
# 'http://www.example.org/~yourname/interwiki-graphs/'
interwiki_graph_url = None

# Save a file listing local articles without interwikis.
without_interwiki = False

# Experimental feature:
# Store the page contents on disk (/cache/ directory) instead of loading
# them in RAM.
interwiki_contents_on_disk = False

############## SETTINGS TO AVOID SERVER OVERLOAD ##############
# Slow down the robot such that it never requests a second page within
# 'minthrottle' seconds. This can be lengthened if the server is slow,
# but never to more than 'maxthrottle' seconds. However, if you are running
# more than one bot in parallel, the times are lengthened.
minthrottle = 10
maxthrottle = 60

# Slow down the robot such that it never makes a second page edit within
# 'put_throttle' seconds.
put_throttle = 10
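# At put_throttle = 10 the bot makes at most one edit every 10 seconds, i.e.
# about 6 edits per minute. Note that this value is overridden near the end
# of this file.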

############## SOLVE_DISAMBIGUATION SETTINGS ############
# Set disambiguation_comment[FAMILY][LANG] to a non-empty string to override
# the default edit comment for the solve_disambiguation bot.
# Use %s to represent the name of the disambiguation page being treated.
# Example:
#
# disambiguation_comment['wikipedia']['en'] = \
#     "Robot-assisted disambiguation ([[WP:DPL|you can help!]]): %s"
disambiguation_comment = {}
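# Hypothetical example for this particular configuration (not a setting from
# the original file):
# disambiguation_comment['thetestwiki']['en'] = \
#     u"Robot-assisted disambiguation: %s"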
sort_ignore_case = False

############## IMAGE RELATED SETTINGS ##############
# If you set this to True, images will be uploaded to Wikimedia
# Commons by default.
upload_to_commons = False

############## TABLE CONVERSION BOT SETTINGS ##############
# Split long paragraphs to make the wikitext source easier to read.
# Currently only table2wiki.py uses this.
splitLongParagraphs = False

# Sometimes HTML tables are indented for easier reading of the source,
# which can produce very ugly results in the conversion.
deIndentTables = True

# table2wiki.py is quite stable, so you might switch this to True.
table2wikiAskOnlyWarnings = True
table2wikiSkipWarnings = False

############## WEBLINK CHECKER SETTINGS ##############
# How many external links should weblinkchecker.py check at the same time?
# If you have a fast connection, you might want to increase this number so
# that slow servers won't slow you down.
max_external_links = 50
report_dead_links_on_talk = False

############## DATABASE SETTINGS ##############
db_hostname = 'localhost'
db_username = 'wikiuser'
db_password = ''

############## SEARCH ENGINE SETTINGS ##############
# Some scripts allow querying Google via the Google Web API. To use this
# feature, you must install the pyGoogle module from http://pygoogle.sf.net/
# and have a Google Web API license key. Note that Google doesn't give out
# license keys anymore.
# --------------------
# The Google Web API has been obsolete for a long time; the Google AJAX
# Search API can be used instead. You can sign up for an API key at
# http://code.google.com/apis/ajaxsearch/signup.html.
google_key = ''

# The Google AJAX Search API requires a referer website; this variable stores
# the referer web address you gave when signing up for the key.
google_api_refer = ''

# Some scripts allow using the Yahoo! Search Web Services. To use this feature,
# you must install the pYsearch module from http://pysearch.sourceforge.net/
# and get a Yahoo AppID from http://developer.yahoo.com
yahoo_appid = ''

# To use the Windows Live Search web service you must get an AppID from
# http://search.msn.com/developer
msn_appid = ''

# Using the Flickr API
flickr = {
    'api_key': u'',      # Provide your key!
    'api_secret': u'',   # API secret of your key (optional)
    'review': False,     # Should uploads automatically be marked as reviewed?
    'reviewer': u'',     # If so, under what reviewer name?
}

# Using the Panoramio API
panoramio = {
    'review': False,     # Should uploads automatically be marked as reviewed?
    'reviewer': u'',     # If so, under what reviewer name?
}

# Proxy settings for all connections.
# To use a proxy, proxy['host'] must support HTTP and include the port number
# (e.g. localhost:8080). If the proxy server requires authentication, set
# proxy['auth'] to ('ID', 'PASSWORD').
proxy = {
    'host': None,
    'auth': None,
}
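# Example with placeholder values (hypothetical, not from the original file):
# proxy = {
#     'host': 'localhost:8080',
#     'auth': ('ID', 'PASSWORD'),
# }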

############## COPYRIGHT SETTINGS ##############
# Enable/disable each search engine in the copyright.py script.
copyright_google = True
copyright_yahoo = True
copyright_msn = False

# Perform a deep check, loading URLs to check whether 'Wikipedia' is present.
# This may be useful to increase the number of correct results. If you don't
# have a fast connection, you might want to keep these disabled.
copyright_check_in_source_google = False
copyright_check_in_source_yahoo = False
copyright_check_in_source_msn = False

# Web pages may contain Wikipedia text without the word 'Wikipedia' but with
# the typical '[edit]' tag, as a result of a copy & paste procedure. Normally
# you want no report for this kind of URL, even if it is a copyright
# violation. However, when this setting is enabled, these URLs are logged in
# a file.
copyright_check_in_source_section_names = False

# Limit the number of queries per page.
copyright_max_query_for_page = 25

# Skip a specified number of queries.
copyright_skip_query = 0

# Number of attempts on connection error.
copyright_connection_tries = 10

# Behaviour if a query-limit-exceeded error occurs.
#
# Possibilities:
#
# 0 = None
# 1 = Disable search engine
# 2 = Sleep (default)
# 3 = Stop
copyright_exceeded_in_queries = 2
copyright_exceeded_in_queries_sleep_hours = 6

# Append the last-modified date of the URL to the script result.
copyright_show_date = True

# Append the length of the URL to the script result.
copyright_show_length = True

# By default the script tries to identify and skip text that contains a large
# comma-separated list or only numbers. But sometimes that might be the only
# unmodified part of a slightly edited and not otherwise reported copyright
# violation. You can disable this feature to try to increase the number of
# results.
copyright_economize_query = True

############## FURTHER SETTINGS ##############
# Use the experimental disk cache to prevent huge memory usage.
use_diskcache = False

# Retry loading a page on failure (back off 1 minute, 2 minutes, 4 minutes,
# up to 30 minutes).
retry_on_fail = True

# Defines what actions the bots are NOT allowed to do (e.g. 'edit') on
# wikipedia servers. Allows simulation runs of bots to be carried out without
# changing any page on the server side. This setting may be overridden in
# user_config.py.
actions_to_block = ['edit', 'watch', 'move', 'delete', 'undelete', 'protect',
                    'emailuser',
                    'submit',                            # for non-API
                    'wbcreateclaim', 'wbsetclaimvalue',  # for Wikidata
                    'wbsetclaim', 'wbeditentity',
                    'wbsetlabel', 'wbsetdescription',
                    'wbsetaliases', 'wbsetsitelink',
                    'wbsetreference', 'wblinktitles',
                    'wbremoveclaims', 'wbremovereferences']
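# With every write action listed above blocked, this account effectively runs
# in a read-only (simulation) mode: pages are fetched but never changed on
# the wiki.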

# How many pages should be put into the queue in asynchronous mode.
# If maxsize is <= 0, the queue size is infinite.
# Increasing this value will increase memory usage but could speed up
# processing; the higher the value, the smaller this effect becomes.
max_queue_size = 64

# Define the line separator. Pages retrieved via the API have "\n", whereas
# pages fetched from screen (mostly) have "\r\n". Interwiki and category
# separator settings in family files should use multiples of this.
# LS is a shortcut alias.
line_separator = LS = u'\r\n'
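# Illustration only (hypothetical usage, not part of the generated file): a
# family file could build its separators from this alias instead of
# hard-coding line endings, e.g.
# category_separator = LS * 2   # a blank line between text and categories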

# Added by Robert Frittmann -- how much time should elapse between each
# "put" command. Slows down the write access of the bot. Note that this
# overrides the put_throttle = 10 set earlier in this file.
put_throttle = 5
# End of configuration section