Skip to content

Instantly share code, notes, and snippets.

@ciencia
Last active February 26, 2022 11:53
Show Gist options
  • Save ciencia/f3d02298cdc10823628df46b3a13d42e to your computer and use it in GitHub Desktop.
Save ciencia/f3d02298cdc10823628df46b3a13d42e to your computer and use it in GitHub Desktop.
Grabbers
## INSTALL MEDIAWIKI DATABASE
php maintenance/install.php --server="http://www.mynewwiki.net" --dbname=grabber --dbserver="localhost:/mnt/tmpgrab/mysql/mysql.sock" --installdbuser=currentuser --installdbpass=password --lang=es --pass=aaaaa --dbuser=grabber --dbpass=grabber --scriptpath=/ WikiName "Admin"
## TRUNCATE TABLES PRIOR TO GRABBER
mysql --defaults-file=/etc/my-memory.cnf -p
-- Empty the tables listed below before running the grabbers.
TRUNCATE TABLE user;
TRUNCATE TABLE page;
TRUNCATE TABLE revision;
TRUNCATE TABLE text;
TRUNCATE TABLE job;
TRUNCATE TABLE user_groups;
TRUNCATE TABLE pagelinks;
TRUNCATE TABLE externallinks;
## CREATE EXTERNAL STORAGE
mysql --defaults-file=/etc/my-disk.cnf -p
-- External-storage blob table: one auto-numbered row per stored blob.
-- The /*$wgDBprefix*/ marker is an ordinary SQL comment when run by hand.
CREATE TABLE /*$wgDBprefix*/blobs (
    blob_id   INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    blob_text LONGBLOB
) ENGINE = InnoDB;
## GRABBERS - HAVE FUN!
# Initial text grab. Set --enddate accordingly and write it down: the
# incremental run below starts from that same timestamp.
# Most commands here copy stdout and stderr (2>&1 | tee) to a timestamped log file.
php grabText.php --url=http://es.pokemon.wikia.com/api.php --enddate=20170609155229 --namespaces=0 --username=BOT --password=PASSORD 2>&1 | tee /home/currentuser/outputGrabText`date +'%y%m%d%H%M%S'`.log
# To update the text later, pass the previous run's --enddate as the new --startdate.
php grabNewText.php --url=http://es.pokemon.wikia.com/api.php --startdate=20170609155229 --enddate=20170728094220 --namespaces=0 --username=BOT --password=PASSORD 2>&1 | tee /home/currentuser/outputGrabNewText`date +'%y%m%d%H%M%S'`.log
# Want to add more namespaces? Use the highest --enddate used by the other grabber runs.
php grabText.php --url=http://es.pokemon.wikia.com/api.php --enddate=20170728094220 --namespaces="14|6" --username=BOT --password=PASSORD 2>&1 | tee /home/currentuser/outputGrabText`date +'%y%m%d%H%M%S'`.log
# In that case review --namespaces: it must list every namespace whose text
# has been grabbed previously.
php grabNewText.php --url=http://es.pokemon.wikia.com/api.php --startdate=20170728094220 --enddate=20170729073845 --namespaces="0|14|6" --username=BOT --password=PASSORD 2>&1 | tee /home/currentuser/outputGrabNewText`date +'%y%m%d%H%M%S'`.log
# Logs. Check the filters (presumably the --logtypes list) before running.
php grabLogs.php --url=http://es.pokemon.wikia.com/api.php --end=20170729073845 --logtypes="block|protect|rights|delete|upload|move|import|patrol|merge|suppress|abusefilter|renameuser|newusers" --username=BOT --password=PASSORD 2>&1 | tee /home/currentuser/outputGrabLogs`date +'%y%m%d%H%M%S'`.log
# Files.
# NOTE(review): unlike the other commands, this one does not redirect stderr
# (no 2>&1) and writes to a fixed log name - confirm that is intended.
php grabFiles.php --url=http://es.pokemon.wikia.com/api.php --wikia --username=BOT --password=PASSORD --enddate=20170727152601 | tee /home/currentuser/outputGrabFiles20170727.log
# Still updating files or logs? Run the incremental grabbers over a start/end window.
sudo -u php-fpm-wikidex php grabNewFiles.php --url=http://es.pokemon.wikia.com/api.php --wikia --username=BOT --password=PASSORD --startdate=20170801203002 --enddate=20170802201102 2>&1 | tee /home/currentuser/outputGrabNewFiles`date +'%y%m%d%H%M%S'`.log
sudo -u php-fpm-wikidex php grabLogs.php --url=http://es.pokemon.wikia.com/api.php --start=20170801203002 --end=20170802201102 --logtypes="block|protect|rights|delete|upload|move|import|patrol|merge|suppress|abusefilter|renameuser|newusers" --username=BOT --password=PASSORD 2>&1 | tee /home/currentuser/outputGrabLogs`date +'%y%m%d%H%M%S'`.log
# The following can be run multiple times to populate some metadata. However,
# the database must already be in its final location; otherwise the ramdisk
# may be too small for all the tables!
sudo -u php-fpm php populateUserTable.php
nice sudo -u php-fpm php ../maintenance/rebuildrecentchanges.php --from=20170801203002 --to=20170802201102
nice sudo -u php-fpm php ../maintenance/refreshLinks-mejor.php --changes-since=20170801203002
nice sudo -u php-fpm php ../maintenance/runJobs.php
## Database settings
# The wiki database is reached through the Unix socket of the instance
# that lives under /mnt/tmpgrab.
$wgDBtype     = 'mysql';
$wgDBserver   = 'localhost:/mnt/tmpgrab/mysql/mysql.sock';
$wgDBname     = 'grabber';
$wgDBuser     = 'grabber';
$wgDBpassword = 'grabber';

# Table options MediaWiki applies during installation or update
$wgDBTableOptions = 'ENGINE=InnoDB, DEFAULT CHARSET=binary';

# Pingback would periodically send basic data about this MediaWiki instance
# to https://www.mediawiki.org/; it is switched off here.
$wgPingback = false;

# Site language code; should be one of the codes listed in ./languages/data/Names.php
$wgLanguageCode = 'es';
# REMEMBER TO SET UP NAMESPACES PROPERLY BEFORE IMPORT!!!
# Custom namespaces, keyed by namespace ID. Names use underscores, not spaces.
$wgExtraNamespaces[110]  = 'Forum';
$wgExtraNamespaces[111]  = 'Forum_talk';

$wgExtraNamespaces[420]  = 'Layer';
$wgExtraNamespaces[421]  = 'Layer_talk';

$wgExtraNamespaces[828]  = 'Module';
$wgExtraNamespaces[829]  = 'Module_talk';

$wgExtraNamespaces[1200] = 'Muro';
$wgExtraNamespaces[1201] = 'Hilo';
$wgExtraNamespaces[1202] = 'Bienvenida_del_Muro';

$wgExtraNamespaces[2000] = 'Subforo';
$wgExtraNamespaces[2001] = 'Tema_del_foro';
$wgExtraNamespaces[2002] = 'Tema';
# Namespace aliases: alternative names that resolve to the namespace IDs below.
# Keys are written with underscores instead of spaces: MediaWiki converts
# spaces in a title to underscores before it looks the prefix up, so an
# alias key containing a literal space would never match.
$wgNamespaceAliases['Forum'] = 110;
$wgNamespaceAliases['Forum_talk'] = 111;
$wgNamespaceAliases['Message_Wall'] = 1200;
$wgNamespaceAliases['Thread'] = 1201;
$wgNamespaceAliases['Message_Wall_Greeting'] = 1202;
$wgNamespaceAliases['Board'] = 2000;
$wgNamespaceAliases['Board_Thread'] = 2001;
$wgNamespaceAliases['Topic'] = 2002;
# Spanish gendered variants of the User (2) and User talk (3) namespaces
$wgNamespaceAliases['Usuaria'] = 2;
$wgNamespaceAliases['Usuario_Discusión'] = 3;
$wgNamespaceAliases['Usuaria_Discusión'] = 3;
# Needed so maintenance scripts do not warn about a missing SERVER_NAME (T172060)
$wgOverrideHostname = 'grabber-wikidex';

# EXTERNAL STORAGE
# A single 'text1' cluster with one server, reached through the
# mysql-disk Unix socket; it is also the default store.
$wgExternalStores = [ 'DB' ];
$wgExternalServers = [
    'text1' => [
        [
            'host'     => 'localhost:/run/mysql/mysql-disk.sock',
            'user'     => 'grabber_text1',
            'password' => 'grabber_text1',
            'dbname'   => 'grabber_text1',
            'type'     => 'mysql',
            'load'     => 1,
        ],
    ],
];
$wgDefaultExternalStore = [ 'DB://text1' ];
$wgCompressRevisions = true;
# The following options will be passed to all MariaDB clients
[client]
# password = your_password
#port = 3308
socket = /run/mysql/mysql-disk.sock
#protocol = TCP
[mysqld]
pid-file=/run/mysql/mysql-disk.pid
innodb_file_format=Barracuda
innodb_file_per_table=ON
innodb_compression_level=0
innodb_data_home_dir = /home/mysql-disk/mysql/ibdata
innodb_data_file_path = ibdata1:10M:autoextend
innodb_log_group_home_dir = /home/mysql-disk/mysql/iblog
innodb_buffer_pool_size = 80M
innodb_log_file_size = 50M
innodb_log_buffer_size = 64M
innodb_flush_log_at_trx_commit = 0
innodb_lock_wait_timeout = 25
innodb_log_compressed_pages=OFF
# Binary logging must stay off: keep the two lines below commented out.
# log_bin=mysql-bin
# binlog_format=mixed
# Data directory of this instance; change the path if you want to store your database elsewhere
datadir = /home/mysql-disk/mysql
# required unique id between 1 and 2^32 - 1
# defaults to 1 if master-host is not set
# but will not function as a master if omitted
server-id = 1
# These are commonly set, remove the # and set as required.
#port = 3308
socket = /run/mysql/mysql-disk.sock
skip-networking
sql_mode=NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES
max_allowed_packet=24M
# The following options will be passed to all MariaDB clients
[client]
# password = your_password
#port = 3308
socket = /mnt/tmpgrab/mysql/mysql.sock
#protocol = TCP
[mysqld]
pid-file=/mnt/tmpgrab/mysql/mysql.pid
# Using newer file format that supports dynamic and compressed row formats.
# If you are using replication you have to make sure, that these options are
# set everywhere the same way (probably comment them out is the easiest way)
innodb_file_format=Barracuda
innodb_file_per_table=ON
innodb_data_home_dir = /mnt/tmpgrab/mysql/ibdata
innodb_data_file_path = ibdata1:10M:autoextend
innodb_log_group_home_dir = /mnt/tmpgrab/mysql/iblog
innodb_buffer_pool_size = 50M
innodb_flush_log_at_trx_commit = 0
innodb_lock_wait_timeout = 25
innodb_log_compressed_pages=OFF
# Binary logging must stay off: keep the two lines below commented out.
# log_bin=mysql-bin
# binlog_format=mixed
# Data directory of this instance; change the path if you want to store your database elsewhere
datadir = /mnt/tmpgrab/mysql
# required unique id between 1 and 2^32 - 1
# defaults to 1 if master-host is not set
# but will not function as a master if omitted
server-id = 1
# These are commonly set, remove the # and set as required.
#port = 3308
socket = /mnt/tmpgrab/mysql/mysql.sock
skip-networking
sql_mode=NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES
max_allowed_packet=24M
-- Bootstrap for the instance that hosts the external-storage database
-- (presumably the setup-mysql-disk.sql file loaded by the disk start script).
-- Every statement is re-runnable: the data directory of that instance is
-- persistent, so a plain CREATE USER / CREATE DATABASE would fail with
-- "already exists" the second time the file is loaded.
-- IF NOT EXISTS on CREATE USER needs MariaDB 10.1.3+ or MySQL 5.7.6+.

-- Administrative account with full privileges
CREATE USER IF NOT EXISTS 'currentuser'@'localhost' IDENTIFIED BY 'password';
GRANT ALL ON *.* TO 'currentuser'@'localhost' WITH GRANT OPTION;

-- External-storage database and its restricted (DML-only) account
CREATE DATABASE IF NOT EXISTS grabber_text1;
CREATE USER IF NOT EXISTS 'grabber_text1'@'localhost' IDENTIFIED BY 'grabber_text1';
GRANT SELECT, INSERT, UPDATE, DELETE ON grabber_text1.* TO 'grabber_text1'@'localhost';
-- Bootstrap for the instance that hosts the wiki database
-- (presumably the setup-mysql-mem.sql file loaded by the memory start script).
-- Every statement is re-runnable, so loading the file again against an
-- already-initialised server does not abort on "already exists" errors.
-- IF NOT EXISTS on CREATE USER needs MariaDB 10.1.3+ or MySQL 5.7.6+.

-- Administrative account with full privileges
CREATE USER IF NOT EXISTS 'currentuser'@'localhost' IDENTIFIED BY 'password';
GRANT ALL ON *.* TO 'currentuser'@'localhost' WITH GRANT OPTION;

-- Wiki database and its restricted (DML-only) account
CREATE DATABASE IF NOT EXISTS grabber;
CREATE USER IF NOT EXISTS 'grabber'@'localhost' IDENTIFIED BY 'grabber';
GRANT SELECT, INSERT, UPDATE, DELETE ON grabber.* TO 'grabber'@'localhost';
#!/bin/bash
# Settings for the start/stop script of the MariaDB instance stored on disk.
MYSQLBASE="/home/mysql-disk/mysql"        # data directory passed as --datadir
MYSQLRUN="/run/mysql"                     # runtime directory holding the PID file
MYSQLPID="${MYSQLRUN}/mysql-disk.pid"     # PID file checked by stop()
MYSQLDEFAULTS="/etc/my-disk.cnf"          # option file given to every MySQL tool
# Start the on-disk MariaDB instance.
#
# Creates the runtime directory and, on first use, the data directory
# (initialised with mysql_install_db); then launches mysqld_safe in the
# background, waits until the server answers, and loads the bootstrap SQL.
#
# Globals read: MYSQLRUN, MYSQLBASE, MYSQLDEFAULTS.
# Exits the script with status 1 if initialisation fails or the server
# does not answer within 30 seconds.
function start() {
    if [ ! -d "$MYSQLRUN" ]; then
        mkdir "$MYSQLRUN"
        chown -R mysql:mysql "$MYSQLRUN"
    fi
    if [ ! -d "$MYSQLBASE" ]; then
        echo
        echo "Setup mysql"
        mkdir -p "$MYSQLBASE"
        mkdir "$MYSQLBASE/ibdata"
        mkdir "$MYSQLBASE/iblog"
        chown -R mysql:mysql "$MYSQLBASE"
        # --defaults-file is documented as having to be the first option
        mysql_install_db --defaults-file="$MYSQLDEFAULTS" --datadir="$MYSQLBASE" --user=mysql
        if [ $? -ne 0 ]; then exit 1; fi
    fi
    echo
    echo "Starting mysqld"
    mysqld_safe --defaults-file="$MYSQLDEFAULTS" --datadir="$MYSQLBASE" &
    # "$?" after a backgrounded command only reports that the job was
    # launched, so it cannot detect a failed start. Poll the server instead:
    # "mysqladmin ping" returns 0 as soon as the server accepts connections.
    local attempts=0
    until mysqladmin --defaults-file="$MYSQLDEFAULTS" ping >/dev/null 2>&1; do
        attempts=$((attempts + 1))
        if [ "$attempts" -ge 30 ]; then
            echo "mysqld did not answer within 30 seconds" >&2
            exit 1
        fi
        sleep 1
    done
    echo
    echo "Creating database and users"
    mysql --defaults-file="$MYSQLDEFAULTS" < ~/setup-mysql-disk.sql
    echo
}
# Shut the on-disk instance down. Does nothing when the PID file is absent.
# Globals read: MYSQLPID, MYSQLDEFAULTS.
stop() {
    [ -e "$MYSQLPID" ] || return 0

    echo "Stopping mysqld"
    mysqladmin --defaults-file="$MYSQLDEFAULTS" shutdown
    # Give the server a moment, then make sure the mysqld_safe wrapper is gone too.
    sleep 3
    killproc -v -p "$MYSQLPID" /usr/bin/mysqld_safe
}
# Command-line entry point: the first argument selects the action.
case "$1" in
    start|stop)
        echo
        # The argument is known to be "start" or "stop" here, so it can be
        # used directly as the name of the function to run.
        "$1"
        echo "Completed."
        ;;
    *)
        echo "Must specify start or stop."
        exit 1
        ;;
esac
#!/bin/bash
# Settings for the start/stop script of the MariaDB instance kept on a tmpfs ramdisk.
MOUNTPOINT="/mnt/tmpgrab"                 # where the tmpfs is mounted
MYSQLBASE="${MOUNTPOINT}/mysql"           # data directory passed as --datadir
MYSQLPID="${MYSQLBASE}/mysql.pid"         # PID file checked by stop()
MYSQLDEFAULTS="/etc/my-memory.cnf"        # option file given to every MySQL tool
TMPFSMAXSIZE="500m"                       # tmpfs size= mount option (256m noted as an alternative)
# Start the in-memory MariaDB instance.
#
# Creates the mount point if needed, mounts a tmpfs of TMPFSMAXSIZE on it
# unless one is already mounted, initialises the data directory on first
# use (mysql_install_db), launches mysqld_safe in the background, waits
# until the server answers, and loads the bootstrap SQL.
#
# Globals read: MOUNTPOINT, TMPFSMAXSIZE, MYSQLBASE, MYSQLDEFAULTS.
# Exits the script with status 1 if the mount point cannot be created, the
# mount or the initialisation fails, or the server does not answer within
# 30 seconds.
function start() {
    if [ ! -d "$MOUNTPOINT" ]; then
        echo "Creating mount point $MOUNTPOINT"
        mkdir "$MOUNTPOINT"
        if [ $? -ne 0 ]; then exit 1; fi
        chown root:root "$MOUNTPOINT"
        chmod 755 "$MOUNTPOINT"
    fi
    # "mountpoint -q" succeeds only when the directory itself is a mount
    # point. "findmnt --target" may instead resolve to the filesystem that
    # contains the directory and report success, which would silently skip
    # the tmpfs mount and put the database on the parent filesystem.
    if ! mountpoint -q "$MOUNTPOINT"; then
        echo "Mounting tmpfs $MOUNTPOINT"
        mount -t tmpfs -o size="${TMPFSMAXSIZE}" tmpfs "$MOUNTPOINT"
        if [ $? -ne 0 ]; then exit 1; fi
    fi
    if [ ! -d "$MYSQLBASE" ]; then
        echo
        echo "Setup mysql"
        mkdir "$MYSQLBASE"
        mkdir "$MYSQLBASE/ibdata"
        mkdir "$MYSQLBASE/iblog"
        chown -R mysql:mysql "$MYSQLBASE"
        # --defaults-file is documented as having to be the first option
        mysql_install_db --defaults-file="$MYSQLDEFAULTS" --datadir="$MYSQLBASE" --user=mysql
        if [ $? -ne 0 ]; then exit 1; fi
    fi
    echo
    echo "Starting mysqld"
    mysqld_safe --defaults-file="$MYSQLDEFAULTS" --datadir="$MYSQLBASE" &
    # "$?" after a backgrounded command only reports that the job was
    # launched, so it cannot detect a failed start. Poll the server instead:
    # "mysqladmin ping" returns 0 as soon as the server accepts connections.
    local attempts=0
    until mysqladmin --defaults-file="$MYSQLDEFAULTS" ping >/dev/null 2>&1; do
        attempts=$((attempts + 1))
        if [ "$attempts" -ge 30 ]; then
            echo "mysqld did not answer within 30 seconds" >&2
            exit 1
        fi
        sleep 1
    done
    echo
    echo "Creating database and users"
    mysql --defaults-file="$MYSQLDEFAULTS" < ~/setup-mysql-mem.sql
    echo
}
# Stop the in-memory MariaDB instance and release the ramdisk.
#
# Shuts the server down when its PID file exists, deletes the data
# directory, and unmounts the tmpfs. All data of the instance is discarded.
#
# Globals read: MYSQLPID, MYSQLDEFAULTS, MYSQLBASE, MOUNTPOINT.
# Exits the script with status 1 if the unmount fails.
function stop() {
    if [ -e "$MYSQLPID" ]; then
        echo "Stopping mysqld"
        mysqladmin --defaults-file="$MYSQLDEFAULTS" shutdown
        sleep 3
        killproc -v -p "$MYSQLPID" /usr/bin/mysqld_safe
    fi
    if [ -d "$MYSQLBASE" ]; then
        # ${VAR:?} aborts instead of expanding to nothing, so an unset
        # variable can never turn this into a delete of the wrong path.
        rm -rf "${MYSQLBASE:?}"
    fi
    # Unmount only when something is actually mounted there. Testing merely
    # for the directory made "stop" fail with status 1 whenever the tmpfs
    # was not mounted (for example when stop is run twice).
    if mountpoint -q "$MOUNTPOINT"; then
        echo "Unmounting $MOUNTPOINT"
        umount "$MOUNTPOINT"
        if [ $? -ne 0 ]; then exit 1; fi
    fi
}
# Command-line entry point: the first argument selects the action.
case "$1" in
    start|stop)
        echo
        # The argument is known to be "start" or "stop" here, so it can be
        # used directly as the name of the function to run.
        "$1"
        echo "Completed."
        ;;
    *)
        echo "Must specify start or stop."
        exit 1
        ;;
esac
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment