Last active

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Wikipedia bot to swap in address lines for name placeholders

View name.php
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
<?php
require_once( '../mw-peachy/Init.php' );
// Bot script: for every listed-building list page that uses the row template,
// replace placeholder building names with a cleaned-up address line taken from
// the local `wlm` database, moving any trailing postcode district into the
// row's location parameter.
set_time_limit( 0 ); // no execution time limit for this run
$site = Peachy::newWiki( 'livingbot' );
$template = 'EH listed building row';
$mysqli = new mysqli( "localhost", 'root' );
/* check connection */
if ( mysqli_connect_errno() ) {
	printf( "Connect failed: %s\n", mysqli_connect_error() );
	die();
}
// uid => array( 'from' => name currently expected on-wiki, 'to' => cleaned-up address )
$edits = array();
$mysqli->select_db( 'wlm' );
$result = $mysqli->query("SELECT `uid`,`name`,`Address` FROM `data-final` INNER JOIN `data-orig` ON Asset_number=uid WHERE `record_name` LIKE 'No name%'");
if ( $result === false ) {
	// Without this guard a failed query would be a fatal "fetch_assoc() on bool" below.
	printf( "Query failed: %s\n", $mysqli->error );
	die();
}
while( $row = $result->fetch_assoc() ){
	$edits[ $row['uid'] ] = array(
		'from' => $row['name'],
		'to' => cleanup( $row['Address'] )
	);
}
// Candidate list pages: titles returned by allpages() for 'Grade I' that also embed the row template.
// NOTE(review): the positional allpages()/embeddedin() arguments are Peachy-specific - confirm their meaning against the Peachy docs.
$lists = array();
$pages = $site->allpages( 0, 'Grade I', null, 'all', null, null, array(), array(), 'ascending', 'all', 500 );
foreach( $pages as $page ){
	$lists[] = $page['title'];
}
$lists = array_intersect( $lists, $site->embeddedin( "Template:$template", 0, 500 ) );
foreach( $lists as $title ){
	echo "Trying $title... \n";
	$listPage = new Page( $site, $title );
	$content = $listPage->get_text();
	// Split on the template opener so each chunk (after the first) holds one row's parameters.
	$sections = explode( '{{'.$template, $content );
	$changes = false;
	foreach( $sections as &$section ){
		$uid = extractParameterFromTemplate( 'uid', $section );
		if( $uid === false || !isset( $edits[$uid] ) ) continue;
		$name = extractParameterFromTemplate( 'name', $section );
		$from = $edits[$uid]['from'];
		$to = $edits[$uid]['to'];
		if( $name !== $from ) continue; // a human has tweaked this, don't overwrite
		$updated = $section;
		if( preg_match( '/ [NSEWC][NSEWC]?[0-9]+$/', $to, $matches ) ) {
			// Move postcode: strip it from the end of the name only (the regex is
			// end-anchored) and append it to the location parameter instead.
			$postcode = $matches[0];
			$to = (string)substr( $to, 0, -strlen( $postcode ) );
			// A callback is used so the postcode is inserted literally, never
			// interpreted as a "$n" / "\n" backreference.
			$updated = preg_replace_callback(
				"/(\| *location *= *[^|\n]+)/",
				function ( $m ) use ( $postcode ) {
					return $m[1] . $postcode;
				},
				$updated
			);
		}
		if( $updated !== null ) {
			// preg_quote() so names containing regex metacharacters ("(", ".", "/", ...)
			// match literally instead of breaking or mis-matching the pattern.
			$updated = preg_replace_callback(
				'/(\| *name *= *)' . preg_quote( $from, '/' ) . '/',
				function ( $m ) use ( $to ) {
					return $m[1] . $to;
				},
				$updated
			);
		}
		// Skip on regex failure or when nothing was actually replaced, so we never save a null edit.
		if( $updated === null || $updated === $section ) continue;
		$section = $updated;
		$changes = true;
	}
	unset( $section ); // break the dangling reference left by the by-reference foreach
	if( !$changes ) continue;
	// Reassemble the page from the (modified) sections, not from the original text.
	$content = implode( '{{'.$template, $sections );
	$listPage->edit( $content, 'Bot trial ([[Wikipedia:Bots/Requests_for_approval/LivingBot_24|details]]): swap in address lines for name placeholders' );
}
// Normalises a raw address line for use as a building name:
//  - every comma (with or without a following space) becomes a single space,
//  - hyphens become en dashes (e.g. in number ranges),
//  - the result is title-cased via titleCase(),
//  - commas are then restored between a leading run of two or three house
//    numbers (e.g. "1 2 3 High Street" -> "1, 2, 3 High Street").
// Returns the cleaned-up string.
function cleanup( $address ){
	// Most commas are bad...
	$address = str_replace( ', ', ',', $address );
	$address = str_replace( ',', ' ', $address );
	// hyphen to dash: the replacement is an en dash (U+2013) spelled as explicit
	// UTF-8 bytes so it survives re-encoding of this file. Replacing with an
	// empty string would merge ranges such as "12-14" into "1214".
	$address = str_replace( '-', "\xE2\x80\x93", $address );
	$address = titleCase( $address );
	// But some are good
	$address = preg_replace( '/^([0-9]+[a-z]?)( [0-9]+[a-z]?)( [0-9]+[a-z]? )/', "$1,$2,$3", $address );
	$address = preg_replace( '/^([0-9]+[a-z]?)( [0-9]+[a-z]? )/', "$1,$2", $address );
	return $address;
}
// Title-cases a space-separated string, with rules tailored to addresses:
//  - words exactly equal to one of the listed roman numerals, and
//    postcode-district-like tokens (one or two of N/S/E/W/C followed by
//    digits, any case), are upper-cased;
//  - "and" / "of" (any case) are lower-cased;
//  - every other word is lower-cased with its first character upper-cased.
// The first character of the final string is always upper-cased.
// An empty input is returned unchanged.
function titleCase( $string ) {
	if( strlen( $string ) == 0 ) return $string;

	// joining words that stay in lower case
	$keepLower = array( 'and', 'of' );
	// words that are written as-is (compared case-sensitively)
	$keepAsIs = array( 'II', 'IV', 'VI', 'III', 'VII' );

	$pieces = array();
	foreach ( explode( ' ', $string ) as $piece ) {
		$lowered = strtolower( $piece );
		$isSpecial = in_array( $piece, $keepAsIs, true );
		$isPostcode = preg_match( '/^[NSEWC][NSEWC]?[0-9]+$/i', $piece ) === 1;

		if ( $isSpecial || $isPostcode ) {
			$pieces[] = strtoupper( $piece );
		} elseif ( in_array( $lowered, $keepLower, true ) ) {
			$pieces[] = $lowered;
		} else {
			$pieces[] = ucfirst( $lowered );
		}
	}

	// glue the words back together and force a leading capital,
	// even when the first word is one of the lower-case joining words
	return ucfirst( implode( ' ', $pieces ) );
}
// Pulls the value of a named parameter out of a chunk of template wikitext.
//
// $parameters: a single parameter name, or an array of names tried in order.
// $template:   the wikitext to search (e.g. "| uid = 123\n| name = Foo").
//
// Wiki links are flattened first ("[[Target|Label]]" -> "Label",
// "[[Target]]" -> "Target"), so the returned value never contains link markup.
// A value runs up to the next "|" or newline and is trimmed.
//
// Returns the first non-empty value found, or false when none of the
// parameters is present with a non-empty value.
function extractParameterFromTemplate( $parameters, $template ){
	if( !is_array( $parameters ) ) $parameters = array( $parameters );
	// Trim links
	$template = preg_replace( '/\[\[([^]]+\|)?/', '', $template );
	$template = str_replace( ']]', '', $template );
	foreach( $parameters as $parameter ){
		// preg_quote() so a parameter name containing regex metacharacters
		// (".", "(", "/", ...) is matched literally.
		$pattern = '/\| *' . preg_quote( $parameter, '/' ) . " *= *(\[\[([^]]+\|)?)?([^|\n]*)/";
		if( !preg_match( $pattern, $template, $matches ) ) continue;
		// Group 3 is the value; it always participates in a match (it may be empty).
		$result = trim( $matches[3] );
		if ( strlen( $result ) > 0 ) return $result;
	}
	return false;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.