Created
July 4, 2011 18:19
-
-
Save whym/1063741 to your computer and use it in GitHub Desktop.
a category query tool for Wikimedia wikis, forked from CatScan 2.0β: https://fisheye.toolserver.org/browse/Magnus/catscan_rewrite.php?r=117
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?PHP | |
/* | |
Rewrite of CatScan for Wikimedia Deutschland | |
(c) 2009 by Magnus Manske | |
Released under GPL | |
*/ | |
error_reporting(E_ERROR|E_CORE_ERROR|E_ALL|E_COMPILE_ERROR); | |
ini_set('display_errors', 'On'); | |
ini_set ('memory_limit', 1024*1024*96); // 96 MB | |
ini_set('user_agent','CatScan 2 (Magnus)'); # Fake user agent | |
if (!function_exists('mb_ucfirst') && function_exists('mb_substr')) { | |
function mb_ucfirst($string) { | |
$string = mb_strtoupper(mb_substr($string, 0, 1)) . mb_substr($string, 1); | |
return $string; | |
} | |
} | |
if (!function_exists('mb_lcfirst') && function_exists('mb_substr')) { | |
function mb_lcfirst($string) { | |
$string = mb_strtolower(mb_substr($string, 0, 1)) . mb_substr($string, 1); | |
return $string; | |
} | |
} | |
class CatScan { | |
function CatScan ( $auto_fill = true ) { | |
$this->last_mysql_db = '' ; | |
$this->max_temp_objects = 30000 ; | |
$this->use_db_memory = 'STORAGE MEMORY' ; // Set to blank string for "normal" temporary table, or 'STORAGE MEMORY' for memory | |
$this->basecats = array () ; | |
$this->negcats = array () ; | |
$this->has_flagged_revisions = false ; | |
$this->depth = 0 ; | |
$this->interface_language = $this->getParameter ( 'interface_language' , 'en' ) ; | |
if ( $auto_fill ) $this->getParameters() ; | |
$this->i18n_url = 'http://meta.wikimedia.org/w/index.php?title=CatScan2/Interface' ; | |
$this->loadLocale () ; | |
$this->sql_comment = '/* catscan_rewrite */' ; | |
} | |
function loadLocale () { | |
// Load interface language from wiki page | |
$text = file_get_contents ( $this->i18n_url . "&action=raw" ) ; | |
$text = explode ( "\n" , $text ) ; | |
$loc = array () ; | |
$lang = '' ; | |
foreach ( $text AS $line ) { | |
$line = trim ( $line ) ; | |
if ( substr ( $line , 0 , 2 ) == '==' ) { | |
$lang = trim ( mb_strtolower ( str_replace ( '=' , '' , $line ) ) ) ; | |
$loc[$lang] = array () ; | |
} else if ( substr ( $line , 0 , 1 ) == ';' ) { | |
$line = trim ( substr ( $line , 1 ) ) ; | |
$l = explode ( ':' , $line , 2 ) ; | |
if ( count ( $l ) != 2 ) continue ; | |
$loc[$lang][trim(mb_strtolower($l[0]))] = $l[1] ; | |
} | |
} | |
$this->available_interface_languages = array_keys ( $loc ) ; | |
if ( isset ( $loc[$this->interface_language] ) ) $this->loc = $loc[$this->interface_language] ; | |
else $this->loc = $loc['en'] ; // Fallback | |
// Namespaces namespaces | |
$this->namespaces_en = array () ; | |
$this->namespaces = array () ; | |
$api_url = "http://{$this->language}.{$this->project}.org/w/api.php" ; | |
$url = "{$api_url}?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases|statistics&format=php" ; | |
$data = unserialize ( file_get_contents ( $url ) ) ; | |
$data = $data['query']['namespaces'] ; | |
// print "<pre>" ; print_r ( $data ) ; print "</pre>" ; | |
foreach ( $data AS $num => $d ) { | |
// print "<pre>" ; print_r ( $d ) ; print "</pre>" ; | |
unset ( $v ) ; | |
unset ( $vc ) ; | |
if ( isset ( $d['*'] ) ) $v = $d['*'] ; | |
if ( isset ( $d['canonical'] ) ) $vc = $d['canonical'] ; | |
if ( $num == 0 ) { | |
if ( !isset ( $v ) || $v == '' ) $v = $this->loc['namespace_0'] ; | |
if ( !isset ( $vc ) || $vc == '' ) $vc = $loc['en']['namespace_0'] ; | |
} | |
// print "$v / $vc<hr/>" ; | |
if ( isset ( $v ) ) $this->namespaces[$num] = $v ; | |
if ( isset ( $vc ) ) $this->namespaces_en[$num] = $vc ; | |
} | |
foreach ( $this->loc AS $k => $v ) { // Legacy cleanup | |
if ( substr ( $k , 0 , 10 ) != 'namespace_' ) continue ; | |
unset ( $this->loc[$k] ) ; | |
} | |
/* | |
foreach ( $loc['en'] AS $k => $v ) { | |
if ( substr ( $k , 0 , 10 ) != 'namespace_' ) continue ; | |
$nsn = substr ( $k , 10 ) ; | |
$this->namespaces_en[$nsn] = $v ; | |
// unset ( $this->loc[$k] ) ; | |
} | |
foreach ( $this->loc AS $k => $v ) { | |
if ( substr ( $k , 0 , 10 ) != 'namespace_' ) continue ; | |
$nsn = substr ( $k , 10 ) ; | |
$this->namespaces[$nsn] = $v ; | |
unset ( $this->loc[$k] ) ; | |
} | |
*/ | |
$this->sort_modes = array ( 'none' , 'title' , 'ns_title' , 'size' , 'date' , 'filesize' , 'uploaddate' ) ; | |
$this->formats = array ( | |
'html' => 'HTML' , | |
'csv' => 'CSV' , | |
'tsv' => 'TSV' , | |
'wiki' => 'Wiki' , | |
'php' => 'PHP' , | |
'xml' => 'XML' , | |
'json' => 'JSON' , | |
) ; | |
// Initialize language | |
if ( $this->language == $this->interface_language ) { | |
if ( isset ( $loc[$this->interface_language] ) ) { | |
if ( isset ( $loc[$this->interface_language]['auto_lang'] ) ) { | |
$this->language = $loc[$this->interface_language]['auto_lang'] ; | |
} | |
} | |
} | |
} | |
function iDie ( $text ) { | |
if ( $this->format == 'xml' ) { | |
header('Content-type: text/xml; charset=utf-8'); | |
print "<error>" ; | |
print htmlspecialchars ( $text ) ; | |
print "</error>" ; | |
} else if ( $this->format == 'wiki' or $this->format == 'csv' or $this->format == 'tsv' ) { | |
header('Content-type: text/plain; charset=utf-8'); | |
print "ERROR : $text " ; | |
} else { | |
header('Content-type: text/html; charset=utf-8'); | |
print $text ; | |
} | |
exit () ; | |
} | |
// ATTENTION! THIS NEEDS TO BE ADAPTED TO LOCATION!!! | |
function getDBpassword () { | |
$this->mysql_user = "whym" ; | |
$this->db2temp = "u_" . $this->mysql_user ; // Needs to exist on all servers! | |
if ( !file_exists ( "../.my.cnf" ) ) $this->iDie ( "Could not acquire DB password" ) ; | |
$t = file_get_contents ( "../.my.cnf" ) ; | |
$lines = explode ( "\n" , $t ) ; | |
foreach ( $lines AS $l ) { | |
$l = trim ( $l ) ; | |
if ( substr ( $l , 0 , 8 ) != 'password' ) continue ; | |
$l = explode ( '"' , $l ) ; | |
array_shift ( $l ) ; | |
$this->mysql_password = array_shift ( $l ) ; | |
} | |
} | |
function getDBname () { | |
$ret = $this->language ; | |
if ( $this->language == 'commons' ) $ret = 'commonswiki_p' ; | |
else if ( $this->project == 'wikipedia' ) $ret .= 'wiki_p' ; | |
else if ( $this->project == 'wikisource' ) $ret .= 'wikisource_p' ; | |
else if ( $this->project == 'wikibooks' ) $ret .= 'wikibooks_p' ; | |
else if ( $this->project == 'wikinews' ) $ret .= 'wikinews_p' ; | |
else if ( $this->project == 'wikiversity' ) $ret .= 'wikiversity_p' ; | |
else $this->iDie ( "Cannot construct database name for $language.$project - aborting." ) ; | |
return $ret ; | |
} | |
function getDBconnection () { | |
if ( isset ( $this->mysql_con ) ) return $this->mysql_con ; // Paranoia | |
if ( !isset ( $this->mysql_password ) ) $this->getDBpassword () ; | |
$project = $this->project ; | |
if ( $project == "wikipedia" ) $project = "wiki" ; | |
$nlang = str_replace ( 'classic' , 'classical' , $this->language ) ; | |
if ( $this->language == 'commons' ) $project = 'wiki' ; | |
$server = $nlang . $project . "-p.db.ts.wikimedia.org" ; | |
if ( !$this->mysql_con = @mysql_connect ( $server , $this->mysql_user , $this->mysql_password ) ) { | |
$this->iDie ( "Could not connect to $server : " . mysql_error() ) ; | |
} | |
} | |
function initializeDBHandle () { | |
$this->getDBconnection () ; | |
$this->db = $this->getDBname () ; | |
} | |
function getParameter ( $key , $default = '' ) { | |
if ( $default == '' and isset ( $this->default_parameter[$key] ) ) $default = $this->default_parameter[$key] ; | |
if ( !isset ( $_REQUEST[$key] ) ) return $default ; | |
$v = $_REQUEST[$key] ; | |
return $v ; | |
} | |
function getParameters () { | |
$this->templates = array ( 'templates_yes' , 'templates_any' , 'templates_no' ) ; | |
// Set default values | |
$this->default_parameter = array ( | |
'project' => 'wikipedia' , | |
'depth' => 0 , | |
'format' => 'html' , | |
'sortby' => 'none' , | |
'sortorder' => 'ascending' , | |
'atleast_count' => 0 , | |
'min_redlink_count' => 1 , | |
'show_redirects' => 'both' , | |
'edits' => array ( 'bots' => 'both' , 'anons' => 'both' , 'flagged' => 'both' ) , | |
) ; | |
// Get misc parameters | |
$this->language = $this->getParameter ( 'language' , $this->interface_language ) ; | |
$this->project = $this->getParameter ( 'project' ) ; | |
$this->depth = floor ( $this->getParameter ( 'depth' ) ) ; | |
$this->format = $this->getParameter ( 'format' ) ; | |
$this->before = $this->getParameter ( 'before' ) ; | |
$this->after = $this->getParameter ( 'after' ) ; | |
$this->fbefore = $this->getParameter ( 'fbefore' ) ; | |
$this->fafter = $this->getParameter ( 'fafter' ) ; | |
$this->larger = $this->getParameter ( 'larger' ) ; | |
$this->smaller = $this->getParameter ( 'smaller' ) ; | |
$this->minlinks = $this->getParameter ( 'minlinks' ) ; | |
$this->maxlinks = $this->getParameter ( 'maxlinks' ) ; | |
$this->sort_by = $this->getParameter ( 'sortby' ) ; | |
$this->sort_order = $this->getParameter ( 'sortorder' ) ; | |
$this->atleast_count = floor ( $this->getParameter ( 'atleast_count' ) ) ; | |
$this->ext_image_data = $this->getParameter ( 'ext_image_data' , !isset($_REQUEST['doit']) ) ; | |
$this->show_redlinks = isset ( $_REQUEST['show_redlinks'] ) ; | |
$this->show_redlinks_only = isset ( $_REQUEST['show_redlinks_only'] ) ; | |
$this->remove_template_redlinks = isset ( $_REQUEST['remove_template_redlinks'] ) ; | |
$this->article_redlinks_only = isset ( $_REQUEST['article_redlinks_only'] ) ; | |
$this->min_redlink_count = $this->getParameter ( 'min_redlink_count' ) ; | |
$this->show_redirects = $this->getParameter ( 'show_redirects' ) ; | |
$this->templates_use_talk_yes = isset ( $_REQUEST['templates_use_talk_yes'] ) ; | |
$this->templates_use_talk_any = isset ( $_REQUEST['templates_use_talk_any'] ) ; | |
$this->templates_use_talk_no = isset ( $_REQUEST['templates_use_talk_no'] ) ; | |
if ( $this->language == 'commons' ) $this->project = 'wikimedia' ; | |
foreach ( $this->templates AS $t ) { | |
$u = explode ( "\n" , trim ( $this->getParameter ( $t , '' ) ) ) ; | |
if ( count ( $u ) == 1 and $u[0] == '' ) $u = array () ; | |
$this->$t = $u ; | |
} | |
if ( isset ( $_REQUEST['ns'] ) ) $this->ns = $_REQUEST['ns'] ; | |
else $this->ns = array ( 0 => 1 ) ; | |
if ( isset ( $_REQUEST['comb'] ) ) $this->comb = $_REQUEST['comb'] ; | |
else $this->comb = array ( 'subset' => '1' ) ; | |
if ( isset ( $_REQUEST['edits'] ) ) $this->edits = $_REQUEST['edits'] ; | |
else $this->edits = $this->default_parameter['edits'] ; | |
foreach ( $this->default_parameter['edits'] AS $e ) { | |
if ( !isset ( $this->edits[$e] ) ) $this->edits[$e] = 'both' ; | |
} | |
// Get categories, either as multiline text or as category1/category2 | |
$categories = $this->getParameter ( 'categories' , '' ) ; | |
$categories = explode ( "\n" , $categories ) ; | |
$categories[] = $this->getParameter ( 'category1' , '' ) ; | |
$categories[] = $this->getParameter ( 'category2' , '' ) ; | |
foreach ( $categories AS $c ) { | |
$c = trim ( $c ) ; | |
if ( $c == '' ) continue ; | |
$this->basecats[] = $c ; | |
} | |
// Negative categories | |
$negcats = $this->getParameter ( 'negcats' , '' ) ; | |
$negcats = explode ( "\n" , $negcats ) ; | |
foreach ( $negcats AS $c ) { | |
$c = trim ( $c ) ; | |
if ( $c == '' ) continue ; | |
$this->negcats[] = $c ; | |
} | |
$this->basecats_orig = $this->basecats ; | |
$this->negcats_orig = $this->negcats ; | |
} | |
function makeDBsafe ( &$title ) { | |
$title = mb_ucfirst ( trim ( $title ) ) ; | |
$title = str_replace ( ' ' , '_' , $title ) ; | |
$title = str_replace ( '"' , '\"' , $title ) ; | |
} | |
function getDBsafe ( $title ) { | |
$this->makeDBsafe ( $title ) ; | |
return $title ; | |
} | |
function getTime () { | |
$ret = microtime() ; | |
$ret = explode ( ' ' , $ret ) ; | |
return $ret[0] + $ret[1] ; | |
} | |
function filterMasterTableNegative ( $ttname ) { | |
// Filter for negative categories | |
if ( count ( $this->negcats ) == 0 ) return ; | |
$sql = "DELETE FROM $ttname WHERE pid IN ( SELECT DISTINCT cl_from FROM {$this->db}.categorylinks,cat1_neg WHERE cl_to=catname )" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
function createTemporaryMasterTable () { | |
$sql = "CREATE TEMPORARY TABLE temp ( pid INTEGER , ptitle VARCHAR(255) , pns INTEGER , ptouched VARCHAR(14) , prev INTEGER , plen INTEGER , pgroup INTEGER ) {$this->use_db_memory}" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
$where = array () ; | |
// Time filter | |
$additional_table_from = '' ; | |
if ( $this->before != '' or $this->after != '' ) { | |
if ( $this->before != '' ) $where[] = 'rev_timestamp <= "' . $this->getDBsafe ( $this->before ) . '"' ; | |
if ( $this->after != '' ) $where[] = 'rev_timestamp >= "' . $this->getDBsafe ( $this->after ) . '"' ; | |
$where[] = 'rev_id = page_latest' ; | |
$additional_table_from = ",{$this->db}.revision" ; | |
} | |
// Time filter for first edit | |
if ( $this->fbefore != '' or $this->fafter != '' ) { | |
if ( $this->fbefore != '' ) $where[] = 'rev_timestamp <= "' . $this->getDBsafe ( $this->fbefore ) . '"' ; | |
if ( $this->fafter != '' ) $where[] = 'rev_timestamp >= "' . $this->getDBsafe ( $this->fafter ) . '"' ; | |
$where[] = 'rev_timestamp = (SELECT min(r2.rev_timestamp) FROM ' . "{$this->db}.revision r2 " . 'WHERE r2.rev_page = page_id)' ; | |
$additional_table_from = ",{$this->db}.revision" ; | |
} | |
// Size filter | |
if ( $this->larger != '' ) $where[] = 'page_len >= ' . $this->getDBsafe ( $this->larger ) ; | |
if ( $this->smaller != '' ) $where[] = 'page_len <= ' . $this->getDBsafe ( $this->smaller ) ; | |
if ( $this->show_redirects == 'yes' ) $where[] = 'page_is_redirect=1' ; | |
if ( $this->show_redirects == 'no' ) $where[] = 'page_is_redirect=0' ; | |
if ( count ( $this->basecats ) == 0 ) { | |
$this->template_primary = true ; | |
$this->template_group_limit = 0 ; | |
$tl = array () ; | |
if ( count ( $this->templates_yes ) > 0 ) { | |
foreach ( $this->templates_yes AS $t ) $tl[] = $this->getDBsafe ( $t ) ; | |
$this->template_group_limit = count ( $tl ) ; | |
} else { | |
foreach ( $this->templates_any AS $t ) $tl[] = $this->getDBsafe ( $t ) ; | |
} | |
$where[] = "page_id=tl_from" ; | |
$where[] = 'page_namespace IN (' . implode ( ',' , array_keys ( $this->ns ) ) . ')' ; | |
$where = implode ( " AND " , $where ) ; | |
$groupcnt = 0 ; | |
foreach ( $tl AS $t ) { | |
$sql = "INSERT INTO temp ( pid,ptitle,pns,ptouched,prev,plen,pgroup) " ; | |
$sql .= "SELECT DISTINCT page_id,page_title,page_namespace,page_touched,page_latest,page_len,$groupcnt " ; | |
$sql .= "FROM {$this->db}.page,{$this->db}.templatelinks {$additional_table_from} WHERE tl_title=\"$t\" AND tl_namespace=10 AND $where" ; | |
// print "$sql<hr/>" ; flush(); | |
$groupcnt++ ; | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
} else { | |
$this->template_primary = false ; | |
$where[] = "catname=cl_to" ; | |
$where[] = "page_id=cl_from" ; | |
$where[] = 'page_namespace IN (' . implode ( ',' , array_keys ( $this->ns ) ) . ')' ; | |
$where = implode ( " AND " , $where ) ; | |
$sql = "INSERT INTO temp ( pid,ptitle,pns,ptouched,prev,plen,pgroup) " ; | |
$sql .= "SELECT DISTINCT page_id,page_title,page_namespace,page_touched,page_latest,page_len,catgroup " ; | |
$sql .= "FROM {$this->db}.page,{$this->db}.categorylinks,cat1 {$additional_table_from} WHERE $where" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
} | |
function getCombinationTempTable ( $type ) { | |
$ret = array ( 'n' => 'combination' , '*' => array() , 'a' => array () ) ; | |
$cnt = 0 ; | |
foreach ( $this->catgroups AS $base => $cg ) $cnt = $cg ; | |
$sql = "CREATE TEMPORARY TABLE temp2 ( pid INTEGER , ptitle VARCHAR(255) , pns INTEGER , ptouched VARCHAR(14) , prev INTEGER , plen INTEGER , pgroup INTEGER , groupcount INTEGER , linkcount INTEGER NULL ) {$this->use_db_memory}" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
// Table names | |
$rev = "{$this->db}.revision" ; | |
$ug = "{$this->db}.user_groups" ; | |
$fp = "{$this->db}.flaggedpages" ; | |
$pl = "{$this->db}.pagelinks" ; | |
$table_page = "{$this->db}.page" ; | |
$table_tl = "{$this->db}.templatelinks" ; | |
$order = "pid,ptitle,pns,ptouched,prev,plen,pgroup" ; | |
if ( $this->template_primary ) { | |
$cnt = $this->template_group_limit ; | |
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt >= $cnt" ; | |
} else { | |
// Construct main query | |
if ( $type == 'subset' ) { | |
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt = $cnt" ; | |
} else if ( $type == 'union' ) { | |
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid" ; | |
} else if ( $type == 'diff' ) { | |
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt = 1" ; | |
} else if ( $type == 'atleast' ) { | |
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt >= {$this->atleast_count}" ; | |
} | |
} | |
$sql = "INSERT INTO temp2 ($order,groupcount) $sql" ; | |
// Run main query | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
// HERE BE EXPENSIVE FILTERS! | |
// Works by removing all entries from temp2 that do NOT match the filters | |
// Links | |
if ( $type == 'subset' and $this->minlinks . $this->maxlinks != '' ) { | |
$min = $this->getDBsafe ( $this->minlinks ) ; | |
$max = $this->getDBsafe ( $this->maxlinks ) ; | |
$sql = "UPDATE temp2 SET linkcount = ( SELECT count(*) AS lc FROM $pl WHERE pl_from=pid)" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
$sql = "DELETE FROM temp2 WHERE linkcount " ; | |
if ( $min == '' ) $sql .= "> $max" ; | |
else if ( $max == '' ) $sql .= "< $min" ; | |
else $sql .= "> $max OR linkcount < $min" ; | |
$res = $this->query ( $this->db2temp , $sql , $this->query ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
} | |
// Anons | |
if ( $this->edits['anons'] != 'both' ) { | |
if ( $this->edits['anons'] == 'yes' ) $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT rev_id FROM $rev WHERE rev_id=prev AND rev_user<>0)" ; | |
else $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT rev_id FROM $rev WHERE rev_id=prev AND rev_user=0)" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
} | |
// Bots | |
if ( $this->edits['bots'] != 'both' ) { | |
if ( $this->edits['bots'] == 'yes' ) $sql = "DELETE FROM temp2 WHERE prev NOT IN ( SELECT DISTINCT rev_id FROM $rev,$ug WHERE rev_id=prev AND rev_user=ug_user AND ug_group='bot')" ; | |
else $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT rev_id FROM $rev,$ug WHERE rev_id=prev AND rev_user=ug_user AND ug_group='bot')" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
} | |
// Flagged revisions | |
if ( $this->edits['flagged'] != 'both' ) { | |
if ( !$this->has_flagged_revisions ) $this->iDie ( "Flagged revisions not enabeled on {$this->language}.{$this->project}.org" ) ; | |
if ( $this->edits['flagged'] == 'yes' ) $sql = "DELETE FROM temp2 WHERE prev NOT IN ( SELECT DISTINCT fp_stable FROM $fp WHERE pid=fp_page_id AND fp_stable=prev AND fp_reviewed=1 )" ; | |
else $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT fp_stable FROM $fp WHERE pid=fp_page_id AND fp_stable=prev AND fp_reviewed=1 )" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
} | |
if ( !$this->template_primary ) { | |
// Templates : All | |
if ( count ( $this->templates_yes ) > 0 ) { | |
$tl = array () ; | |
foreach ( $this->templates_yes AS $t ) $tl[] = $this->getDBsafe ( $t ) ; | |
$tlc = count ( $tl ) ; | |
$tl = '"' . implode ( '","' , $tl ) . '"' ; | |
if ( $this->templates_use_talk_yes ) { | |
$sql = "DELETE FROM temp2 WHERE $tlc > ( SELECT count(DISTINCT tl_title) FROM $table_tl,$table_page WHERE MOD(pns,2)=0 AND tl_from=page_id AND page_title=ptitle AND page_namespace=pns+1 AND tl_title IN ( $tl ) )" ; | |
} else { | |
$sql = "DELETE FROM temp2 WHERE $tlc > ( SELECT count(DISTINCT tl_title) FROM $table_tl WHERE tl_from=pid AND tl_title IN ( $tl ) )" ; | |
} | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
} | |
// Templates : Any | |
if ( !$this->template_primary || count ( $this->templates_yes ) > 0 ) { | |
if ( count ( $this->templates_any ) > 0 ) { | |
$tl = array () ; | |
foreach ( $this->templates_any AS $t ) $tl[] = $this->getDBsafe ( $t ) ; | |
$tlc = count ( $tl ) ; | |
$tl = '"' . implode ( '","' , $tl ) . '"' ; | |
if ( $this->templates_use_talk_any ) { | |
$sql = "DELETE FROM temp2 WHERE 0 = ( SELECT count(*) FROM $table_tl,$table_page WHERE MOD(pns,2)=0 AND tl_from=page_id AND page_title=ptitle AND page_namespace=pns+1 AND tl_title IN ( $tl ) )" ; | |
} else { | |
$sql = "DELETE FROM temp2 WHERE 0 = ( SELECT count(*) FROM $table_tl WHERE tl_from=pid AND tl_title IN ( $tl ) )" ; | |
} | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
} | |
// Templates : None | |
if ( count ( $this->templates_no ) > 0 ) { | |
$tl = array () ; | |
foreach ( $this->templates_no AS $t ) $tl[] = $this->getDBsafe ( $t ) ; | |
$tlc = count ( $tl ) ; | |
$tl = '"' . implode ( '","' , $tl ) . '"' ; | |
if ( $this->templates_use_talk_no ) { | |
$sql = "DELETE FROM temp2 WHERE 0 < ( SELECT count(*) FROM $table_tl,$table_page WHERE MOD(pns,2)=0 AND tl_from=page_id AND page_title=ptitle AND page_namespace=pns+1 AND tl_title IN ( $tl ) )" ; | |
} else { | |
$sql = "DELETE FROM temp2 WHERE 0 < ( SELECT count(*) FROM $table_tl WHERE tl_from=pid AND tl_title IN ( $tl ) )" ; | |
} | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
// Call redlinks | |
if ( $this->show_redlinks ) { | |
$this->check_redlinks ( 'temp2' , $type ) ; | |
if ( $this->show_redlinks_only ) { | |
return array () ; | |
} | |
} | |
// Retrieve results | |
$sql = "SELECT * FROM temp2" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
$tmp = array () ; | |
while ( $o = mysql_fetch_object ( $res ) ) { | |
if ( isset ( $tmp[$o->pid] ) ) continue ; // Had that page already | |
if ( count ( $tmp ) > $this->max_temp_objects ) $this->iDie ( "Maximum potential result objects exceeded, aborting" ) ; | |
$tc = count ( $tmp ) ; | |
$tmp[$o->pid] = $tc ; | |
$n = array ( 'n' => 'page' , 'a' => array ( | |
'title' => $o->ptitle , | |
'id' => $o->pid , | |
'namespace' => $o->pns , | |
'len' => $o->plen , | |
'touched' => $o->ptouched , | |
'seen' => $o->groupcount , | |
) ) ; | |
$n['a']['nstext'] = $this->namespaces[$o->pns] ; | |
if ( '' <> $o->linkcount ) $n['a']['links'] = $o->linkcount ; | |
$ret['*'][$tc] = $n ; | |
} | |
$ret['a']['type'] = $type ; | |
if ( $type == 'atleast' ) $ret['a']['atleast'] = $this->atleast_count ; | |
if ( $this->ext_image_data ) { | |
$x = array ( 'img_size' , 'img_width' , 'img_height' , 'img_media_type' , 'img_major_mime' , 'img_minor_mime' , 'img_user_text' , 'img_timestamp' , 'img_sha1' ) ; | |
$qx = implode ( ',' , $x ) ; | |
$res = $this->query ( $this->db2temp , "SELECT pid,$qx FROM temp2,{$this->db}.image WHERE pns=6 AND img_name=ptitle" ) ; | |
while ( $o = mysql_fetch_object ( $res ) ) { | |
$id = $tmp[$o->pid] ; | |
foreach ( $x AS $y ) { | |
$ret['*'][$id]['a'][$y] = $o->$y ; | |
} | |
} | |
} | |
// Drop table | |
$sql = "DROP TABLE temp2" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
$ret['a']['count'] = count ( $ret['*'] ) ; | |
return $ret ; | |
} | |
function check_redlinks ( $table , $type ) { | |
# if ( !isset ( $this->redlinks_out ) ) { | |
# $this->redlinks_out = array () ; | |
# } | |
$pagelinks = "{$this->db}.pagelinks" ; | |
$page = "{$this->db}.page" ; | |
$sql = "CREATE TEMPORARY TABLE temp_redlinks ( ptitle VARCHAR (255) BINARY , namespace INTEGER , linkcount INTEGER , INDEX i1 ( ptitle , namespace ) ) {$this->use_db_memory}" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ; | |
$sql = "INSERT INTO temp_redlinks ( ptitle , namespace , linkcount ) SELECT pl_title,pl_namespace,COUNT(pl_from) AS cnt FROM $pagelinks,$table WHERE pl_from=pid GROUP BY pl_title,pl_namespace HAVING cnt>={$this->min_redlink_count}" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
// Remove all non-article namespace links (optional) | |
if ( $this->article_redlinks_only ) { | |
$sql = "DELETE FROM temp_redlinks WHERE namespace<>0" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
} | |
// Remove all "redlinks" that exists | |
$sql = "DELETE FROM temp_redlinks WHERE EXISTS ( SELECT * FROM $page WHERE page_title=temp_redlinks.ptitle AND page_namespace=temp_redlinks.namespace )" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
// Remove template redlinks (optional) | |
if ( $this->remove_template_redlinks ) { | |
$sql = "DELETE FROM temp_redlinks WHERE EXISTS ( SELECT * FROM $page,$pagelinks WHERE pl_from=page_id AND page_namespace=10 AND pl_title=temp_redlinks.ptitle AND pl_namespace=temp_redlinks.namespace )" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
} | |
$type = "redlinks.$type" ; | |
$rl = array ( 'n' => "$type" , '*' => array() , 'a' => array ( 'type' => $type ) ) ; | |
$sql = "SELECT * FROM temp_redlinks" ; | |
$res = $this->query ( $this->db2temp , $sql ) ; | |
$spec1 = 'Special:' ; | |
$spec2 = $spec1 ; | |
if ( isset ( $this->loc['special_namespace'] ) ) $spec2 = $this->loc['special_namespace'] . ':' ; | |
while ( $o = mysql_fetch_object ( $res ) ) { | |
$t = mb_ucfirst ( $o->ptitle ) ; | |
if ( substr ( $t , 0 , strlen ( $spec1 ) ) == $spec1 ) continue ; | |
if ( substr ( $t , 0 , strlen ( $spec2 ) ) == $spec2 ) continue ; | |
# print $o->ptitle . " : " . $o->linkcount . " : " . $o->namespace . "<br/>\n" ; | |
$n = array ( 'n' => 'page' , 'a' => array ( | |
'title' => $o->ptitle , | |
'links' => $o->linkcount , | |
'namespace' => $o->namespace , | |
'nstext' => $this->namespaces[$o->namespace] , | |
# 'id' => $o->pid , | |
# 'len' => $o->plen , | |
# 'touched' => $o->ptouched , | |
# 'seen' => $o->groupcount , | |
) ) ; | |
$rl['*'][] = $n ; | |
} | |
$rl['a']['count'] = count ( $rl['*'] ) ; | |
$this->result['*'][] = $rl ; | |
$sql = "DROP TABLE temp_redlinks" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
} | |
function checkedFlaggedRevisions () { | |
// $sql = 'SELECT * FROM flaggedrevs LIMIT 1' ; | |
$sql = "SELECT count(*) AS cnt FROM information_schema.tables WHERE table_schema='{$this->db}' AND table_name='flaggedrevs'" ; | |
$res = $this->query ( $this->db , $sql ) ; | |
// if ( mysql_errno() != 0 ) return ; | |
$o = mysql_fetch_object ( $res ) ; | |
if ( $o->cnt == 1 ) $this->has_flagged_revisions = true ; | |
} | |
function addSubcats ( $ttname , $category , $depth , $group ) { | |
$c = $this->getDBsafe ( $category ) ; | |
$this->query ( $this->db2temp , "CREATE TEMPORARY TABLE cat2 ( catname VARCHAR(255) PRIMARY KEY ) {$this->use_db_memory}" ) ; | |
// Seed | |
$this->query ( $this->db2temp , "INSERT IGNORE INTO cat2 ( catname ) VALUES ( \"$c\" )" ) ; | |
$this->query ( $this->db2temp , "INSERT IGNORE INTO $ttname ( catgroup , catname , depth ) VALUES ( $group , \"$c\" , 0 )" ) ; | |
// Iterate | |
$db = $this->db ; | |
for ( $d = 1 ; $d <= $depth ; $d++ ) { | |
$ld = $d - 1 ; | |
$sql = "INSERT IGNORE INTO $ttname ( catgroup , catname , depth ) SELECT DISTINCT $group,page_title,$d FROM $db.page,$db.categorylinks,cat2 WHERE page_id=cl_from AND cl_to=catname AND page_namespace=14" ; | |
// print "$sql<br/>" ; | |
$this->query ( $this->db2temp , $sql ) ; | |
$this->query ( $this->db2temp , "INSERT IGNORE INTO $ttname ( catgroup , catname , depth ) SELECT $group , catname , $ld FROM cat2" ) ; | |
if ( $d == $depth ) break ; | |
$this->query ( $this->db2temp , "DELETE FROM cat2" ) ; | |
$this->query ( $this->db2temp , "INSERT INTO cat2 ( catname ) SELECT catname FROM $ttname WHERE depth=$d AND catgroup=$group" ) ; | |
} | |
$this->query ( $this->db2temp , "DROP TABLE cat2" ) ; | |
} | |
function query ( $db , $sql ) { | |
if ( $db != $this->last_mysql_db ) { | |
mysql_select_db ( $db ) ; | |
$this->last_mysql_db = $db ; | |
} | |
$ret = mysql_query ( $sql , $this->mysql_con ) ; | |
if ( mysql_errno() != 0 ) $this->iDie ( "MYSQL error : " . mysql_error() . " [$sql]" ) ; | |
return $ret ; | |
} | |
function getCatLists () { | |
$ret = array ( 'n' => 'categories' , '*' => array() , 'a' => array () ) ; | |
foreach ( $this->count2cat AS $group => $cat ) { | |
$n = array ( 'n' => 'list' , '*' => array() , 'a' => array () ) ; | |
$res = $this->query ( $this->db2temp , "SELECT catname FROM cat1 WHERE catgroup=$group" ) ; | |
while ( $o = mysql_fetch_object ( $res ) ) { | |
$n['*'][] = array ( 'n' => 'c' , 'a' => array ( 'name' => $o->catname ) ) ; | |
} | |
$n['a']['root'] = $cat ; | |
$n['a']['depth'] = $this->cat_depth[$cat] ; | |
$n['a']['count'] = count ( $n['*'] ) ; | |
$ret['*'][] = $n ; | |
} | |
$ret['a']['count'] = count ( $ret['*'] ) ; | |
return $ret ; | |
} | |
function createTemporaryTable ( $ttname , $bc ) { | |
$this->query ( $this->db2temp , "CREATE TEMPORARY TABLE $ttname ( catgroup INTEGER , catname VARCHAR(255) , depth INTEGER , PRIMARY KEY ca ( catgroup , catname ) , INDEX cb ( catgroup , depth ) ) {$this->use_db_memory}" ) ; | |
$cnt = 1 ; | |
if ( $ttname == 'cat1' ) $this->catgroups = array () ; | |
foreach ( $bc AS $k => $cat ) { | |
$c = explode ( '|' , $cat , 2 ) ; | |
if ( count ( $c ) > 1 ) $depth = array_pop ( $c ) ; | |
else $depth = $this->depth ; | |
$cat = array_shift ( $c ) ; | |
$bc[$k] = $cat ; // Removing "|n" | |
if ( $ttname == 'cat1' ) { | |
$this->cat_depth[$cat] = $depth ; | |
$this->catgroups[$cat] = $cnt ; | |
$this->count2cat[$cnt] = $cat ; | |
} | |
$this->addSubcats ( $ttname , $cat , $depth , $cnt ) ; | |
$cnt++ ; | |
} | |
return $bc ; | |
} | |
function runQuery () { | |
$starttime = $this->getTime() ; | |
$this->initializeDBHandle () ; | |
$this->checkedFlaggedRevisions () ; | |
$debug = 0 ; | |
if ( $debug ) { $n = $this->getTime() - $starttime ; print "Start:$n<br/>\n" ; } | |
$this->basecats = $this->createTemporaryTable ( 'cat1' , $this->basecats ) ; | |
$res = $this->query ( $this->db2temp , "SELECT count(*) AS c FROM cat1" ) ; | |
$o = mysql_fetch_object ( $res ) ; | |
$total_subcats = $o->c ; | |
if ( $debug ) { $n = $this->getTime() - $starttime - $n ; print "Subcats:$n<br/>\n" ; } | |
$this->result = array ( 'n' => 'result' , 'a' => array() , '*' => array() ) ; | |
$use_master_table = false ; | |
$force_master = array ( 'subset' , 'union' , 'diff' , 'atleast' ) ; | |
foreach ( $force_master AS $m ) { | |
if ( isset ( $this->comb[$m] ) ) $use_master_table = true ; | |
} | |
if ( $use_master_table ) { | |
$this->createTemporaryMasterTable () ; | |
if ( count ( $this->negcats ) > 0 ) { | |
$this->negcats = $this->createTemporaryTable ( 'cat1_neg' , $this->negcats ) ; | |
$this->filterMasterTableNegative ( 'temp' ) ; | |
$this->query ( $this->db2temp , "DROP TABLE cat1_neg" ) ; | |
} | |
} | |
if ( $debug ) { $n = $this->getTime() - $starttime - $n ; print "Master table:$n<br/>\n" ; } | |
if ( isset ( $this->comb['list'] ) ) $this->result['*'][] = $this->getCatLists () ; | |
if ( isset ( $this->comb['subset'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'subset' ) ; | |
if ( isset ( $this->comb['union'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'union' ) ; | |
if ( isset ( $this->comb['diff'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'diff' ) ; | |
if ( isset ( $this->comb['atleast'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'atleast' ) ; | |
// Clean up the tables; should happen automaticcally, but PARANOIA! | |
$this->query ( $this->db2temp , "DROP TABLE cat1" ) ; | |
if ( $use_master_table ) $this->query ( $this->db2temp , "DROP TABLE temp" ) ; | |
if ( $debug ) { $n = $this->getTime() - $starttime - $n ; print "Combinations:$n<br/>\n" ; } | |
$endtime = $this->getTime() ; | |
$difftime = $endtime - $starttime ; | |
$this->result['a']['querytime_sec'] = $difftime ; | |
$this->result['a']['total_categories_searched'] = $total_subcats ; | |
$this->result['a']['query_url'] = $this->url_get() ; | |
} | |
function printHTML () { | |
$script = array_pop ( explode ( '/' , $_SERVER["SCRIPT_NAME"] ) ) ; | |
$uil = mb_strtoupper($this->interface_language) ; | |
$lil = mb_strtolower($this->interface_language) ; | |
header('Content-type: text/html; charset=utf-8'); | |
print '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//'.$uil.'" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' . "\n\n" ; | |
print '<html><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"><body>' ; | |
print "<table style='background-color:#BAD0EF'><tr><td rowspan='2' nowrap><h1 style='margin-top:0px;margin-bottom:0px;padding-bottom:0px;padding-right:5px'>" . $this->loc['toolname'] . '</h1></td>' ; | |
print "<td valign='bottom' width='100%'><small>{$this->loc['subtitle']}, <em>modified by <a href='http://toolserver.org/~whym'>whym</a></em></small>" ; | |
print " [<a href='http://meta.wikipedia.org/wiki/CatScan2/{$lil}'><small>{$this->loc['manual']}</small></a>]" ; | |
print " [<a href='{$this->i18n_url}#{$uil}'><small>{$this->loc['interface_text']}</small></a>]</td></tr><tr><td>" ; | |
print "{$this->loc['interface_language']} : " ; | |
foreach ( $this->available_interface_languages AS $l ) { | |
if ( $l == $this->interface_language ) print " [<small><b>" . mb_strtoupper ( $l ) . "</b></small></a>]" ; | |
else print " [<a href='$script?interface_language=$l'><small>" . mb_strtoupper ( $l ) . "</small></a>]" ; | |
} | |
print "</td></table>" ; | |
$cats = implode ( "\n" , $this->basecats_orig ) ; | |
$ncats = implode ( "\n" , $this->negcats_orig ) ; | |
// Show the entry form | |
// print "<i>[TBI] = To Be Implemented</i>" ; | |
print '<form method="post" action="./' . $script . '">' ; | |
print '<table border="1">' ; | |
print "<tr><th>" . $this->loc['language'] . "</th>" ; | |
print "<td><input type='text' name='language' value='{$this->language}' /></td></tr>" ; | |
print "<tr><th>" . $this->loc['project'] . "</th>" ; | |
print "<td><input type='text' name='project' value='{$this->project}' /></td></tr>" ; | |
print "<tr><th>" . $this->loc['depth'] . "</th>" ; | |
print "<td><input type='text' name='depth' value='{$this->depth}' />{$this->loc['depth_explanation']}</td></tr>" ; | |
print "<tr><th>" . $this->loc['categories'] . "</th>" ; | |
print "<td><textarea name='categories' rows='3' cols='80'>{$cats}</textarea></td></tr>" ; | |
print "<tr><th>" . $this->loc['negative_categories'] . "</th>" ; | |
print "<td><textarea name='negcats' rows='2' cols='80'>{$ncats}</textarea></td></tr>" ; | |
$combs = array ( 'list' , 'subset' , 'union' , 'diff' , 'atleast' ) ; | |
print "<tr><th>" . $this->loc['combination'] . "</th><td>" ; | |
foreach ( $combs AS $c ) { | |
$checked = isset ( $this->comb[$c] ) ? 'checked' : '' ; | |
$name = $this->loc["comb_$c"] ; | |
print "<input type='checkbox' name='comb[$c]' value='1' id='comb_$c' $checked /><label for='comb_$c'>$name</label>" ; | |
} | |
print " <input type='text' size='3' name='atleast_count' value='{$this->atleast_count}' />" ; | |
print "</td></tr>" ; | |
print "<tr><th>" . $this->loc['namespaces'] . "</th><td>" ; | |
$ne = '' ; | |
$no = '' ; | |
foreach ( $this->namespaces AS $num=> $name ) { | |
$checked = isset ( $this->ns[$num] ) ? 'checked' : '' ; | |
$s = "<td><input type='checkbox' name='ns[$num]' id='ns_$num' value='1' $checked /><label for='ns_$num'>$name</label></td>" ; | |
if ( $num % 2 == 0 ) $ne .= $s ; | |
else $no .= $s ; | |
} | |
print "<table><tr>$ne</tr></tr>$no</tr></table>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . $this->loc['show_redirects'] . "</th><td>" ; | |
print "<table><tr>" ; | |
$c1 = $this->show_redirects == 'both' ? 'checked' : '' ; | |
$c2 = $this->show_redirects == 'yes' ? 'checked' : '' ; | |
$c3 = $this->show_redirects == 'no' ? 'checked' : '' ; | |
print "<td><input type='radio' name='show_redirects' value='both' id='show_redirects_both' $c1 /><label for='show_redirects_both'>{$this->loc['edits_both']}</label></td>" ; | |
print "<td><input type='radio' name='show_redirects' value='yes' id='show_redirects_yes' $c2 /><label for='show_redirects_yes'>{$this->loc['edits_yes']}</label></td>" ; | |
print "<td><input type='radio' name='show_redirects' value='no' id='show_redirects_no' $c3 /><label for='show_redirects_no'>{$this->loc['edits_no']}</label></td>" ; | |
print "</tr></table>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . $this->loc['templates'] . "</th><td>" ; | |
print "<table><tr>" ; | |
$first = true ; | |
foreach ( $this->templates AS $t ) { | |
$t2 = array_pop ( explode ( '_' , $t ) ) ; | |
$n = "templates_use_talk_$t2" ; | |
$tut_checked = $this->$n ? 'checked' : '' ; | |
if ( $first ) $b = '' ; | |
else $b = 'style="border-left:1px solid black; padding-left:2px"' ; | |
print "<td $b>{$this->loc[$t]}<br/><textarea name='$t' cols='30' rows='4'>" . implode ( "\n" , $this->$t ) . "</textarea>" ; | |
print "<br/><input type='checkbox' name='templates_use_talk_$t2' id='templates_use_talk_$t2' value='1' $tut_checked />" ; | |
print "<label for='templates_use_talk_$t2'>{$this->loc['templates_use_talk_instead']}</label>" ; | |
print "</td>" ; | |
$first = false ; | |
} | |
print "</tr></table>" ; | |
print '</td></tr>' ; | |
$edits = array ( 'bots' , 'anons' , 'flagged' ) ; | |
print "<tr><th>" . $this->loc['edits'] . "</th><td>" ; | |
print "<table>" ; | |
foreach ( $edits AS $e ) { | |
$c1 = $this->edits[$e] == 'both' ? 'checked' : '' ; | |
$c2 = $this->edits[$e] == 'yes' ? 'checked' : '' ; | |
$c3 = $this->edits[$e] == 'no' ? 'checked' : '' ; | |
print "<tr><td>{$this->loc["edit_$e"]}</td>" ; | |
print "<td><input type='radio' name='edits[$e]' value='both' id='edits_both_$e' $c1 /><label for='edits_both_$e'>{$this->loc['edits_both']}</label></td>" ; | |
print "<td><input type='radio' name='edits[$e]' value='yes' id='edits_yes_$e' $c2 /><label for='edits_yes_$e'>{$this->loc['edits_yes']}</label></td>" ; | |
print "<td><input type='radio' name='edits[$e]' value='no' id='edits_no_$e' $c3 /><label for='edits_no_$e'>{$this->loc['edits_no']}</label></td>" ; | |
print "</tr>" ; | |
} | |
print "</table>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . $this->loc['last_change'] . "</th><td>" ; | |
print "<table>" ; | |
print "<tr><td>{$this->loc['before']}</td><td><input type='text' name='before' value='{$this->before}' /></td><td rowspan='2'>{$this->loc['date_format']}</td></tr>" ; | |
print "<tr><td>{$this->loc['after']}</td><td><input type='text' name='after' value='{$this->after}' /></td></tr>" ; | |
print "</table>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . "first edit date" . "</th><td>" ; | |
print "<table>" ; | |
print "<tr><td>{$this->loc['before']}</td><td><input type='text' name='fbefore' value='{$this->fbefore}' /></td><td rowspan='2'>{$this->loc['date_format']}</td></tr>" ; | |
print "<tr><td>{$this->loc['after']}</td><td><input type='text' name='fafter' value='{$this->fafter}' /></td></tr>" ; | |
print "</table>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . $this->loc['size'] . "</th><td>" ; | |
print "<table>" ; | |
print "<tr><td>{$this->loc['larger']}</td><td><input type='text' name='larger' value='{$this->larger}' /></td><td rowspan='2'>{$this->loc['size_explanation']}</td></tr>" ; | |
print "<tr><td>{$this->loc['smaller']}</td><td><input type='text' name='smaller' value='{$this->smaller}' /></td></tr>" ; | |
print "</table>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . $this->loc['link_number'] . "</th><td>" ; | |
print "<table>" ; | |
print "<tr><td>{$this->loc['maxlinks']}</td><td><input type='text' name='maxlinks' value='{$this->maxlinks}' /></td><td rowspan='2'>{$this->loc['links_explanation']}</td></tr>" ; | |
print "<tr><td>{$this->loc['minlinks']}</td><td><input type='text' name='minlinks' value='{$this->minlinks}' /></td></tr>" ; | |
print "</table>" ; | |
print '</td></tr>' ; | |
$sred = $this->show_redlinks ? 'checked' : '' ; | |
$sredo = $this->show_redlinks_only ? 'checked' : '' ; | |
$rtr = $this->remove_template_redlinks ? 'checked' : '' ; | |
$aro = $this->article_redlinks_only ? 'checked' : '' ; | |
print "<tr><th>" . $this->loc['redlinks'] . "</th><td>" ; | |
print "<table><tr>" ; | |
print "<td><input type='checkbox' name='show_redlinks' value='1' id='show_redlinks' $sred /><label for='show_redlinks'>" . $this->loc['show_redlinks'] . "</label></td>" ; | |
print "<td><input type='checkbox' name='show_redlinks_only' value='1' id='show_redlinks_only' $sredo /><label for='show_redlinks_only'>" . $this->loc['show_redlinks_only'] . "</label></td>" ; | |
print "<td><input type='checkbox' name='remove_template_redlinks' value='1' id='remove_template_redlinks' $rtr /><label for='remove_template_redlinks'>" . $this->loc['remove_template_redlinks'] . "</label></td>" ; | |
print "<td><input type='checkbox' name='article_redlinks_only' value='1' id='article_redlinks_only' $aro /><label for='article_redlinks_only'>" . $this->loc['article_redlinks_only'] . "</label></td>" ; | |
print "</tr><tr>" ; | |
print "<td colspan='2'>{$this->loc['min_redlink_count']} <input type='text' size='3' name='min_redlink_count' value='{$this->min_redlink_count}' /></td>" ; | |
print "</tr></table>" ; | |
print '</td></tr>' ; | |
$sa = $this->sort_order == 'ascending' ? 'checked' : '' ; | |
$sd = $this->sort_order == 'descending' ? 'checked' : '' ; | |
print "<tr><th>" . $this->loc['sort'] . "</th><td>" ; | |
foreach ( $this->sort_modes AS $sm ) { | |
if ( $this->sort_by == $sm ) $chk = 'checked' ; | |
else $chk = '' ; | |
if ( !isset ( $this->loc['sort_by_'.$sm] ) ) $txt = "<i>untranslated</i> sort_by_$sm" ; | |
else $txt = $this->loc['sort_by_'.$sm] ; | |
print "<input type='radio' name='sortby' value='$sm' id='sortby_$sm' $chk /><label for='sortby_$sm'>" . $txt . "</label>" ; | |
} | |
print ' ' ; | |
print "<input type='radio' name='sortorder' value='ascending' id='sort_ascending' $sa /><label for='sort_ascending'>" . $this->loc['sort_ascending'] . "</label>" ; | |
print "<input type='radio' name='sortorder' value='descending' id='sort_descending' $sd /><label for='sort_descending'>" . $this->loc['sort_descending'] . "</label>" ; | |
print '</td></tr>' ; | |
print "<tr><th>" . $this->loc['format'] . "</th><td>" ; | |
foreach ( $this->formats AS $k => $v ) { | |
$checked = $this->format == $k ? 'checked' : '' ; | |
print "<input type='radio' name='format' value='$k' id='format_$k' $checked /><label for='format_$k'>$v</label>" ; | |
} | |
$eid = $this->ext_image_data ? 'checked' : '' ; | |
print "<br/><input type='checkbox' name='ext_image_data' id='ext_image_data' value='1' $eid><label for='ext_image_data'>{$this->loc['ext_image_data']}</label>" ; | |
print '</td></tr>' ; | |
print "<tr><th></th><td><input type='submit' name='doit' value='{$this->loc['doit']}' /></td></tr>" ; | |
if ( isset ( $_REQUEST['doit'] ) ) { | |
$url = $this->url_get() ; | |
$s = $this->loc['query_url'] ; | |
$s = str_replace ( '$1' , "$url&doit=1" , $s ) ; | |
$s = str_replace ( '$2' , $url , $s ) ; | |
print "<tr><th>" . $this->loc['th_query_url'] ; | |
print "</th><td>$s</td></tr>" ; | |
} | |
print '</table>' ; | |
print "<input type='hidden' name='interface_language' value='{$this->interface_language}' />" ; | |
print '</form>' ; | |
$this->prettyOutput () ; | |
print '</body></html>' ; | |
} | |
function url_get () { | |
$url = 'http://' . $_SERVER["SERVER_NAME"] . $_SERVER["SCRIPT_NAME"] . "?" ; | |
$o = array () ; | |
foreach ( $_REQUEST AS $k => $v ) { | |
if ( substr ( $k , 0 , 1 ) == '_' ) continue ; | |
if ( $k == 'doit' ) continue ; | |
if ( $k == 'interface_language' and $v == 'en' ) continue ; | |
if ( $k == 'language' and $v == $this->interface_language ) continue ; | |
if ( $k == 'ext_image_data' and !isset($this->ns[6]) ) continue ; | |
if ( isset ( $this->default_parameter[$k] ) and $v == $this->default_parameter[$k] ) continue ; | |
if ( is_array ( $v ) ) { | |
if ( count ( $v ) == 0 ) continue ; | |
if ( $k == 'ns' and $v == array ( 0 => 1 ) ) continue ; | |
if ( $k == 'comb' and $v == array ( 'subset' => 1 ) ) continue ; | |
$w = array () ; | |
foreach ( $v AS $a => $b ) { | |
$w[] = $k . '%5B' . $a . '%5D=' . urlencode ( $b ) ; | |
} | |
$v = implode ( '&' , $w ) ; | |
$o[] = $v ; | |
} else { | |
$v = urlencode ( trim ( $v ) ) ; | |
if ( $v == '' ) continue ; | |
$o[] = $k . "=" . $v ; | |
} | |
} | |
$url .= implode ( '&' , $o ) ; | |
return $url ; | |
} | |
function makeCSV ( $s ) { | |
return str_replace ( '"' , '\\"' , $s ) ; | |
} | |
// Handles result output for HTML, CSV, TSV, Wiki | |
function prettyOutput () { | |
if ( !isset ( $this->result ) ) return ; | |
$type = $this->format ; | |
if ( $type == 'csv' or $type == 'tsv' or $type == 'wiki' ) | |
header('Content-type: text/plain; charset=utf-8'); | |
if ( $type == 'html' ) print "<h1><a name='results'>{$this->loc['results']}</a></h1>" ; | |
$baseurl = "http://{$this->language}.{$this->project}.org/wiki" ; | |
if ( $type == 'csv' ) $sep = '","' ; | |
else if ( $type == 'tsv' ) $sep = "\t" ; | |
else $sep = '' ; | |
foreach ( $this->result['*'] AS $c ) { | |
if ( count ( $c ) == 0 ) continue ; // Do not show result | |
$tag = $c['n'] ; | |
// TAG : categories | |
if ( $tag == 'categories' ) { | |
if ( $type == 'html' ) print "<h2>{$this->loc['comb_list']}</h2>" ; | |
else if ( $type == 'wiki' ) print "== {$this->loc['comb_list']} ==\n" ; | |
foreach ( $c['*'] AS $roots ) { | |
$root = $roots['a']['root'] ; | |
$depth = $roots['a']['depth'] ; | |
$dt = " ({$this->loc['depth']} $depth)" ; | |
if ( $type == 'html' ) print "<h3>{$root}{$dt}</h3><ol>" ; | |
else if ( $type == 'wiki' ) print "\n=== {$root}{$dt} ===\n" ; | |
else if ( $type == 'csv' ) print '"|categories' . $sep . $this->makeCSV ( $root ) . $sep . $depth . "\"\n" ; | |
else print "|$tag$sep$root$sep$depth\n" ; | |
foreach ( $roots['*'] AS $x ) { | |
$cat = $x['a']['name'] ; | |
if ( $type == 'html' ) print "<li><a target='_blank' href='http://{$this->language}.{$this->project}.org/wiki/Category:$cat'>$cat</a></li>" ; | |
else if ( $type == 'wiki' ) print "# $cat\n" ; | |
else if ( $type == 'csv' ) print '"' . $this->makeCSV ( $cat ) . "\"\n" ; | |
else print "$cat\n" ; | |
} | |
if ( $type == 'html' ) print "</ol>" ; | |
} | |
continue ; | |
} | |
// TAG : combination | |
if ( isset ( $this->loc['comb_'.$c['a']['type']] ) ) $ty = $this->loc['comb_'.$c['a']['type']] ; | |
else $ty = $c['a']['type'] ; | |
if ( $c['a']['type'] == 'atleast' ) $ty .= " (" . $c['a']['atleast'] . ")" ; | |
if ( $type == 'html' ) { | |
print "<h2>$ty</h2>" ; | |
print "<table border='1'><tbody>" ; | |
} else if ( $type == 'wiki' ) { | |
print "== $ty ==\n" ; | |
print "{| border='1'\n" ; | |
} | |
// Find headers | |
$headers = array () ; | |
foreach ( $c['*'] AS $p ) { | |
foreach ( $p['a'] AS $a => $v ) { | |
$headers[$a] = $a ; | |
} | |
} | |
if ( $type == 'html' and isset ( $headers['nstext'] ) ) unset ( $headers['nstext'] ) ; | |
if ( $type == 'html' ) { | |
print "<tr>" ; | |
foreach ( $headers AS $h ) { | |
$h2 = $h ; | |
if ( isset ( $this->loc['h_'.$h] ) ) $h2 = $this->loc['h_'.$h] ; | |
print "<th>$h2</th>" ; | |
} | |
print "</tr>" ; | |
} else if ( $type == 'wiki' ) { | |
$ho = array () ; | |
foreach ( $headers AS $h ) { | |
$h2 = $h ; | |
if ( isset ( $this->loc['h_'.$h] ) ) $h2 = $this->loc['h_'.$h] ; | |
$ho[] = $h2 ; | |
} | |
print "!" . implode ( " !! " , $ho ) . "\n|-\n" ; | |
} else { | |
print '|' . $c['a']['type'] . "\n" ; | |
$ho = array () ; | |
foreach ( $headers AS $h ) { | |
$h2 = $h ; | |
if ( isset ( $this->loc['h_'.$h] ) ) $h2 = $this->loc['h_'.$h] ; | |
if ( $type == 'tsv' or $type == 'csv' ) $h2 = $h ; | |
$ho[] = $h2 ; | |
} | |
if ( $type == 'csv' ) print '"' ; | |
print implode ( $sep , $ho ) ; | |
if ( $type == 'csv' ) print '"' ; | |
print "\n" ; | |
} | |
// Show rows | |
foreach ( $c['*'] AS $p ) { | |
if ( $type == 'html' ) print '<tr>' ; | |
// else if ( $type == 'csv' ) print '"' . $c['a']['type'] ; | |
// else if ( $type == 'tsv' ) print $c['a']['type'] ; | |
$first_col = true ; | |
foreach ( $headers AS $h ) { | |
if ( isset ( $p['a'][$h] ) ) $o = $p['a'][$h] ; | |
else $o = '' ; | |
$pn = str_replace ( '_' , ' ' , $o ) ; | |
$pns = '' ; | |
$ppns = '' ; | |
if ( $h == 'title' and isset ( $p['a']['namespace'] ) and $p['a']['namespace'] != '0' ) { | |
if ( isset ( $this->namespaces[$p['a']['namespace']] ) ) { | |
if ( $this->language == 'commons' ) { | |
$pns = $this->namespaces_en[$p['a']['namespace']] ; | |
} else { | |
$pns = $this->namespaces[$p['a']['namespace']] ; | |
} | |
$pns .= ':' ; | |
} | |
if ( $p['a']['namespace'] == 6 and $type == 'wiki' ) $ppns = ':' ; | |
else if ( $p['a']['namespace'] == 14 and $type == 'wiki' ) $ppns = ':' ; | |
} | |
if ( $type == 'html' ) { | |
if ( $h == 'title' ) { | |
// $o2 = urlencode ( $o ) ; // Strange problem with "/" | |
$o2 = $o ; | |
$o2 = str_replace ( "'" , urlencode("'") , $o2 ) ; | |
print "<td><a target='_blank' href='$baseurl/$pns$o2'>$pn</a></td>" ; | |
} else if ( $h == 'namespace' ) { | |
print "<td>" ; | |
if ( $this->language == 'commons' ) print $this->namespaces_en[$o] ; | |
else print $this->namespaces[$o] ; | |
print "</td>" ; | |
} else print "<td>$o</td>" ; | |
} else if ( $type == 'wiki' ) { | |
if ( $first_col ) print "|" ; | |
else print " || " ; | |
if ( $h == 'title' ) { | |
print "[[$ppns$pns$pn]]" ; | |
} else print $o ; | |
} else if ( $type == 'csv' ) { | |
if ( $first_col ) print '"' ; | |
else print $sep ; | |
print $this->makeCSV ( $pns.$o ) ; | |
} else { | |
if ( !$first_col ) print "\t" ; | |
print $pns.$o ; | |
} | |
$first_col = false ; | |
} | |
if ( $type == 'html' ) print '</tr>' ; | |
else if ( $type == 'wiki' ) print "\n|-\n" ; | |
else if ( $type == 'csv' ) print "\"\n" ; | |
else print "\n" ; | |
} | |
if ( $type == 'html' ) { | |
print "<tbody><tfoot>" ; | |
$td_cols = count ( $headers ) - 1 ; | |
foreach ( $c['a'] AS $a => $v ) { | |
print "<tr><th>$a</th><td colspan='$td_cols'>$v</td></tr>" ; | |
} | |
print "</tfoot></table>" ; | |
} else if ( $type == 'wiki' ) { | |
print "|}\n" ; | |
} | |
} | |
if ( $type == 'html' ) { | |
print "<p>Query took " . $this->result['a']['querytime_sec'] . " seconds.</p>" ; | |
print "<p>Searched " . $this->result['a']['total_categories_searched'] . " categories.</p>" ; | |
} | |
if ( $type == 'wiki' && count ( $this->result['a'] ) > 0 ) { | |
print "----\n" ; | |
foreach ( $this->result['a'] AS $k => $v ) { | |
print ';' . $k . ':' . $v . "\n" ; | |
} | |
} | |
} | |
function printXMLsub ( &$d ) { | |
if ( !is_array ( $d ) ) { | |
print htmlspecialchars ( $d , ENT_COMPAT , 'UTF-8' ) ; | |
return ; | |
} | |
if ( !isset ( $d['n'] ) ) return ; | |
print '<' . $d['n'] ; | |
if ( isset ( $d['a'] ) ) { | |
foreach ( $d['a'] AS $a => $v ) { | |
print ' ' . $a . '="' ; | |
print htmlspecialchars ( $v , ENT_COMPAT , 'UTF-8' ) ; | |
print '"' ; | |
} | |
} | |
if ( isset ( $d['*'] ) and count ( $d['*'] ) > 0 ) { | |
print ">" ; | |
foreach ( $d['*'] AS $s ) { | |
$this->printXMLsub ( $s ) ; | |
} | |
print "</" . $d['n'] . ">" ; | |
} else print " />" ; | |
} | |
function printXML () { | |
header('Content-type: text/xml; charset=utf-8'); | |
$this->printXMLsub ( $this->result ) ; | |
} | |
function printPHP () { | |
header('Content-type: application/serialized_PHP_variable'); | |
print serialize ( $this->result ) ; | |
} | |
function printJSON () { | |
header('Content-type: application/x-json; charset=utf-8'); | |
print json_encode ( $this->result ) ; | |
} | |
function comparePages ( $p1 , $p2 ) { | |
$ret = 0 ; | |
if ( $this->sorting_redlinks ) { | |
if ( $p1['a']['links'] > $p2['a']['links'] ) $ret = -1 ; | |
else if ( $p1['a']['links'] < $p2['a']['links'] ) $ret = 1 ; | |
if ( $ret != 0 ) return $ret ; // Descending link count takes priority in redlinks | |
} | |
if ( $this->sort_by == 'title' ) { | |
if ( $p1['a']['title'] > $p2['a']['title'] ) $ret = 1 ; | |
else if ( $p1['a']['title'] < $p2['a']['title'] ) $ret = -1 ; | |
} else if ( $this->sort_by == 'ns_title' ) { | |
if ( (int) $p1['a']['namespace'] > (int) $p2['a']['namespace'] ) $ret = 1 ; | |
else if ( (int) $p1['a']['namespace'] < (int) $p2['a']['namespace'] ) $ret = -1 ; | |
else { | |
if ( $p1['a']['title'] > $p2['a']['title'] ) $ret = 1 ; | |
else if ( $p1['a']['title'] < $p2['a']['title'] ) $ret = -1 ; | |
} | |
} else if ( $this->sort_by == 'size' ) { | |
if ( (int) $p1['a']['len'] > (int) $p2['a']['len'] ) $ret = 1 ; | |
else if ( (int) $p1['a']['len'] < (int) $p2['a']['len'] ) $ret = -1 ; | |
} else if ( $this->sort_by == 'filesize' ) { | |
if ( (int) $p1['a']['img_size'] > (int) $p2['a']['img_size'] ) $ret = 1 ; | |
else if ( (int) $p1['a']['img_size'] < (int) $p2['a']['img_size'] ) $ret = -1 ; | |
} else if ( $this->sort_by == 'uploaddate' ) { | |
if ( $p1['a']['img_timestamp'] > $p2['a']['img_timestamp'] ) $ret = 1 ; | |
else if ( $p1['a']['img_timestamp'] < $p2['a']['img_timestamp'] ) $ret = -1 ; | |
} else if ( $this->sort_by == 'date' ) { | |
if ( $p1['a']['touched'] > $p2['a']['touched'] ) $ret = 1 ; | |
else if ( $p1['a']['touched'] < $p2['a']['touched'] ) $ret = -1 ; | |
} | |
// print "$ret" ; | |
if ( $this->sort_order == 'descending' ) return -$ret ; | |
return $ret ; | |
} | |
function doSort () { | |
if ( $this->sort_by == 'none' && !$this->show_redlinks ) return ; | |
if ( !isset ( $this->result ) ) return ; | |
if ( !isset ( $this->result['*'] ) ) return ; | |
foreach ( $this->result['*'] AS $k => $c ) { | |
if ( count ( $c ) == 0 ) continue ; // Empty, no sorting neccessary | |
$tag = $c['n'] ; | |
$this->sorting_redlinks = substr ( $tag , 0 , 9 ) == 'redlinks.' ; | |
if ( $tag == 'categories' ) { | |
continue ; | |
} | |
usort ( $this->result['*'][$k]['*'] , array($this,'comparePages') ) ; | |
} | |
} | |
function out ( $doit ) { | |
$this->doSort () ; | |
if ( !$doit ) { | |
$this->printHTML() ; | |
return ; | |
} | |
if ( $this->format == 'xml' ) $this->printXML() ; | |
else if ( $this->format == 'php' ) $this->printPHP() ; | |
else if ( $this->format == 'json' ) $this->printJSON() ; | |
else if ( $this->format == 'wiki' ) $this->prettyOutput() ; | |
else if ( $this->format == 'csv' ) $this->prettyOutput() ; | |
else if ( $this->format == 'tsv' ) $this->prettyOutput() ; | |
else $this->printHTML() ; | |
} | |
} | |
// MAIN PROGRAM | |
$catscan = new CatScan () ; | |
$doit = isset ( $_REQUEST['doit'] ) ; | |
if ( $doit ) $catscan->runQuery() ; | |
$catscan->out ( $doit ) ; | |
// THE END | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment