Skip to content

Instantly share code, notes, and snippets.

@whym
Created July 4, 2011 18:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save whym/1063741 to your computer and use it in GitHub Desktop.
Save whym/1063741 to your computer and use it in GitHub Desktop.
a category query tool for Wikimedia wikis, forked from CatScan 2.0β: https://fisheye.toolserver.org/browse/Magnus/catscan_rewrite.php?r=117
<?PHP
/*
Rewrite of CatScan for Wikimedia Deutschland
(c) 2009 by Magnus Manske
Released under GPL
*/
error_reporting(E_ERROR|E_CORE_ERROR|E_ALL|E_COMPILE_ERROR);
ini_set('display_errors', 'On');
ini_set ('memory_limit', 1024*1024*96); // 96 MB
ini_set('user_agent','CatScan 2 (Magnus)'); # Fake user agent
if (!function_exists('mb_ucfirst') && function_exists('mb_substr')) {
function mb_ucfirst($string) {
$string = mb_strtoupper(mb_substr($string, 0, 1)) . mb_substr($string, 1);
return $string;
}
}
if (!function_exists('mb_lcfirst') && function_exists('mb_substr')) {
function mb_lcfirst($string) {
$string = mb_strtolower(mb_substr($string, 0, 1)) . mb_substr($string, 1);
return $string;
}
}
class CatScan {
function CatScan ( $auto_fill = true ) {
$this->last_mysql_db = '' ;
$this->max_temp_objects = 30000 ;
$this->use_db_memory = 'STORAGE MEMORY' ; // Set to blank string for "normal" temporary table, or 'STORAGE MEMORY' for memory
$this->basecats = array () ;
$this->negcats = array () ;
$this->has_flagged_revisions = false ;
$this->depth = 0 ;
$this->interface_language = $this->getParameter ( 'interface_language' , 'en' ) ;
if ( $auto_fill ) $this->getParameters() ;
$this->i18n_url = 'http://meta.wikimedia.org/w/index.php?title=CatScan2/Interface' ;
$this->loadLocale () ;
$this->sql_comment = '/* catscan_rewrite */' ;
}
function loadLocale () {
// Load interface language from wiki page
$text = file_get_contents ( $this->i18n_url . "&action=raw" ) ;
$text = explode ( "\n" , $text ) ;
$loc = array () ;
$lang = '' ;
foreach ( $text AS $line ) {
$line = trim ( $line ) ;
if ( substr ( $line , 0 , 2 ) == '==' ) {
$lang = trim ( mb_strtolower ( str_replace ( '=' , '' , $line ) ) ) ;
$loc[$lang] = array () ;
} else if ( substr ( $line , 0 , 1 ) == ';' ) {
$line = trim ( substr ( $line , 1 ) ) ;
$l = explode ( ':' , $line , 2 ) ;
if ( count ( $l ) != 2 ) continue ;
$loc[$lang][trim(mb_strtolower($l[0]))] = $l[1] ;
}
}
$this->available_interface_languages = array_keys ( $loc ) ;
if ( isset ( $loc[$this->interface_language] ) ) $this->loc = $loc[$this->interface_language] ;
else $this->loc = $loc['en'] ; // Fallback
// Namespaces namespaces
$this->namespaces_en = array () ;
$this->namespaces = array () ;
$api_url = "http://{$this->language}.{$this->project}.org/w/api.php" ;
$url = "{$api_url}?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases|statistics&format=php" ;
$data = unserialize ( file_get_contents ( $url ) ) ;
$data = $data['query']['namespaces'] ;
// print "<pre>" ; print_r ( $data ) ; print "</pre>" ;
foreach ( $data AS $num => $d ) {
// print "<pre>" ; print_r ( $d ) ; print "</pre>" ;
unset ( $v ) ;
unset ( $vc ) ;
if ( isset ( $d['*'] ) ) $v = $d['*'] ;
if ( isset ( $d['canonical'] ) ) $vc = $d['canonical'] ;
if ( $num == 0 ) {
if ( !isset ( $v ) || $v == '' ) $v = $this->loc['namespace_0'] ;
if ( !isset ( $vc ) || $vc == '' ) $vc = $loc['en']['namespace_0'] ;
}
// print "$v / $vc<hr/>" ;
if ( isset ( $v ) ) $this->namespaces[$num] = $v ;
if ( isset ( $vc ) ) $this->namespaces_en[$num] = $vc ;
}
foreach ( $this->loc AS $k => $v ) { // Legacy cleanup
if ( substr ( $k , 0 , 10 ) != 'namespace_' ) continue ;
unset ( $this->loc[$k] ) ;
}
/*
foreach ( $loc['en'] AS $k => $v ) {
if ( substr ( $k , 0 , 10 ) != 'namespace_' ) continue ;
$nsn = substr ( $k , 10 ) ;
$this->namespaces_en[$nsn] = $v ;
// unset ( $this->loc[$k] ) ;
}
foreach ( $this->loc AS $k => $v ) {
if ( substr ( $k , 0 , 10 ) != 'namespace_' ) continue ;
$nsn = substr ( $k , 10 ) ;
$this->namespaces[$nsn] = $v ;
unset ( $this->loc[$k] ) ;
}
*/
$this->sort_modes = array ( 'none' , 'title' , 'ns_title' , 'size' , 'date' , 'filesize' , 'uploaddate' ) ;
$this->formats = array (
'html' => 'HTML' ,
'csv' => 'CSV' ,
'tsv' => 'TSV' ,
'wiki' => 'Wiki' ,
'php' => 'PHP' ,
'xml' => 'XML' ,
'json' => 'JSON' ,
) ;
// Initialize language
if ( $this->language == $this->interface_language ) {
if ( isset ( $loc[$this->interface_language] ) ) {
if ( isset ( $loc[$this->interface_language]['auto_lang'] ) ) {
$this->language = $loc[$this->interface_language]['auto_lang'] ;
}
}
}
}
function iDie ( $text ) {
if ( $this->format == 'xml' ) {
header('Content-type: text/xml; charset=utf-8');
print "<error>" ;
print htmlspecialchars ( $text ) ;
print "</error>" ;
} else if ( $this->format == 'wiki' or $this->format == 'csv' or $this->format == 'tsv' ) {
header('Content-type: text/plain; charset=utf-8');
print "ERROR : $text " ;
} else {
header('Content-type: text/html; charset=utf-8');
print $text ;
}
exit () ;
}
// ATTENTION! THIS NEEDS TO BE ADAPTED TO LOCATION!!!
function getDBpassword () {
$this->mysql_user = "whym" ;
$this->db2temp = "u_" . $this->mysql_user ; // Needs to exist on all servers!
if ( !file_exists ( "../.my.cnf" ) ) $this->iDie ( "Could not acquire DB password" ) ;
$t = file_get_contents ( "../.my.cnf" ) ;
$lines = explode ( "\n" , $t ) ;
foreach ( $lines AS $l ) {
$l = trim ( $l ) ;
if ( substr ( $l , 0 , 8 ) != 'password' ) continue ;
$l = explode ( '"' , $l ) ;
array_shift ( $l ) ;
$this->mysql_password = array_shift ( $l ) ;
}
}
function getDBname () {
$ret = $this->language ;
if ( $this->language == 'commons' ) $ret = 'commonswiki_p' ;
else if ( $this->project == 'wikipedia' ) $ret .= 'wiki_p' ;
else if ( $this->project == 'wikisource' ) $ret .= 'wikisource_p' ;
else if ( $this->project == 'wikibooks' ) $ret .= 'wikibooks_p' ;
else if ( $this->project == 'wikinews' ) $ret .= 'wikinews_p' ;
else if ( $this->project == 'wikiversity' ) $ret .= 'wikiversity_p' ;
else $this->iDie ( "Cannot construct database name for $language.$project - aborting." ) ;
return $ret ;
}
function getDBconnection () {
if ( isset ( $this->mysql_con ) ) return $this->mysql_con ; // Paranoia
if ( !isset ( $this->mysql_password ) ) $this->getDBpassword () ;
$project = $this->project ;
if ( $project == "wikipedia" ) $project = "wiki" ;
$nlang = str_replace ( 'classic' , 'classical' , $this->language ) ;
if ( $this->language == 'commons' ) $project = 'wiki' ;
$server = $nlang . $project . "-p.db.ts.wikimedia.org" ;
if ( !$this->mysql_con = @mysql_connect ( $server , $this->mysql_user , $this->mysql_password ) ) {
$this->iDie ( "Could not connect to $server : " . mysql_error() ) ;
}
}
function initializeDBHandle () {
$this->getDBconnection () ;
$this->db = $this->getDBname () ;
}
function getParameter ( $key , $default = '' ) {
if ( $default == '' and isset ( $this->default_parameter[$key] ) ) $default = $this->default_parameter[$key] ;
if ( !isset ( $_REQUEST[$key] ) ) return $default ;
$v = $_REQUEST[$key] ;
return $v ;
}
function getParameters () {
$this->templates = array ( 'templates_yes' , 'templates_any' , 'templates_no' ) ;
// Set default values
$this->default_parameter = array (
'project' => 'wikipedia' ,
'depth' => 0 ,
'format' => 'html' ,
'sortby' => 'none' ,
'sortorder' => 'ascending' ,
'atleast_count' => 0 ,
'min_redlink_count' => 1 ,
'show_redirects' => 'both' ,
'edits' => array ( 'bots' => 'both' , 'anons' => 'both' , 'flagged' => 'both' ) ,
) ;
// Get misc parameters
$this->language = $this->getParameter ( 'language' , $this->interface_language ) ;
$this->project = $this->getParameter ( 'project' ) ;
$this->depth = floor ( $this->getParameter ( 'depth' ) ) ;
$this->format = $this->getParameter ( 'format' ) ;
$this->before = $this->getParameter ( 'before' ) ;
$this->after = $this->getParameter ( 'after' ) ;
$this->fbefore = $this->getParameter ( 'fbefore' ) ;
$this->fafter = $this->getParameter ( 'fafter' ) ;
$this->larger = $this->getParameter ( 'larger' ) ;
$this->smaller = $this->getParameter ( 'smaller' ) ;
$this->minlinks = $this->getParameter ( 'minlinks' ) ;
$this->maxlinks = $this->getParameter ( 'maxlinks' ) ;
$this->sort_by = $this->getParameter ( 'sortby' ) ;
$this->sort_order = $this->getParameter ( 'sortorder' ) ;
$this->atleast_count = floor ( $this->getParameter ( 'atleast_count' ) ) ;
$this->ext_image_data = $this->getParameter ( 'ext_image_data' , !isset($_REQUEST['doit']) ) ;
$this->show_redlinks = isset ( $_REQUEST['show_redlinks'] ) ;
$this->show_redlinks_only = isset ( $_REQUEST['show_redlinks_only'] ) ;
$this->remove_template_redlinks = isset ( $_REQUEST['remove_template_redlinks'] ) ;
$this->article_redlinks_only = isset ( $_REQUEST['article_redlinks_only'] ) ;
$this->min_redlink_count = $this->getParameter ( 'min_redlink_count' ) ;
$this->show_redirects = $this->getParameter ( 'show_redirects' ) ;
$this->templates_use_talk_yes = isset ( $_REQUEST['templates_use_talk_yes'] ) ;
$this->templates_use_talk_any = isset ( $_REQUEST['templates_use_talk_any'] ) ;
$this->templates_use_talk_no = isset ( $_REQUEST['templates_use_talk_no'] ) ;
if ( $this->language == 'commons' ) $this->project = 'wikimedia' ;
foreach ( $this->templates AS $t ) {
$u = explode ( "\n" , trim ( $this->getParameter ( $t , '' ) ) ) ;
if ( count ( $u ) == 1 and $u[0] == '' ) $u = array () ;
$this->$t = $u ;
}
if ( isset ( $_REQUEST['ns'] ) ) $this->ns = $_REQUEST['ns'] ;
else $this->ns = array ( 0 => 1 ) ;
if ( isset ( $_REQUEST['comb'] ) ) $this->comb = $_REQUEST['comb'] ;
else $this->comb = array ( 'subset' => '1' ) ;
if ( isset ( $_REQUEST['edits'] ) ) $this->edits = $_REQUEST['edits'] ;
else $this->edits = $this->default_parameter['edits'] ;
foreach ( $this->default_parameter['edits'] AS $e ) {
if ( !isset ( $this->edits[$e] ) ) $this->edits[$e] = 'both' ;
}
// Get categories, either as multiline text or as category1/category2
$categories = $this->getParameter ( 'categories' , '' ) ;
$categories = explode ( "\n" , $categories ) ;
$categories[] = $this->getParameter ( 'category1' , '' ) ;
$categories[] = $this->getParameter ( 'category2' , '' ) ;
foreach ( $categories AS $c ) {
$c = trim ( $c ) ;
if ( $c == '' ) continue ;
$this->basecats[] = $c ;
}
// Negative categories
$negcats = $this->getParameter ( 'negcats' , '' ) ;
$negcats = explode ( "\n" , $negcats ) ;
foreach ( $negcats AS $c ) {
$c = trim ( $c ) ;
if ( $c == '' ) continue ;
$this->negcats[] = $c ;
}
$this->basecats_orig = $this->basecats ;
$this->negcats_orig = $this->negcats ;
}
function makeDBsafe ( &$title ) {
$title = mb_ucfirst ( trim ( $title ) ) ;
$title = str_replace ( ' ' , '_' , $title ) ;
$title = str_replace ( '"' , '\"' , $title ) ;
}
function getDBsafe ( $title ) {
$this->makeDBsafe ( $title ) ;
return $title ;
}
function getTime () {
$ret = microtime() ;
$ret = explode ( ' ' , $ret ) ;
return $ret[0] + $ret[1] ;
}
function filterMasterTableNegative ( $ttname ) {
// Filter for negative categories
if ( count ( $this->negcats ) == 0 ) return ;
$sql = "DELETE FROM $ttname WHERE pid IN ( SELECT DISTINCT cl_from FROM {$this->db}.categorylinks,cat1_neg WHERE cl_to=catname )" ;
$this->query ( $this->db2temp , $sql ) ;
}
function createTemporaryMasterTable () {
$sql = "CREATE TEMPORARY TABLE temp ( pid INTEGER , ptitle VARCHAR(255) , pns INTEGER , ptouched VARCHAR(14) , prev INTEGER , plen INTEGER , pgroup INTEGER ) {$this->use_db_memory}" ;
$this->query ( $this->db2temp , $sql ) ;
$where = array () ;
// Time filter
$additional_table_from = '' ;
if ( $this->before != '' or $this->after != '' ) {
if ( $this->before != '' ) $where[] = 'rev_timestamp <= "' . $this->getDBsafe ( $this->before ) . '"' ;
if ( $this->after != '' ) $where[] = 'rev_timestamp >= "' . $this->getDBsafe ( $this->after ) . '"' ;
$where[] = 'rev_id = page_latest' ;
$additional_table_from = ",{$this->db}.revision" ;
}
// Time filter for first edit
if ( $this->fbefore != '' or $this->fafter != '' ) {
if ( $this->fbefore != '' ) $where[] = 'rev_timestamp <= "' . $this->getDBsafe ( $this->fbefore ) . '"' ;
if ( $this->fafter != '' ) $where[] = 'rev_timestamp >= "' . $this->getDBsafe ( $this->fafter ) . '"' ;
$where[] = 'rev_timestamp = (SELECT min(r2.rev_timestamp) FROM ' . "{$this->db}.revision r2 " . 'WHERE r2.rev_page = page_id)' ;
$additional_table_from = ",{$this->db}.revision" ;
}
// Size filter
if ( $this->larger != '' ) $where[] = 'page_len >= ' . $this->getDBsafe ( $this->larger ) ;
if ( $this->smaller != '' ) $where[] = 'page_len <= ' . $this->getDBsafe ( $this->smaller ) ;
if ( $this->show_redirects == 'yes' ) $where[] = 'page_is_redirect=1' ;
if ( $this->show_redirects == 'no' ) $where[] = 'page_is_redirect=0' ;
if ( count ( $this->basecats ) == 0 ) {
$this->template_primary = true ;
$this->template_group_limit = 0 ;
$tl = array () ;
if ( count ( $this->templates_yes ) > 0 ) {
foreach ( $this->templates_yes AS $t ) $tl[] = $this->getDBsafe ( $t ) ;
$this->template_group_limit = count ( $tl ) ;
} else {
foreach ( $this->templates_any AS $t ) $tl[] = $this->getDBsafe ( $t ) ;
}
$where[] = "page_id=tl_from" ;
$where[] = 'page_namespace IN (' . implode ( ',' , array_keys ( $this->ns ) ) . ')' ;
$where = implode ( " AND " , $where ) ;
$groupcnt = 0 ;
foreach ( $tl AS $t ) {
$sql = "INSERT INTO temp ( pid,ptitle,pns,ptouched,prev,plen,pgroup) " ;
$sql .= "SELECT DISTINCT page_id,page_title,page_namespace,page_touched,page_latest,page_len,$groupcnt " ;
$sql .= "FROM {$this->db}.page,{$this->db}.templatelinks {$additional_table_from} WHERE tl_title=\"$t\" AND tl_namespace=10 AND $where" ;
// print "$sql<hr/>" ; flush();
$groupcnt++ ;
$this->query ( $this->db2temp , $sql ) ;
}
} else {
$this->template_primary = false ;
$where[] = "catname=cl_to" ;
$where[] = "page_id=cl_from" ;
$where[] = 'page_namespace IN (' . implode ( ',' , array_keys ( $this->ns ) ) . ')' ;
$where = implode ( " AND " , $where ) ;
$sql = "INSERT INTO temp ( pid,ptitle,pns,ptouched,prev,plen,pgroup) " ;
$sql .= "SELECT DISTINCT page_id,page_title,page_namespace,page_touched,page_latest,page_len,catgroup " ;
$sql .= "FROM {$this->db}.page,{$this->db}.categorylinks,cat1 {$additional_table_from} WHERE $where" ;
$this->query ( $this->db2temp , $sql ) ;
}
}
function getCombinationTempTable ( $type ) {
$ret = array ( 'n' => 'combination' , '*' => array() , 'a' => array () ) ;
$cnt = 0 ;
foreach ( $this->catgroups AS $base => $cg ) $cnt = $cg ;
$sql = "CREATE TEMPORARY TABLE temp2 ( pid INTEGER , ptitle VARCHAR(255) , pns INTEGER , ptouched VARCHAR(14) , prev INTEGER , plen INTEGER , pgroup INTEGER , groupcount INTEGER , linkcount INTEGER NULL ) {$this->use_db_memory}" ;
$this->query ( $this->db2temp , $sql ) ;
// Table names
$rev = "{$this->db}.revision" ;
$ug = "{$this->db}.user_groups" ;
$fp = "{$this->db}.flaggedpages" ;
$pl = "{$this->db}.pagelinks" ;
$table_page = "{$this->db}.page" ;
$table_tl = "{$this->db}.templatelinks" ;
$order = "pid,ptitle,pns,ptouched,prev,plen,pgroup" ;
if ( $this->template_primary ) {
$cnt = $this->template_group_limit ;
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt >= $cnt" ;
} else {
// Construct main query
if ( $type == 'subset' ) {
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt = $cnt" ;
} else if ( $type == 'union' ) {
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid" ;
} else if ( $type == 'diff' ) {
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt = 1" ;
} else if ( $type == 'atleast' ) {
$sql = "SELECT $order,count(DISTINCT pgroup) AS cnt FROM temp GROUP BY pid HAVING cnt >= {$this->atleast_count}" ;
}
}
$sql = "INSERT INTO temp2 ($order,groupcount) $sql" ;
// Run main query
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
// HERE BE EXPENSIVE FILTERS!
// Works by removing all entries from temp2 that do NOT match the filters
// Links
if ( $type == 'subset' and $this->minlinks . $this->maxlinks != '' ) {
$min = $this->getDBsafe ( $this->minlinks ) ;
$max = $this->getDBsafe ( $this->maxlinks ) ;
$sql = "UPDATE temp2 SET linkcount = ( SELECT count(*) AS lc FROM $pl WHERE pl_from=pid)" ;
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
$sql = "DELETE FROM temp2 WHERE linkcount " ;
if ( $min == '' ) $sql .= "> $max" ;
else if ( $max == '' ) $sql .= "< $min" ;
else $sql .= "> $max OR linkcount < $min" ;
$res = $this->query ( $this->db2temp , $sql , $this->query ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
}
// Anons
if ( $this->edits['anons'] != 'both' ) {
if ( $this->edits['anons'] == 'yes' ) $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT rev_id FROM $rev WHERE rev_id=prev AND rev_user<>0)" ;
else $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT rev_id FROM $rev WHERE rev_id=prev AND rev_user=0)" ;
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
}
// Bots
if ( $this->edits['bots'] != 'both' ) {
if ( $this->edits['bots'] == 'yes' ) $sql = "DELETE FROM temp2 WHERE prev NOT IN ( SELECT DISTINCT rev_id FROM $rev,$ug WHERE rev_id=prev AND rev_user=ug_user AND ug_group='bot')" ;
else $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT rev_id FROM $rev,$ug WHERE rev_id=prev AND rev_user=ug_user AND ug_group='bot')" ;
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
}
// Flagged revisions
if ( $this->edits['flagged'] != 'both' ) {
if ( !$this->has_flagged_revisions ) $this->iDie ( "Flagged revisions not enabeled on {$this->language}.{$this->project}.org" ) ;
if ( $this->edits['flagged'] == 'yes' ) $sql = "DELETE FROM temp2 WHERE prev NOT IN ( SELECT DISTINCT fp_stable FROM $fp WHERE pid=fp_page_id AND fp_stable=prev AND fp_reviewed=1 )" ;
else $sql = "DELETE FROM temp2 WHERE prev IN ( SELECT DISTINCT fp_stable FROM $fp WHERE pid=fp_page_id AND fp_stable=prev AND fp_reviewed=1 )" ;
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
}
if ( !$this->template_primary ) {
// Templates : All
if ( count ( $this->templates_yes ) > 0 ) {
$tl = array () ;
foreach ( $this->templates_yes AS $t ) $tl[] = $this->getDBsafe ( $t ) ;
$tlc = count ( $tl ) ;
$tl = '"' . implode ( '","' , $tl ) . '"' ;
if ( $this->templates_use_talk_yes ) {
$sql = "DELETE FROM temp2 WHERE $tlc > ( SELECT count(DISTINCT tl_title) FROM $table_tl,$table_page WHERE MOD(pns,2)=0 AND tl_from=page_id AND page_title=ptitle AND page_namespace=pns+1 AND tl_title IN ( $tl ) )" ;
} else {
$sql = "DELETE FROM temp2 WHERE $tlc > ( SELECT count(DISTINCT tl_title) FROM $table_tl WHERE tl_from=pid AND tl_title IN ( $tl ) )" ;
}
$this->query ( $this->db2temp , $sql ) ;
}
}
// Templates : Any
if ( !$this->template_primary || count ( $this->templates_yes ) > 0 ) {
if ( count ( $this->templates_any ) > 0 ) {
$tl = array () ;
foreach ( $this->templates_any AS $t ) $tl[] = $this->getDBsafe ( $t ) ;
$tlc = count ( $tl ) ;
$tl = '"' . implode ( '","' , $tl ) . '"' ;
if ( $this->templates_use_talk_any ) {
$sql = "DELETE FROM temp2 WHERE 0 = ( SELECT count(*) FROM $table_tl,$table_page WHERE MOD(pns,2)=0 AND tl_from=page_id AND page_title=ptitle AND page_namespace=pns+1 AND tl_title IN ( $tl ) )" ;
} else {
$sql = "DELETE FROM temp2 WHERE 0 = ( SELECT count(*) FROM $table_tl WHERE tl_from=pid AND tl_title IN ( $tl ) )" ;
}
$this->query ( $this->db2temp , $sql ) ;
}
}
// Templates : None
if ( count ( $this->templates_no ) > 0 ) {
$tl = array () ;
foreach ( $this->templates_no AS $t ) $tl[] = $this->getDBsafe ( $t ) ;
$tlc = count ( $tl ) ;
$tl = '"' . implode ( '","' , $tl ) . '"' ;
if ( $this->templates_use_talk_no ) {
$sql = "DELETE FROM temp2 WHERE 0 < ( SELECT count(*) FROM $table_tl,$table_page WHERE MOD(pns,2)=0 AND tl_from=page_id AND page_title=ptitle AND page_namespace=pns+1 AND tl_title IN ( $tl ) )" ;
} else {
$sql = "DELETE FROM temp2 WHERE 0 < ( SELECT count(*) FROM $table_tl WHERE tl_from=pid AND tl_title IN ( $tl ) )" ;
}
$this->query ( $this->db2temp , $sql ) ;
}
// Call redlinks
if ( $this->show_redlinks ) {
$this->check_redlinks ( 'temp2' , $type ) ;
if ( $this->show_redlinks_only ) {
return array () ;
}
}
// Retrieve results
$sql = "SELECT * FROM temp2" ;
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
$tmp = array () ;
while ( $o = mysql_fetch_object ( $res ) ) {
if ( isset ( $tmp[$o->pid] ) ) continue ; // Had that page already
if ( count ( $tmp ) > $this->max_temp_objects ) $this->iDie ( "Maximum potential result objects exceeded, aborting" ) ;
$tc = count ( $tmp ) ;
$tmp[$o->pid] = $tc ;
$n = array ( 'n' => 'page' , 'a' => array (
'title' => $o->ptitle ,
'id' => $o->pid ,
'namespace' => $o->pns ,
'len' => $o->plen ,
'touched' => $o->ptouched ,
'seen' => $o->groupcount ,
) ) ;
$n['a']['nstext'] = $this->namespaces[$o->pns] ;
if ( '' <> $o->linkcount ) $n['a']['links'] = $o->linkcount ;
$ret['*'][$tc] = $n ;
}
$ret['a']['type'] = $type ;
if ( $type == 'atleast' ) $ret['a']['atleast'] = $this->atleast_count ;
if ( $this->ext_image_data ) {
$x = array ( 'img_size' , 'img_width' , 'img_height' , 'img_media_type' , 'img_major_mime' , 'img_minor_mime' , 'img_user_text' , 'img_timestamp' , 'img_sha1' ) ;
$qx = implode ( ',' , $x ) ;
$res = $this->query ( $this->db2temp , "SELECT pid,$qx FROM temp2,{$this->db}.image WHERE pns=6 AND img_name=ptitle" ) ;
while ( $o = mysql_fetch_object ( $res ) ) {
$id = $tmp[$o->pid] ;
foreach ( $x AS $y ) {
$ret['*'][$id]['a'][$y] = $o->$y ;
}
}
}
// Drop table
$sql = "DROP TABLE temp2" ;
$res = $this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
$ret['a']['count'] = count ( $ret['*'] ) ;
return $ret ;
}
function check_redlinks ( $table , $type ) {
# if ( !isset ( $this->redlinks_out ) ) {
# $this->redlinks_out = array () ;
# }
$pagelinks = "{$this->db}.pagelinks" ;
$page = "{$this->db}.page" ;
$sql = "CREATE TEMPORARY TABLE temp_redlinks ( ptitle VARCHAR (255) BINARY , namespace INTEGER , linkcount INTEGER , INDEX i1 ( ptitle , namespace ) ) {$this->use_db_memory}" ;
$this->query ( $this->db2temp , $sql ) ;
if ( mysql_errno() != 0 ) $this->iDie ( mysql_error() ) ;
$sql = "INSERT INTO temp_redlinks ( ptitle , namespace , linkcount ) SELECT pl_title,pl_namespace,COUNT(pl_from) AS cnt FROM $pagelinks,$table WHERE pl_from=pid GROUP BY pl_title,pl_namespace HAVING cnt>={$this->min_redlink_count}" ;
$res = $this->query ( $this->db2temp , $sql ) ;
// Remove all non-article namespace links (optional)
if ( $this->article_redlinks_only ) {
$sql = "DELETE FROM temp_redlinks WHERE namespace<>0" ;
$res = $this->query ( $this->db2temp , $sql ) ;
}
// Remove all "redlinks" that exists
$sql = "DELETE FROM temp_redlinks WHERE EXISTS ( SELECT * FROM $page WHERE page_title=temp_redlinks.ptitle AND page_namespace=temp_redlinks.namespace )" ;
$res = $this->query ( $this->db2temp , $sql ) ;
// Remove template redlinks (optional)
if ( $this->remove_template_redlinks ) {
$sql = "DELETE FROM temp_redlinks WHERE EXISTS ( SELECT * FROM $page,$pagelinks WHERE pl_from=page_id AND page_namespace=10 AND pl_title=temp_redlinks.ptitle AND pl_namespace=temp_redlinks.namespace )" ;
$res = $this->query ( $this->db2temp , $sql ) ;
}
$type = "redlinks.$type" ;
$rl = array ( 'n' => "$type" , '*' => array() , 'a' => array ( 'type' => $type ) ) ;
$sql = "SELECT * FROM temp_redlinks" ;
$res = $this->query ( $this->db2temp , $sql ) ;
$spec1 = 'Special:' ;
$spec2 = $spec1 ;
if ( isset ( $this->loc['special_namespace'] ) ) $spec2 = $this->loc['special_namespace'] . ':' ;
while ( $o = mysql_fetch_object ( $res ) ) {
$t = mb_ucfirst ( $o->ptitle ) ;
if ( substr ( $t , 0 , strlen ( $spec1 ) ) == $spec1 ) continue ;
if ( substr ( $t , 0 , strlen ( $spec2 ) ) == $spec2 ) continue ;
# print $o->ptitle . " : " . $o->linkcount . " : " . $o->namespace . "<br/>\n" ;
$n = array ( 'n' => 'page' , 'a' => array (
'title' => $o->ptitle ,
'links' => $o->linkcount ,
'namespace' => $o->namespace ,
'nstext' => $this->namespaces[$o->namespace] ,
# 'id' => $o->pid ,
# 'len' => $o->plen ,
# 'touched' => $o->ptouched ,
# 'seen' => $o->groupcount ,
) ) ;
$rl['*'][] = $n ;
}
$rl['a']['count'] = count ( $rl['*'] ) ;
$this->result['*'][] = $rl ;
$sql = "DROP TABLE temp_redlinks" ;
$this->query ( $this->db2temp , $sql ) ;
}
function checkedFlaggedRevisions () {
// $sql = 'SELECT * FROM flaggedrevs LIMIT 1' ;
$sql = "SELECT count(*) AS cnt FROM information_schema.tables WHERE table_schema='{$this->db}' AND table_name='flaggedrevs'" ;
$res = $this->query ( $this->db , $sql ) ;
// if ( mysql_errno() != 0 ) return ;
$o = mysql_fetch_object ( $res ) ;
if ( $o->cnt == 1 ) $this->has_flagged_revisions = true ;
}
function addSubcats ( $ttname , $category , $depth , $group ) {
$c = $this->getDBsafe ( $category ) ;
$this->query ( $this->db2temp , "CREATE TEMPORARY TABLE cat2 ( catname VARCHAR(255) PRIMARY KEY ) {$this->use_db_memory}" ) ;
// Seed
$this->query ( $this->db2temp , "INSERT IGNORE INTO cat2 ( catname ) VALUES ( \"$c\" )" ) ;
$this->query ( $this->db2temp , "INSERT IGNORE INTO $ttname ( catgroup , catname , depth ) VALUES ( $group , \"$c\" , 0 )" ) ;
// Iterate
$db = $this->db ;
for ( $d = 1 ; $d <= $depth ; $d++ ) {
$ld = $d - 1 ;
$sql = "INSERT IGNORE INTO $ttname ( catgroup , catname , depth ) SELECT DISTINCT $group,page_title,$d FROM $db.page,$db.categorylinks,cat2 WHERE page_id=cl_from AND cl_to=catname AND page_namespace=14" ;
// print "$sql<br/>" ;
$this->query ( $this->db2temp , $sql ) ;
$this->query ( $this->db2temp , "INSERT IGNORE INTO $ttname ( catgroup , catname , depth ) SELECT $group , catname , $ld FROM cat2" ) ;
if ( $d == $depth ) break ;
$this->query ( $this->db2temp , "DELETE FROM cat2" ) ;
$this->query ( $this->db2temp , "INSERT INTO cat2 ( catname ) SELECT catname FROM $ttname WHERE depth=$d AND catgroup=$group" ) ;
}
$this->query ( $this->db2temp , "DROP TABLE cat2" ) ;
}
function query ( $db , $sql ) {
if ( $db != $this->last_mysql_db ) {
mysql_select_db ( $db ) ;
$this->last_mysql_db = $db ;
}
$ret = mysql_query ( $sql , $this->mysql_con ) ;
if ( mysql_errno() != 0 ) $this->iDie ( "MYSQL error : " . mysql_error() . " [$sql]" ) ;
return $ret ;
}
function getCatLists () {
$ret = array ( 'n' => 'categories' , '*' => array() , 'a' => array () ) ;
foreach ( $this->count2cat AS $group => $cat ) {
$n = array ( 'n' => 'list' , '*' => array() , 'a' => array () ) ;
$res = $this->query ( $this->db2temp , "SELECT catname FROM cat1 WHERE catgroup=$group" ) ;
while ( $o = mysql_fetch_object ( $res ) ) {
$n['*'][] = array ( 'n' => 'c' , 'a' => array ( 'name' => $o->catname ) ) ;
}
$n['a']['root'] = $cat ;
$n['a']['depth'] = $this->cat_depth[$cat] ;
$n['a']['count'] = count ( $n['*'] ) ;
$ret['*'][] = $n ;
}
$ret['a']['count'] = count ( $ret['*'] ) ;
return $ret ;
}
function createTemporaryTable ( $ttname , $bc ) {
$this->query ( $this->db2temp , "CREATE TEMPORARY TABLE $ttname ( catgroup INTEGER , catname VARCHAR(255) , depth INTEGER , PRIMARY KEY ca ( catgroup , catname ) , INDEX cb ( catgroup , depth ) ) {$this->use_db_memory}" ) ;
$cnt = 1 ;
if ( $ttname == 'cat1' ) $this->catgroups = array () ;
foreach ( $bc AS $k => $cat ) {
$c = explode ( '|' , $cat , 2 ) ;
if ( count ( $c ) > 1 ) $depth = array_pop ( $c ) ;
else $depth = $this->depth ;
$cat = array_shift ( $c ) ;
$bc[$k] = $cat ; // Removing "|n"
if ( $ttname == 'cat1' ) {
$this->cat_depth[$cat] = $depth ;
$this->catgroups[$cat] = $cnt ;
$this->count2cat[$cnt] = $cat ;
}
$this->addSubcats ( $ttname , $cat , $depth , $cnt ) ;
$cnt++ ;
}
return $bc ;
}
function runQuery () {
$starttime = $this->getTime() ;
$this->initializeDBHandle () ;
$this->checkedFlaggedRevisions () ;
$debug = 0 ;
if ( $debug ) { $n = $this->getTime() - $starttime ; print "Start:$n<br/>\n" ; }
$this->basecats = $this->createTemporaryTable ( 'cat1' , $this->basecats ) ;
$res = $this->query ( $this->db2temp , "SELECT count(*) AS c FROM cat1" ) ;
$o = mysql_fetch_object ( $res ) ;
$total_subcats = $o->c ;
if ( $debug ) { $n = $this->getTime() - $starttime - $n ; print "Subcats:$n<br/>\n" ; }
$this->result = array ( 'n' => 'result' , 'a' => array() , '*' => array() ) ;
$use_master_table = false ;
$force_master = array ( 'subset' , 'union' , 'diff' , 'atleast' ) ;
foreach ( $force_master AS $m ) {
if ( isset ( $this->comb[$m] ) ) $use_master_table = true ;
}
if ( $use_master_table ) {
$this->createTemporaryMasterTable () ;
if ( count ( $this->negcats ) > 0 ) {
$this->negcats = $this->createTemporaryTable ( 'cat1_neg' , $this->negcats ) ;
$this->filterMasterTableNegative ( 'temp' ) ;
$this->query ( $this->db2temp , "DROP TABLE cat1_neg" ) ;
}
}
if ( $debug ) { $n = $this->getTime() - $starttime - $n ; print "Master table:$n<br/>\n" ; }
if ( isset ( $this->comb['list'] ) ) $this->result['*'][] = $this->getCatLists () ;
if ( isset ( $this->comb['subset'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'subset' ) ;
if ( isset ( $this->comb['union'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'union' ) ;
if ( isset ( $this->comb['diff'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'diff' ) ;
if ( isset ( $this->comb['atleast'] ) ) $this->result['*'][] = $this->getCombinationTempTable ( 'atleast' ) ;
// Clean up the tables; should happen automaticcally, but PARANOIA!
$this->query ( $this->db2temp , "DROP TABLE cat1" ) ;
if ( $use_master_table ) $this->query ( $this->db2temp , "DROP TABLE temp" ) ;
if ( $debug ) { $n = $this->getTime() - $starttime - $n ; print "Combinations:$n<br/>\n" ; }
$endtime = $this->getTime() ;
$difftime = $endtime - $starttime ;
$this->result['a']['querytime_sec'] = $difftime ;
$this->result['a']['total_categories_searched'] = $total_subcats ;
$this->result['a']['query_url'] = $this->url_get() ;
}
function printHTML () {
$script = array_pop ( explode ( '/' , $_SERVER["SCRIPT_NAME"] ) ) ;
$uil = mb_strtoupper($this->interface_language) ;
$lil = mb_strtolower($this->interface_language) ;
header('Content-type: text/html; charset=utf-8');
print '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//'.$uil.'" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' . "\n\n" ;
print '<html><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"><body>' ;
print "<table style='background-color:#BAD0EF'><tr><td rowspan='2' nowrap><h1 style='margin-top:0px;margin-bottom:0px;padding-bottom:0px;padding-right:5px'>" . $this->loc['toolname'] . '</h1></td>' ;
print "<td valign='bottom' width='100%'><small>{$this->loc['subtitle']}, <em>modified by <a href='http://toolserver.org/~whym'>whym</a></em></small>" ;
print " [<a href='http://meta.wikipedia.org/wiki/CatScan2/{$lil}'><small>{$this->loc['manual']}</small></a>]" ;
print " [<a href='{$this->i18n_url}#{$uil}'><small>{$this->loc['interface_text']}</small></a>]</td></tr><tr><td>" ;
print "{$this->loc['interface_language']} : " ;
foreach ( $this->available_interface_languages AS $l ) {
if ( $l == $this->interface_language ) print " [<small><b>" . mb_strtoupper ( $l ) . "</b></small></a>]" ;
else print " [<a href='$script?interface_language=$l'><small>" . mb_strtoupper ( $l ) . "</small></a>]" ;
}
print "</td></table>" ;
$cats = implode ( "\n" , $this->basecats_orig ) ;
$ncats = implode ( "\n" , $this->negcats_orig ) ;
// Show the entry form
// print "<i>[TBI] = To Be Implemented</i>" ;
print '<form method="post" action="./' . $script . '">' ;
print '<table border="1">' ;
print "<tr><th>" . $this->loc['language'] . "</th>" ;
print "<td><input type='text' name='language' value='{$this->language}' /></td></tr>" ;
print "<tr><th>" . $this->loc['project'] . "</th>" ;
print "<td><input type='text' name='project' value='{$this->project}' /></td></tr>" ;
print "<tr><th>" . $this->loc['depth'] . "</th>" ;
print "<td><input type='text' name='depth' value='{$this->depth}' />{$this->loc['depth_explanation']}</td></tr>" ;
print "<tr><th>" . $this->loc['categories'] . "</th>" ;
print "<td><textarea name='categories' rows='3' cols='80'>{$cats}</textarea></td></tr>" ;
print "<tr><th>" . $this->loc['negative_categories'] . "</th>" ;
print "<td><textarea name='negcats' rows='2' cols='80'>{$ncats}</textarea></td></tr>" ;
$combs = array ( 'list' , 'subset' , 'union' , 'diff' , 'atleast' ) ;
print "<tr><th>" . $this->loc['combination'] . "</th><td>" ;
foreach ( $combs AS $c ) {
$checked = isset ( $this->comb[$c] ) ? 'checked' : '' ;
$name = $this->loc["comb_$c"] ;
print "<input type='checkbox' name='comb[$c]' value='1' id='comb_$c' $checked /><label for='comb_$c'>$name</label>" ;
}
print " <input type='text' size='3' name='atleast_count' value='{$this->atleast_count}' />" ;
print "</td></tr>" ;
print "<tr><th>" . $this->loc['namespaces'] . "</th><td>" ;
$ne = '' ;
$no = '' ;
foreach ( $this->namespaces AS $num=> $name ) {
$checked = isset ( $this->ns[$num] ) ? 'checked' : '' ;
$s = "<td><input type='checkbox' name='ns[$num]' id='ns_$num' value='1' $checked /><label for='ns_$num'>$name</label></td>" ;
if ( $num % 2 == 0 ) $ne .= $s ;
else $no .= $s ;
}
print "<table><tr>$ne</tr></tr>$no</tr></table>" ;
print '</td></tr>' ;
print "<tr><th>" . $this->loc['show_redirects'] . "</th><td>" ;
print "<table><tr>" ;
$c1 = $this->show_redirects == 'both' ? 'checked' : '' ;
$c2 = $this->show_redirects == 'yes' ? 'checked' : '' ;
$c3 = $this->show_redirects == 'no' ? 'checked' : '' ;
print "<td><input type='radio' name='show_redirects' value='both' id='show_redirects_both' $c1 /><label for='show_redirects_both'>{$this->loc['edits_both']}</label></td>" ;
print "<td><input type='radio' name='show_redirects' value='yes' id='show_redirects_yes' $c2 /><label for='show_redirects_yes'>{$this->loc['edits_yes']}</label></td>" ;
print "<td><input type='radio' name='show_redirects' value='no' id='show_redirects_no' $c3 /><label for='show_redirects_no'>{$this->loc['edits_no']}</label></td>" ;
print "</tr></table>" ;
print '</td></tr>' ;
print "<tr><th>" . $this->loc['templates'] . "</th><td>" ;
print "<table><tr>" ;
$first = true ;
foreach ( $this->templates AS $t ) {
$t2 = array_pop ( explode ( '_' , $t ) ) ;
$n = "templates_use_talk_$t2" ;
$tut_checked = $this->$n ? 'checked' : '' ;
if ( $first ) $b = '' ;
else $b = 'style="border-left:1px solid black; padding-left:2px"' ;
print "<td $b>{$this->loc[$t]}<br/><textarea name='$t' cols='30' rows='4'>" . implode ( "\n" , $this->$t ) . "</textarea>" ;
print "<br/><input type='checkbox' name='templates_use_talk_$t2' id='templates_use_talk_$t2' value='1' $tut_checked />" ;
print "<label for='templates_use_talk_$t2'>{$this->loc['templates_use_talk_instead']}</label>" ;
print "</td>" ;
$first = false ;
}
print "</tr></table>" ;
print '</td></tr>' ;
$edits = array ( 'bots' , 'anons' , 'flagged' ) ;
print "<tr><th>" . $this->loc['edits'] . "</th><td>" ;
print "<table>" ;
foreach ( $edits AS $e ) {
$c1 = $this->edits[$e] == 'both' ? 'checked' : '' ;
$c2 = $this->edits[$e] == 'yes' ? 'checked' : '' ;
$c3 = $this->edits[$e] == 'no' ? 'checked' : '' ;
print "<tr><td>{$this->loc["edit_$e"]}</td>" ;
print "<td><input type='radio' name='edits[$e]' value='both' id='edits_both_$e' $c1 /><label for='edits_both_$e'>{$this->loc['edits_both']}</label></td>" ;
print "<td><input type='radio' name='edits[$e]' value='yes' id='edits_yes_$e' $c2 /><label for='edits_yes_$e'>{$this->loc['edits_yes']}</label></td>" ;
print "<td><input type='radio' name='edits[$e]' value='no' id='edits_no_$e' $c3 /><label for='edits_no_$e'>{$this->loc['edits_no']}</label></td>" ;
print "</tr>" ;
}
print "</table>" ;
print '</td></tr>' ;
print "<tr><th>" . $this->loc['last_change'] . "</th><td>" ;
print "<table>" ;
print "<tr><td>{$this->loc['before']}</td><td><input type='text' name='before' value='{$this->before}' /></td><td rowspan='2'>{$this->loc['date_format']}</td></tr>" ;
print "<tr><td>{$this->loc['after']}</td><td><input type='text' name='after' value='{$this->after}' /></td></tr>" ;
print "</table>" ;
print '</td></tr>' ;
print "<tr><th>" . "first edit date" . "</th><td>" ;
print "<table>" ;
print "<tr><td>{$this->loc['before']}</td><td><input type='text' name='fbefore' value='{$this->fbefore}' /></td><td rowspan='2'>{$this->loc['date_format']}</td></tr>" ;
print "<tr><td>{$this->loc['after']}</td><td><input type='text' name='fafter' value='{$this->fafter}' /></td></tr>" ;
print "</table>" ;
print '</td></tr>' ;
print "<tr><th>" . $this->loc['size'] . "</th><td>" ;
print "<table>" ;
print "<tr><td>{$this->loc['larger']}</td><td><input type='text' name='larger' value='{$this->larger}' /></td><td rowspan='2'>{$this->loc['size_explanation']}</td></tr>" ;
print "<tr><td>{$this->loc['smaller']}</td><td><input type='text' name='smaller' value='{$this->smaller}' /></td></tr>" ;
print "</table>" ;
print '</td></tr>' ;
print "<tr><th>" . $this->loc['link_number'] . "</th><td>" ;
print "<table>" ;
print "<tr><td>{$this->loc['maxlinks']}</td><td><input type='text' name='maxlinks' value='{$this->maxlinks}' /></td><td rowspan='2'>{$this->loc['links_explanation']}</td></tr>" ;
print "<tr><td>{$this->loc['minlinks']}</td><td><input type='text' name='minlinks' value='{$this->minlinks}' /></td></tr>" ;
print "</table>" ;
print '</td></tr>' ;
$sred = $this->show_redlinks ? 'checked' : '' ;
$sredo = $this->show_redlinks_only ? 'checked' : '' ;
$rtr = $this->remove_template_redlinks ? 'checked' : '' ;
$aro = $this->article_redlinks_only ? 'checked' : '' ;
print "<tr><th>" . $this->loc['redlinks'] . "</th><td>" ;
print "<table><tr>" ;
print "<td><input type='checkbox' name='show_redlinks' value='1' id='show_redlinks' $sred /><label for='show_redlinks'>" . $this->loc['show_redlinks'] . "</label></td>" ;
print "<td><input type='checkbox' name='show_redlinks_only' value='1' id='show_redlinks_only' $sredo /><label for='show_redlinks_only'>" . $this->loc['show_redlinks_only'] . "</label></td>" ;
print "<td><input type='checkbox' name='remove_template_redlinks' value='1' id='remove_template_redlinks' $rtr /><label for='remove_template_redlinks'>" . $this->loc['remove_template_redlinks'] . "</label></td>" ;
print "<td><input type='checkbox' name='article_redlinks_only' value='1' id='article_redlinks_only' $aro /><label for='article_redlinks_only'>" . $this->loc['article_redlinks_only'] . "</label></td>" ;
print "</tr><tr>" ;
print "<td colspan='2'>{$this->loc['min_redlink_count']} <input type='text' size='3' name='min_redlink_count' value='{$this->min_redlink_count}' /></td>" ;
print "</tr></table>" ;
print '</td></tr>' ;
$sa = $this->sort_order == 'ascending' ? 'checked' : '' ;
$sd = $this->sort_order == 'descending' ? 'checked' : '' ;
print "<tr><th>" . $this->loc['sort'] . "</th><td>" ;
foreach ( $this->sort_modes AS $sm ) {
if ( $this->sort_by == $sm ) $chk = 'checked' ;
else $chk = '' ;
if ( !isset ( $this->loc['sort_by_'.$sm] ) ) $txt = "<i>untranslated</i> sort_by_$sm" ;
else $txt = $this->loc['sort_by_'.$sm] ;
print "<input type='radio' name='sortby' value='$sm' id='sortby_$sm' $chk /><label for='sortby_$sm'>" . $txt . "</label>" ;
}
print ' &nbsp; ' ;
print "<input type='radio' name='sortorder' value='ascending' id='sort_ascending' $sa /><label for='sort_ascending'>" . $this->loc['sort_ascending'] . "</label>" ;
print "<input type='radio' name='sortorder' value='descending' id='sort_descending' $sd /><label for='sort_descending'>" . $this->loc['sort_descending'] . "</label>" ;
print '</td></tr>' ;
print "<tr><th>" . $this->loc['format'] . "</th><td>" ;
foreach ( $this->formats AS $k => $v ) {
$checked = $this->format == $k ? 'checked' : '' ;
print "<input type='radio' name='format' value='$k' id='format_$k' $checked /><label for='format_$k'>$v</label>" ;
}
$eid = $this->ext_image_data ? 'checked' : '' ;
print "<br/><input type='checkbox' name='ext_image_data' id='ext_image_data' value='1' $eid><label for='ext_image_data'>{$this->loc['ext_image_data']}</label>" ;
print '</td></tr>' ;
print "<tr><th></th><td><input type='submit' name='doit' value='{$this->loc['doit']}' /></td></tr>" ;
if ( isset ( $_REQUEST['doit'] ) ) {
$url = $this->url_get() ;
$s = $this->loc['query_url'] ;
$s = str_replace ( '$1' , "$url&doit=1" , $s ) ;
$s = str_replace ( '$2' , $url , $s ) ;
print "<tr><th>" . $this->loc['th_query_url'] ;
print "</th><td>$s</td></tr>" ;
}
print '</table>' ;
print "<input type='hidden' name='interface_language' value='{$this->interface_language}' />" ;
print '</form>' ;
$this->prettyOutput () ;
print '</body></html>' ;
}
function url_get () {
$url = 'http://' . $_SERVER["SERVER_NAME"] . $_SERVER["SCRIPT_NAME"] . "?" ;
$o = array () ;
foreach ( $_REQUEST AS $k => $v ) {
if ( substr ( $k , 0 , 1 ) == '_' ) continue ;
if ( $k == 'doit' ) continue ;
if ( $k == 'interface_language' and $v == 'en' ) continue ;
if ( $k == 'language' and $v == $this->interface_language ) continue ;
if ( $k == 'ext_image_data' and !isset($this->ns[6]) ) continue ;
if ( isset ( $this->default_parameter[$k] ) and $v == $this->default_parameter[$k] ) continue ;
if ( is_array ( $v ) ) {
if ( count ( $v ) == 0 ) continue ;
if ( $k == 'ns' and $v == array ( 0 => 1 ) ) continue ;
if ( $k == 'comb' and $v == array ( 'subset' => 1 ) ) continue ;
$w = array () ;
foreach ( $v AS $a => $b ) {
$w[] = $k . '%5B' . $a . '%5D=' . urlencode ( $b ) ;
}
$v = implode ( '&' , $w ) ;
$o[] = $v ;
} else {
$v = urlencode ( trim ( $v ) ) ;
if ( $v == '' ) continue ;
$o[] = $k . "=" . $v ;
}
}
$url .= implode ( '&' , $o ) ;
return $url ;
}
function makeCSV ( $s ) {
return str_replace ( '"' , '\\"' , $s ) ;
}
// Handles result output for HTML, CSV, TSV, Wiki
function prettyOutput () {
if ( !isset ( $this->result ) ) return ;
$type = $this->format ;
if ( $type == 'csv' or $type == 'tsv' or $type == 'wiki' )
header('Content-type: text/plain; charset=utf-8');
if ( $type == 'html' ) print "<h1><a name='results'>{$this->loc['results']}</a></h1>" ;
$baseurl = "http://{$this->language}.{$this->project}.org/wiki" ;
if ( $type == 'csv' ) $sep = '","' ;
else if ( $type == 'tsv' ) $sep = "\t" ;
else $sep = '' ;
foreach ( $this->result['*'] AS $c ) {
if ( count ( $c ) == 0 ) continue ; // Do not show result
$tag = $c['n'] ;
// TAG : categories
if ( $tag == 'categories' ) {
if ( $type == 'html' ) print "<h2>{$this->loc['comb_list']}</h2>" ;
else if ( $type == 'wiki' ) print "== {$this->loc['comb_list']} ==\n" ;
foreach ( $c['*'] AS $roots ) {
$root = $roots['a']['root'] ;
$depth = $roots['a']['depth'] ;
$dt = " ({$this->loc['depth']} $depth)" ;
if ( $type == 'html' ) print "<h3>{$root}{$dt}</h3><ol>" ;
else if ( $type == 'wiki' ) print "\n=== {$root}{$dt} ===\n" ;
else if ( $type == 'csv' ) print '"|categories' . $sep . $this->makeCSV ( $root ) . $sep . $depth . "\"\n" ;
else print "|$tag$sep$root$sep$depth\n" ;
foreach ( $roots['*'] AS $x ) {
$cat = $x['a']['name'] ;
if ( $type == 'html' ) print "<li><a target='_blank' href='http://{$this->language}.{$this->project}.org/wiki/Category:$cat'>$cat</a></li>" ;
else if ( $type == 'wiki' ) print "# $cat\n" ;
else if ( $type == 'csv' ) print '"' . $this->makeCSV ( $cat ) . "\"\n" ;
else print "$cat\n" ;
}
if ( $type == 'html' ) print "</ol>" ;
}
continue ;
}
// TAG : combination
if ( isset ( $this->loc['comb_'.$c['a']['type']] ) ) $ty = $this->loc['comb_'.$c['a']['type']] ;
else $ty = $c['a']['type'] ;
if ( $c['a']['type'] == 'atleast' ) $ty .= " (" . $c['a']['atleast'] . ")" ;
if ( $type == 'html' ) {
print "<h2>$ty</h2>" ;
print "<table border='1'><tbody>" ;
} else if ( $type == 'wiki' ) {
print "== $ty ==\n" ;
print "{| border='1'\n" ;
}
// Find headers
$headers = array () ;
foreach ( $c['*'] AS $p ) {
foreach ( $p['a'] AS $a => $v ) {
$headers[$a] = $a ;
}
}
if ( $type == 'html' and isset ( $headers['nstext'] ) ) unset ( $headers['nstext'] ) ;
if ( $type == 'html' ) {
print "<tr>" ;
foreach ( $headers AS $h ) {
$h2 = $h ;
if ( isset ( $this->loc['h_'.$h] ) ) $h2 = $this->loc['h_'.$h] ;
print "<th>$h2</th>" ;
}
print "</tr>" ;
} else if ( $type == 'wiki' ) {
$ho = array () ;
foreach ( $headers AS $h ) {
$h2 = $h ;
if ( isset ( $this->loc['h_'.$h] ) ) $h2 = $this->loc['h_'.$h] ;
$ho[] = $h2 ;
}
print "!" . implode ( " !! " , $ho ) . "\n|-\n" ;
} else {
print '|' . $c['a']['type'] . "\n" ;
$ho = array () ;
foreach ( $headers AS $h ) {
$h2 = $h ;
if ( isset ( $this->loc['h_'.$h] ) ) $h2 = $this->loc['h_'.$h] ;
if ( $type == 'tsv' or $type == 'csv' ) $h2 = $h ;
$ho[] = $h2 ;
}
if ( $type == 'csv' ) print '"' ;
print implode ( $sep , $ho ) ;
if ( $type == 'csv' ) print '"' ;
print "\n" ;
}
// Show rows
foreach ( $c['*'] AS $p ) {
if ( $type == 'html' ) print '<tr>' ;
// else if ( $type == 'csv' ) print '"' . $c['a']['type'] ;
// else if ( $type == 'tsv' ) print $c['a']['type'] ;
$first_col = true ;
foreach ( $headers AS $h ) {
if ( isset ( $p['a'][$h] ) ) $o = $p['a'][$h] ;
else $o = '' ;
$pn = str_replace ( '_' , ' ' , $o ) ;
$pns = '' ;
$ppns = '' ;
if ( $h == 'title' and isset ( $p['a']['namespace'] ) and $p['a']['namespace'] != '0' ) {
if ( isset ( $this->namespaces[$p['a']['namespace']] ) ) {
if ( $this->language == 'commons' ) {
$pns = $this->namespaces_en[$p['a']['namespace']] ;
} else {
$pns = $this->namespaces[$p['a']['namespace']] ;
}
$pns .= ':' ;
}
if ( $p['a']['namespace'] == 6 and $type == 'wiki' ) $ppns = ':' ;
else if ( $p['a']['namespace'] == 14 and $type == 'wiki' ) $ppns = ':' ;
}
if ( $type == 'html' ) {
if ( $h == 'title' ) {
// $o2 = urlencode ( $o ) ; // Strange problem with "/"
$o2 = $o ;
$o2 = str_replace ( "'" , urlencode("'") , $o2 ) ;
print "<td><a target='_blank' href='$baseurl/$pns$o2'>$pn</a></td>" ;
} else if ( $h == 'namespace' ) {
print "<td>" ;
if ( $this->language == 'commons' ) print $this->namespaces_en[$o] ;
else print $this->namespaces[$o] ;
print "</td>" ;
} else print "<td>$o</td>" ;
} else if ( $type == 'wiki' ) {
if ( $first_col ) print "|" ;
else print " || " ;
if ( $h == 'title' ) {
print "[[$ppns$pns$pn]]" ;
} else print $o ;
} else if ( $type == 'csv' ) {
if ( $first_col ) print '"' ;
else print $sep ;
print $this->makeCSV ( $pns.$o ) ;
} else {
if ( !$first_col ) print "\t" ;
print $pns.$o ;
}
$first_col = false ;
}
if ( $type == 'html' ) print '</tr>' ;
else if ( $type == 'wiki' ) print "\n|-\n" ;
else if ( $type == 'csv' ) print "\"\n" ;
else print "\n" ;
}
if ( $type == 'html' ) {
print "<tbody><tfoot>" ;
$td_cols = count ( $headers ) - 1 ;
foreach ( $c['a'] AS $a => $v ) {
print "<tr><th>$a</th><td colspan='$td_cols'>$v</td></tr>" ;
}
print "</tfoot></table>" ;
} else if ( $type == 'wiki' ) {
print "|}\n" ;
}
}
if ( $type == 'html' ) {
print "<p>Query took " . $this->result['a']['querytime_sec'] . " seconds.</p>" ;
print "<p>Searched " . $this->result['a']['total_categories_searched'] . " categories.</p>" ;
}
if ( $type == 'wiki' && count ( $this->result['a'] ) > 0 ) {
print "----\n" ;
foreach ( $this->result['a'] AS $k => $v ) {
print ';' . $k . ':' . $v . "\n" ;
}
}
}
function printXMLsub ( &$d ) {
if ( !is_array ( $d ) ) {
print htmlspecialchars ( $d , ENT_COMPAT , 'UTF-8' ) ;
return ;
}
if ( !isset ( $d['n'] ) ) return ;
print '<' . $d['n'] ;
if ( isset ( $d['a'] ) ) {
foreach ( $d['a'] AS $a => $v ) {
print ' ' . $a . '="' ;
print htmlspecialchars ( $v , ENT_COMPAT , 'UTF-8' ) ;
print '"' ;
}
}
if ( isset ( $d['*'] ) and count ( $d['*'] ) > 0 ) {
print ">" ;
foreach ( $d['*'] AS $s ) {
$this->printXMLsub ( $s ) ;
}
print "</" . $d['n'] . ">" ;
} else print " />" ;
}
function printXML () {
header('Content-type: text/xml; charset=utf-8');
$this->printXMLsub ( $this->result ) ;
}
function printPHP () {
header('Content-type: application/serialized_PHP_variable');
print serialize ( $this->result ) ;
}
function printJSON () {
header('Content-type: application/x-json; charset=utf-8');
print json_encode ( $this->result ) ;
}
function comparePages ( $p1 , $p2 ) {
$ret = 0 ;
if ( $this->sorting_redlinks ) {
if ( $p1['a']['links'] > $p2['a']['links'] ) $ret = -1 ;
else if ( $p1['a']['links'] < $p2['a']['links'] ) $ret = 1 ;
if ( $ret != 0 ) return $ret ; // Descending link count takes priority in redlinks
}
if ( $this->sort_by == 'title' ) {
if ( $p1['a']['title'] > $p2['a']['title'] ) $ret = 1 ;
else if ( $p1['a']['title'] < $p2['a']['title'] ) $ret = -1 ;
} else if ( $this->sort_by == 'ns_title' ) {
if ( (int) $p1['a']['namespace'] > (int) $p2['a']['namespace'] ) $ret = 1 ;
else if ( (int) $p1['a']['namespace'] < (int) $p2['a']['namespace'] ) $ret = -1 ;
else {
if ( $p1['a']['title'] > $p2['a']['title'] ) $ret = 1 ;
else if ( $p1['a']['title'] < $p2['a']['title'] ) $ret = -1 ;
}
} else if ( $this->sort_by == 'size' ) {
if ( (int) $p1['a']['len'] > (int) $p2['a']['len'] ) $ret = 1 ;
else if ( (int) $p1['a']['len'] < (int) $p2['a']['len'] ) $ret = -1 ;
} else if ( $this->sort_by == 'filesize' ) {
if ( (int) $p1['a']['img_size'] > (int) $p2['a']['img_size'] ) $ret = 1 ;
else if ( (int) $p1['a']['img_size'] < (int) $p2['a']['img_size'] ) $ret = -1 ;
} else if ( $this->sort_by == 'uploaddate' ) {
if ( $p1['a']['img_timestamp'] > $p2['a']['img_timestamp'] ) $ret = 1 ;
else if ( $p1['a']['img_timestamp'] < $p2['a']['img_timestamp'] ) $ret = -1 ;
} else if ( $this->sort_by == 'date' ) {
if ( $p1['a']['touched'] > $p2['a']['touched'] ) $ret = 1 ;
else if ( $p1['a']['touched'] < $p2['a']['touched'] ) $ret = -1 ;
}
// print "$ret" ;
if ( $this->sort_order == 'descending' ) return -$ret ;
return $ret ;
}
function doSort () {
if ( $this->sort_by == 'none' && !$this->show_redlinks ) return ;
if ( !isset ( $this->result ) ) return ;
if ( !isset ( $this->result['*'] ) ) return ;
foreach ( $this->result['*'] AS $k => $c ) {
if ( count ( $c ) == 0 ) continue ; // Empty, no sorting neccessary
$tag = $c['n'] ;
$this->sorting_redlinks = substr ( $tag , 0 , 9 ) == 'redlinks.' ;
if ( $tag == 'categories' ) {
continue ;
}
usort ( $this->result['*'][$k]['*'] , array($this,'comparePages') ) ;
}
}
function out ( $doit ) {
$this->doSort () ;
if ( !$doit ) {
$this->printHTML() ;
return ;
}
if ( $this->format == 'xml' ) $this->printXML() ;
else if ( $this->format == 'php' ) $this->printPHP() ;
else if ( $this->format == 'json' ) $this->printJSON() ;
else if ( $this->format == 'wiki' ) $this->prettyOutput() ;
else if ( $this->format == 'csv' ) $this->prettyOutput() ;
else if ( $this->format == 'tsv' ) $this->prettyOutput() ;
else $this->printHTML() ;
}
}
// MAIN PROGRAM
$catscan = new CatScan () ;
$doit = isset ( $_REQUEST['doit'] ) ;
if ( $doit ) $catscan->runQuery() ;
$catscan->out ( $doit ) ;
// THE END
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment