Skip to content

Instantly share code, notes, and snippets.

@mariushoch
Created May 11, 2020 11:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mariushoch/ae4538741ec3e5409dfa4e9a90c346dc to your computer and use it in GitHub Desktop.
Save mariushoch/ae4538741ec3e5409dfa4e9a90c346dc to your computer and use it in GitHub Desktop.
Find items with conflicting sitelinks, for a given list of item ids.
<?php
/**
 * Emit a diagnostic message on the standard error stream.
 *
 * Keeps progress and error chatter off stdout, so that stdout carries
 * only the script's actual results.
 *
 * @param string $str Text to write verbatim; no newline is appended.
 */
function errEcho( $str ) {
	$stderrUri = 'php://stderr';
	file_put_contents( $stderrUri, $str, FILE_APPEND );
}
/**
 * Call the Wikidata wbgetentities API and return the decoded "entities" map.
 *
 * @param array $params Query parameters added to action=wbgetentities
 *  (e.g. ids + props, or sites + titles + props); format=json is appended.
 * @return array|null Map of entity key => entity data, or null when the
 *  request failed or the response has no "entities" member.
 */
function fetchEntities( array $params ) {
	// Pass '&' explicitly so a customised arg_separator.output ini value cannot break the URL.
	$query = http_build_query(
		[ 'action' => 'wbgetentities' ] + $params + [ 'format' => 'json' ],
		'',
		'&'
	);
	$json = file_get_contents( 'https://www.wikidata.org/w/api.php?' . $query );
	if ( $json === false ) {
		return null;
	}
	$response = json_decode( $json, true );
	if ( !isset( $response['entities'] ) || !is_array( $response['entities'] ) ) {
		return null;
	}
	return $response['entities'];
}

// Main script: for every item id listed in the given file, look up each of the
// item's sitelinks by (site, title) and report any other item that claims it.
// Results go to stdout, progress and errors go to stderr.

// Asking for --help is a successful run; any other wrong invocation is a usage error.
$helpRequested = $argc === 2 && $argv[1] === '--help';
if ( $argc !== 2 || $helpRequested ) {
	errEcho( "Find items with conflicting sitelinks, for a given list of item ids.\n\n" );
	errEcho( "Usage: findSiteLinkConflicts [--help] file-with-itemids\n" );
	exit( $helpRequested ? 0 : 1 );
}

$fileName = $argv[1];
$file = fopen( $fileName, 'r' );
if ( !$file ) {
	errEcho( "Could not open $fileName for reading.\n" );
	exit( 1 );
}

while ( ( $line = fgets( $file ) ) !== false ) {
	$itemId = trim( $line );
	// \d* rather than \d+, so that single-digit ids such as Q5 are accepted.
	if ( !preg_match( '/^Q[1-9]\d*\z/', $itemId ) ) {
		errEcho( "Skipping input row: \"$itemId\"\n" );
		continue;
	}

	$entities = fetchEntities( [ 'ids' => $itemId, 'props' => 'sitelinks' ] );
	// Also treat a response that lacks this id, or flags it as missing, as a failed read.
	if ( !isset( $entities[$itemId] ) || isset( $entities[$itemId]['missing'] ) ) {
		errEcho( "Failed reading Item $itemId\n" );
		continue;
	}

	errEcho( "Checking sitelinks for Item $itemId\n" );
	$siteLinks = isset( $entities[$itemId]['sitelinks'] ) ? $entities[$itemId]['sitelinks'] : [];
	$requestCount = 0;
	$conflicts = [];
	foreach ( $siteLinks as $siteLink ) {
		$siteId = $siteLink['site'];
		$pageTitle = $siteLink['title'];
		// Ask which item(s) the API resolves this exact site/title pair to.
		$owners = fetchEntities( [ 'sites' => $siteId, 'titles' => $pageTitle, 'props' => '' ] );

		$requestCount++;
		if ( $requestCount % 10 === 0 ) {
			errEcho( "Got 10 sitelinks for $itemId\n" );
		}

		if ( $owners === null ) {
			// Do not drop the lookup silently: the result for this item is then incomplete.
			errEcho( "Failed looking up sitelink $siteId:$pageTitle for Item $itemId\n" );
			continue;
		}

		foreach ( $owners as $ownerId => $owner ) {
			// Entries flagged "missing" are placeholders for an unresolved title, not items.
			if ( isset( $owner['missing'] ) ) {
				continue;
			}
			$conflicts[] = (string)$ownerId;
		}
	}

	// The item itself legitimately owns its sitelinks; only other items are conflicts.
	$conflicts = array_diff( array_unique( $conflicts ), [ $itemId ] );
	echo "Item $itemId conflicts with: " . implode( ', ', $conflicts ) . "\n";
}

fclose( $file );
errEcho( "\nDone\n" );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment