Created
November 24, 2014 08:03
-
-
Save ss23/6d9ec9266e4cf6eefb60 to your computer and use it in GitHub Desktop.
Get parsable data out of PnW nation pages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Fetches a Politics and War (PnW) nation page and prints the contents of its
 * "nationtable" as JSON.
 *
 * The nation name is taken from the first command-line argument when one is
 * given, otherwise from the `nation` GET parameter.
 */

// Cookie jar file shared by every cURL request in this script.
define('PNW_JAR', '/tmp/pnwcookies.txt');
// Credentials posted by pnw_login(); fill these in before use.
define('PNW_EMAIL', '');
define('PNW_PASS', ''); // No special chars allowed

// When run from the command line, treat the first argument as the nation name.
if ((!empty($argv)) && ($argc > 1)) {
    $_GET['nation'] = $argv[1];
}

if (empty($_GET['nation'])) {
    die("Please specify a nation as a GET parameter");
}

// The value is concatenated into a request URL further down, so only plain
// ASCII letters and digits are accepted (matching what the error message
// promises). The is_string() guard rejects array input such as ?nation[]=x,
// which the ctype_* functions are not meant to receive.
if (!is_string($_GET['nation']) || !ctype_alnum($_GET['nation'])) {
    die('Please only enter alphanumeric characters, or get ss23 to fix the broken script validation');
}
// Check if our cookie is still valid by requesting a nation page with the
// cookies currently stored in the jar.
$ch = curl_init();
curl_setopt($ch, CURLOPT_COOKIEJAR, PNW_JAR);
curl_setopt($ch, CURLOPT_COOKIEFILE, PNW_JAR);
curl_setopt($ch, CURLOPT_URL, 'https://politicsandwar.com/index.php?id=62&n=Gyro');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$output = curl_exec($ch);
curl_close($ch);

// A meta-refresh tag in the response is taken as the sign that the saved
// session is no longer usable. strpos() returns 0 for a match at the very
// start of the string, so the result must be compared against false rather
// than tested for truthiness. A failed request (curl_exec() returned false)
// is handled the same way: try to log in again, and give up if that fails.
if ($output === false || strpos($output, '<meta http-equiv="REFRESH"') !== false) {
    if (!pnw_login()) {
        die("Problem logging in to PnW. API should be considered down. Go pester ss23 or something.");
    }
}
// Do the request for all the dataz
$ch = curl_init();
curl_setopt($ch, CURLOPT_COOKIEJAR, PNW_JAR);
curl_setopt($ch, CURLOPT_COOKIEFILE, PNW_JAR);
curl_setopt($ch, CURLOPT_URL, 'https://politicsandwar.com/index.php?id=62&n=' . $_GET['nation']);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$output = curl_exec($ch);
curl_close($ch);

// curl_exec() returns false when the transfer itself fails; stop here instead
// of handing a non-string to the HTML parser below.
if ($output === false) {
    die("Problem fetching nation data from PnW. API should be considered down. Go pester ss23 or something.");
}

// Check if the nation was valid. The marker below is what this script treats
// as the signature of an unknown nation (NOTE(review): presumably the site
// falls back to nation id 15533 -- confirm). The strpos() result is compared
// against false so that a match at offset 0 is not mistaken for "not found".
if (strpos($output, "/id=15533'") !== false) {
    die("Invalid nation specified");
}
// Parse the output
if (!is_string($output) || $output === '') {
    // Nothing to parse; DOMDocument::loadHTML() cannot work with an empty document.
    die("No data received from PnW");
}

// Collect libxml parse warnings internally instead of silencing them with @.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($output); // Someone needs to get sheepy to fix his broken HTML
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$xpath = new DOMXPath($doc);
// Every direct child element of the element(s) with class "nationtable" is one row.
$rows = $xpath->query("//*[@class='nationtable']/*");

$cities = array();
$headerSeen = false;

foreach ($rows as $row) {
    if (!$headerSeen) {
        // Header row: these are <th>'s naming the cities. The first cell is
        // blank and is skipped; every other cell starts a new city entry.
        $isFirstCell = true;
        foreach ($row->childNodes as $cell) {
            if ($isFirstCell) {
                $isFirstCell = false;
                // The header only counts as consumed once it had at least one cell.
                $headerSeen = true;
                continue;
            }
            $cities[] = array('Name' => $cell->textContent);
        }
        continue;
    }

    // Data row: the first cell names the attribute, the remaining cells hold
    // that attribute's value for each city, in header order. A dedicated
    // per-row flag is used so that a row without cells cannot cause the next
    // row to be mistaken for another header.
    $isFirstCell = true;
    $cityIndex = 0;
    $currentAttribute = '';
    foreach ($row->childNodes as $cell) {
        if ($isFirstCell) {
            $isFirstCell = false;
            // Only the text of the label cell's first child node is used as the
            // key; an empty cell has no first child and yields an empty key.
            $currentAttribute = ($cell->firstChild !== null) ? $cell->firstChild->textContent : '';
            continue;
        }
        $cities[$cityIndex++][$currentAttribute] = $cell->textContent;
    }
}

// Add a count field.
// NOTE(review): the count is deliberately one less than the number of entries
// collected, as in the original script -- presumably the header row carries one
// extra trailing cell; confirm against a live page.
$total = count($cities) - 1;

$final = array();
$final['count'] = $total;
$final['cities'] = $cities;

echo json_encode($final);
/**
 * Logs in to PnW and fills the PNW_JAR cookie jar with the session cookies.
 *
 * First requests the login page to read the value of its `sesh` field, then
 * posts PNW_EMAIL / PNW_PASS together with that value back to the same URL.
 *
 * @return bool True when the response to the login POST contains the site's
 *              success message; false otherwise, including when a request
 *              fails or the `sesh` value cannot be found.
 */
function pnw_login() {
    $loginUrl = 'https://politicsandwar.com/login/';

    // Step 1: load the login form.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_COOKIEJAR, PNW_JAR);
    curl_setopt($ch, CURLOPT_COOKIEFILE, PNW_JAR);
    curl_setopt($ch, CURLOPT_URL, $loginUrl);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $output = curl_exec($ch);
    curl_close($ch);

    if ($output === false) {
        return false;
    }

    // The 32 characters following this marker are the `sesh` value. Without
    // the explicit check, a missing marker would make strpos() return false
    // and an unrelated slice of the page would be posted as the token.
    $marker = 'sesh" value="';
    $markerPos = strpos($output, $marker);
    if ($markerPos === false) {
        return false;
    }
    $sesh = substr($output, $markerPos + strlen($marker), 32);

    // Step 2: do the post. http_build_query() URL-encodes every value, so
    // characters such as "&", "+" or "=" in the credentials no longer corrupt
    // the request body. The separator is passed explicitly so the body does
    // not depend on the arg_separator.output ini setting.
    $fields = array(
        'email'      => PNW_EMAIL,
        'password'   => PNW_PASS,
        'rememberme' => 1,
        'loginform'  => 'Login',
        'sesh'       => $sesh,
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_COOKIEJAR, PNW_JAR);
    curl_setopt($ch, CURLOPT_COOKIEFILE, PNW_JAR);
    curl_setopt($ch, CURLOPT_URL, $loginUrl);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields, '', '&'));
    $output = curl_exec($ch);
    curl_close($ch);

    return ($output !== false) && (strpos($output, 'You have successfully logged in.') !== false);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment