Skip to content

Instantly share code, notes, and snippets.

@VuokkoVuorinnen
Created June 12, 2013 19:23
Show Gist options
  • Save VuokkoVuorinnen/5768302 to your computer and use it in GitHub Desktop.
Save VuokkoVuorinnen/5768302 to your computer and use it in GitHub Desktop.
<?php
/**
* Created by JetBrains PhpStorm.
* User: Vuokko
* Date: 12/06/13
* Time: 19:52
* To change this template use File | Settings | File Templates.
*/
// Turn off all error reporting; otherwise loading the page emits DOMElement warnings.
// NOTE(review): level 0 silences every other PHP error in this script as well;
// libxml_use_internal_errors(true) may be a narrower way to quiet the parser - confirm.
error_reporting(0);
/**
 * Prints the image URL of every file link found in a 4chan thread page.
 *
 * The thread page is downloaded and parsed as HTML; the link text of each
 * <span class="fileText"> anchor is appended to $baseUrl and echoed, one URL
 * per line. Nothing is printed when the page cannot be fetched or is empty.
 *
 * @param string $thread  the URL of the thread in the form: http://boards.4chan.org/p/res/2016557
 * @param string $baseUrl prefix placed in front of each file name (defaults to the /p/ board path)
 * @return void the URLs are echoed, nothing is returned
 */
function getImageUrls($thread, $baseUrl = "http://images.4chan.org/p/src/") {
    $chanhtml = file_get_contents($thread);
    if ($chanhtml === false || $chanhtml === '') {
        // Fetch failed or returned an empty body: there is nothing to parse,
        // and handing an empty string to loadHTML() is an error on PHP 8.
        return;
    }

    // Real-world HTML is rarely well-formed; collect the parser's complaints
    // internally instead of relying on a global error_reporting(0), then put
    // the previous libxml setting back so callers are not affected.
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->recover = true;
    $dom->strictErrorChecking = FALSE;
    $dom->loadHTML($chanhtml);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $xpath = new DOMXPath($dom);
    $nodelist = $xpath->query("//span[@class='fileText']/a");
    foreach ($nodelist as $node) {
        // Trailing newline keeps consecutive URLs from running together.
        echo $baseUrl . getData($node) . "\n";
    }
}
/**
 * Returns the text of the first text node found inside $node.
 *
 * Children are visited in document order. A text node is returned as soon as
 * it is met; any child that has children of its own is searched recursively
 * before moving on to its next sibling. Children that carry no text at all
 * (for example an <img>) are skipped.
 *
 * @param DOMNode $node the node to search, e.g. an <a> element
 * @return string what's between <a href="herpderp"> and </a>, or '' when the
 *                node contains no text
 */
function getData($node) {
    foreach ($node->childNodes as $child) {
        if ($child->nodeType === XML_TEXT_NODE) {
            return $child->nodeValue;
        }
        if ($child->hasChildNodes()) {
            $text = getData($child); // recursive call
            if ($text !== '') {
                return $text;
            }
        }
    }
    // No text anywhere below $node: return an empty string rather than null
    // so callers can always treat the result as a string.
    return '';
}
// Prints the literal string "derp" when the script runs.
// NOTE(review): no call to getImageUrls() is visible in this file, so this looks
// like leftover debug output - confirm before relying on it.
echo "derp";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment