Skip to content

Instantly share code, notes, and snippets.

@iann0036
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iann0036/a54d546a29e20b4bf0ee to your computer and use it in GitHub Desktop.
Save iann0036/a54d546a29e20b4bf0ee to your computer and use it in GitHub Desktop.
<?php
/**
 * Crawl a run of numbered pages and collect, for every <div> that carries an
 * "attr-name" attribute, that attribute's value together with the integer
 * digits found on the first line of the div's text that contains "SOMETEXT".
 *
 * Every pair is appended to $results and echoed as an HTML-escaped
 * "value,attr" line followed by <br>.
 */

/**
 * Extract the value/attr pairs from one HTML document.
 *
 * @param string $html Raw HTML markup of a single page.
 *
 * @return array List of array('value' => string, 'attr' => string|null) rows in
 *               document order. 'attr' is null when no line of the div's text
 *               contains "SOMETEXT".
 */
function extract_attr_rows($html)
{
    $rows = array();

    // DOMDocument::loadHTML() rejects an empty document (ValueError on PHP 8),
    // so bail out before handing it one.
    if (!is_string($html) || $html === '') {
        return $rows;
    }

    // Real-world markup is rarely valid; buffer libxml's complaints instead of
    // letting them leak into the output, then restore the caller's setting.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return $rows;
    }

    foreach ($doc->getElementsByTagName('div') as $elem) {
        if (!$elem->hasAttribute('attr-name')) {
            continue;
        }

        // Reset for every element so a div without a "SOMETEXT" line does not
        // inherit the number found in an earlier div.
        $attr = null;
        foreach (explode("\n", $elem->textContent) as $part) {
            if (strpos($part, 'SOMETEXT') !== false) {
                // Keeps only digits and +/- signs from the matching line.
                $attr = filter_var($part, FILTER_SANITIZE_NUMBER_INT);
                break;
            }
        }

        $rows[] = array(
            'value' => $elem->getAttribute('attr-name'),
            'attr'  => $attr,
        );
    }

    return $rows;
}

$page_max = 123;
$leadurl = '';
$results = array();

// NOTE(review): the bound is exclusive, so page $page_max itself is never
// fetched. Confirm whether "<=" was intended.
for ($page = 1; $page < $page_max; $page++) {
    $html = file_get_contents($leadurl . $page);
    if ($html === false) {
        // A page that cannot be fetched is skipped rather than aborting the crawl.
        continue;
    }

    foreach (extract_attr_rows($html) as $row) {
        $results[] = $row;
        // The text comes from a remote page; escape it before emitting HTML.
        echo htmlspecialchars($row['value'] . ',' . $row['attr'], ENT_QUOTES, 'UTF-8') . "<br>";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment