Simple PHP functions to extract links and metadata from a webpage
<?php
//Created as an alternative to an article demonstrating the use of regular expressions to parse HTML:
// http://www.web-development-blog.com/archives/parse-html-with-preg_match_all/
/**
 * Check whether a page contains a link back to a given URL.
 *
 * Loads the HTML document at $remoteUrl and reports whether any <a> element
 * inside <body> has an href attribute exactly equal to $yourLink. The
 * comparison is a plain string match; no URL normalisation is performed.
 *
 * @param string $remoteUrl URL or file path of the HTML document to inspect
 * @param string $yourLink  exact href value to look for
 * @return bool true when at least one matching anchor exists; false when
 *              there is none or the document could not be loaded
 */
function check_back_link($remoteUrl, $yourLink)
{
    // Real-world HTML is rarely valid; collect libxml's parse complaints
    // internally instead of emitting a PHP warning for each one.
    $previousSetting = libxml_use_internal_errors(true);
    $document = new DOMDocument('1.1');
    $loaded = $document->loadHTMLFile($remoteUrl);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if ($loaded === false) {
        return false;
    }

    // The expression is a fixed string: the href is compared in PHP rather
    // than spliced into the XPath, so quotes in $yourLink cannot break or
    // alter the query.
    $linkQuery = new DOMXPath($document);
    $anchors = $linkQuery->query('/html/body//a[@href]');
    if ($anchors === false) {
        return false;
    }

    $wanted = (string) $yourLink;
    foreach ($anchors as $anchor) {
        if ($anchor->getAttribute('href') === $wanted) {
            return true;
        }
    }

    return false;
}
/**
 * Extract the content of every <meta> tag with a given name.
 *
 * Loads the HTML document at $remoteUrl and collects the content attribute
 * of each <meta> element directly under <head> whose name attribute is
 * exactly equal to $metaName. Meta tags without a content attribute are
 * skipped.
 *
 * @param string $remoteUrl URL or file path of the HTML document to inspect
 * @param string $metaName  exact value of the meta tag's name attribute
 * @return array list of content strings in document order; empty when no
 *               tag matches or the document could not be loaded
 */
function extract_meta_data($remoteUrl, $metaName)
{
    // Real-world HTML is rarely valid; collect libxml's parse complaints
    // internally instead of emitting a PHP warning for each one.
    $previousSetting = libxml_use_internal_errors(true);
    $document = new DOMDocument('1.1');
    $loaded = $document->loadHTMLFile($remoteUrl);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $content = [];
    if ($loaded === false) {
        return $content;
    }

    // The expression is a fixed string: the name is compared in PHP rather
    // than spliced into the XPath, so quotes in $metaName cannot break or
    // alter the query.
    $metaQuery = new DOMXPath($document);
    $metaTags = $metaQuery->query('/html/head/meta[@name and @content]');
    if ($metaTags === false) {
        return $content;
    }

    $wanted = (string) $metaName;
    foreach ($metaTags as $metaTag) {
        if ($metaTag->getAttribute('name') === $wanted) {
            $content[] = $metaTag->getAttribute('content');
        }
    }

    return $content;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment