Skip to content

Instantly share code, notes, and snippets.

@hans2103
Created June 4, 2015 13:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hans2103/5300f42bef6dc4893327 to your computer and use it in GitHub Desktop.
Save hans2103/5300f42bef6dc4893327 to your computer and use it in GitHub Desktop.
get Meta Description from websites
<?php
/**
* get Meta Tags from weblinks
*
* input for this script is data.txt
* data.txt contains weblinks. Every line a new weblink.
*
* use this php script from command line
* $ cat data.txt | while read -r LINE; do php ./getMetaTags.php "$LINE" >> output.csv; done
*
* script was needed to move Byte.nl/wiki to Byte.nl/kennisbank
*/
/**
 * Download a URL with cURL and hand back the response body.
 *
 * Redirects are followed and response headers are not included in the result.
 *
 * @param string $url Address to request.
 * @return string|bool Whatever curl_exec() returns: the body as a string on
 *                     success, false when the transfer fails.
 */
function file_get_contents_curl($url)
{
    $handle = curl_init();

    // Same options, same order as individual curl_setopt() calls would apply them.
    curl_setopt_array($handle, [
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_URL            => $url,
        CURLOPT_FOLLOWLOCATION => 1,
    ]);

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
// Entry point: takes one URL on the command line and prints a single
// semicolon-separated, double-quoted CSV row: "slug";"meta description".
if (!isset($argv[1]) || $argv[1] === '') {
    fwrite(STDERR, "Usage: php getMetaTags.php <url>\n");
    exit(1);
}
$url = $argv[1];

// The slug is the part of the URL after the old wiki prefix, lower-cased.
// Only strip the prefix when the URL really starts with it; otherwise a
// fixed-offset substr() would chop an arbitrary piece out of the URL.
$pattern = "https://www.byte.nl/wiki/";
if (strncmp($url, $pattern, strlen($pattern)) === 0) {
    $slug = strtolower(substr($url, strlen($pattern)));
} else {
    $slug = strtolower($url);
}

$html = file_get_contents_curl($url);

// Default to an empty description so the row is still well-formed when the
// page cannot be fetched or carries no description meta tag.
$description = '';

if (is_string($html) && $html !== '') {
    $doc = new DOMDocument();

    // Real-world pages are rarely valid HTML. Collect libxml parse errors
    // internally instead of printing warnings into the CSV output.
    $previousSetting = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    // No early exit: when several description tags exist, the last one wins.
    foreach ($doc->getElementsByTagName('meta') as $meta) {
        if (strtolower($meta->getAttribute('name')) === 'description') {
            $description = $meta->getAttribute('content');
        }
    }
} else {
    // Report on STDERR so the message never ends up in the redirected CSV.
    fwrite(STDERR, "Could not fetch $url\n");
}

// Double any embedded quotes so a quote inside a value cannot terminate the
// CSV field early.
$csvSlug = str_replace('"', '""', $slug);
$csvDescription = str_replace('"', '""', $description);
echo "\"$csvSlug\";\"$csvDescription\"\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment