Skip to content

Instantly share code, notes, and snippets.

@fedmich
Created May 16, 2012 06:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fedmich/2708094 to your computer and use it in GitHub Desktop.
Save fedmich/2708094 to your computer and use it in GitHub Desktop.
GetHTML()
<?php
/*
* @author Fedmich
* @version v1.1
* File caching version
*/
/**
 * Returns the body of $url, using a file cache stored next to this script.
 *
 * The cache file is cache/page_<md5 of url>.tmp. A cached copy is served when
 * it is younger than $secs seconds and non-empty; otherwise the page is
 * fetched with curl_page() and, if the fetch produced content, written back
 * to the cache.
 *
 * @param string $url  Address of the page to fetch.
 * @param int    $secs Maximum age of a cached copy, in seconds.
 * @return string|false Whatever curl_page() returned, or the cached body.
 */
function GetHTML($url, $secs = 900) {
    $md5 = md5($url);
    $dir = dirname(__FILE__) . '/cache';
    $file = "$dir/page_$md5.tmp";

    if (is_file($file) && (time() - filemtime($file)) < $secs) {
        $content = file_get_contents($file);
        // Strict checks so a legitimate body such as "0" is still served from cache.
        if ($content !== false && $content !== '') {
            return $content;
        }
    }

    $content = curl_page($url);

    // Do not cache failed or empty fetches: the read path above ignores empty
    // files anyway, so writing them only costs disk I/O.
    if (is_string($content) && $content !== '') {
        // Create the cache directory on first use; the trailing is_dir() covers
        // the race where another request created it in the meantime.
        if (is_dir($dir) || mkdir($dir, 0775, true) || is_dir($dir)) {
            // LOCK_EX keeps concurrent readers from seeing a half-written file.
            file_put_contents($file, $content, LOCK_EX);
        }
    }

    return $content;
}
/**
 * Downloads $url with cURL and returns the response body.
 *
 * Follows up to 10 redirects, times out after 10 seconds and sends a desktop
 * Chrome user agent. Returns an empty string when the handle cannot be
 * created, the transfer fails, or the final HTTP status is 404.
 *
 * @param string $url Address to download.
 * @return string Response body, or '' on failure / 404.
 */
function curl_page($url) {
    $curl = curl_init($url);
    if ($curl === false) {
        return '';
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30");
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // CURLOPT_ENCODING sets the Accept-Encoding header (gzip, deflate, ...), not
    // a character set; the empty string advertises every encoding libcurl supports.
    curl_setopt($curl, CURLOPT_ENCODING, '');
    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept as-is for compatibility with existing callers.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);

    $curlResult = curl_exec($curl);
    $httpStatus = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // curl_exec() yields false on transfer errors; normalise that, like a 404,
    // to an empty string so callers always receive a string.
    if ($curlResult === false || $httpStatus == 404) {
        return '';
    }
    return $curlResult;
}
<?php
/*
* @author Fedmich
* @version v1.3
* Memcache caching version (uses the file cache when the Memcache extension is not available)
*/
/**
 * Returns the body of $url, cached in Memcache when available and in a file
 * next to this script otherwise.
 *
 * Memcache (localhost:11211) is used when the Memcache class exists and the
 * connection succeeds; the key is page_<md5 of url>. Otherwise the cache file
 * cache/page_<md5 of url>.tmp is used and considered fresh for $secs seconds.
 * On a cache miss the page is fetched with curl_page() and, if the fetch
 * produced content, stored in whichever cache is active.
 *
 * A cache failure never aborts the request: an unreachable Memcache server
 * falls back to the file cache, and a failed store only raises a warning.
 *
 * @param string $url  Address of the page to fetch.
 * @param int    $secs Cache lifetime in seconds.
 * @return string|false Whatever curl_page() returned, or the cached body.
 */
function GetHTML($url, $secs = 900) {
    $md5 = md5($url);
    $dir = dirname(__FILE__) . '/cache';
    $file = "$dir/page_$md5.tmp";
    $mem_key = "page_$md5";

    // Only treat Memcache as usable once the connection actually succeeded.
    $memcache = null;
    if (class_exists('Memcache')) {
        $candidate = new Memcache;
        if ($candidate->connect('localhost', 11211)) {
            $memcache = $candidate;
        }
    }

    if ($memcache !== null) {
        $content = $memcache->get($mem_key);
        // get() returns false on a miss; strict checks let a body of "0" count as a hit.
        if (is_string($content) && $content !== '') {
            return $content;
        }
    } elseif (is_file($file) && (time() - filemtime($file)) < $secs) {
        $content = file_get_contents($file);
        if ($content !== false && $content !== '') {
            return $content;
        }
    }

    $content = curl_page($url);

    // Failed or empty fetches are not cached: both read paths above ignore
    // empty entries, so storing them would be wasted work.
    if (!is_string($content) || $content === '') {
        return $content;
    }

    if ($memcache !== null) {
        // The page was fetched successfully; a cache write failure must not
        // prevent the caller from receiving it.
        if (!$memcache->set($mem_key, $content, MEMCACHE_COMPRESSED, $secs)) {
            trigger_error("Can't save to Memcache", E_USER_WARNING);
        }
    } elseif (is_dir($dir) || mkdir($dir, 0775, true) || is_dir($dir)) {
        // Directory is created on first use; LOCK_EX keeps concurrent readers
        // from seeing a half-written file.
        file_put_contents($file, $content, LOCK_EX);
    }

    return $content;
}
/**
 * Downloads $url with cURL and returns the response body.
 *
 * Follows up to 10 redirects, times out after 10 seconds and sends a desktop
 * Chrome user agent. Returns an empty string when the handle cannot be
 * created, the transfer fails, or the final HTTP status is 404.
 *
 * @param string $url Address to download.
 * @return string Response body, or '' on failure / 404.
 */
function curl_page($url) {
    $curl = curl_init($url);
    if ($curl === false) {
        return '';
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30");
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // CURLOPT_ENCODING sets the Accept-Encoding header (gzip, deflate, ...), not
    // a character set; the empty string advertises every encoding libcurl supports.
    curl_setopt($curl, CURLOPT_ENCODING, '');
    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept as-is for compatibility with existing callers.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);

    $curlResult = curl_exec($curl);
    $httpStatus = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // curl_exec() yields false on transfer errors; normalise that, like a 404,
    // to an empty string so callers always receive a string.
    if ($curlResult === false || $httpStatus == 404) {
        return '';
    }
    return $curlResult;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment