Skip to content

Instantly share code, notes, and snippets.

@ichigo92
Last active March 17, 2023 05:58
Show Gist options
  • Save ichigo92/065c839dcc7cfbac9be65f5d0353350e to your computer and use it in GitHub Desktop.
Save ichigo92/065c839dcc7cfbac9be65f5d0353350e to your computer and use it in GitHub Desktop.
PHP Crawler using CURL
<?php
// Events listing page that gets scraped.
$redirect = "http://www.eyeofriyadh.com/events/";

// getEvents() is declared further down in this file; it hands back the
// scraped dates as one HTML fragment.
$event = getEvents($redirect);

// Emit the heading followed by whatever the scraper returned.
echo "Dates are : <br>", $event;

// To open the source page in the browser and compare the data by hand,
// uncomment the redirect below.
//header("Location: $redirect");
/**
 * Downloads the page at $url with cURL and collects the event dates found in it.
 *
 * A date is the inner content of any <div> whose inline style matches the
 * pattern in $event_date. Background on the cURL options used here:
 * https://code.tutsplus.com/tutorials/techniques-for-mastering-curl--net-8470
 *
 * If the transfer fails, "Error: <cURL message>" is echoed.
 *
 * @param string $url Address of the page to fetch.
 * @return string Every captured date followed by '<br>', concatenated in the
 *                order they appear in the page; an empty string when nothing
 *                matched or the request failed.
 */
function getEvents($url){
    // Capture group 4 is the inner content of the styled date <div>.
    $event_date = '/<div(.*?)style=\"color:#666A73;(.*?)padding:0px 10px 3px 10px;\"(.*?)>(.*?)<\/div>/i';

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // Header values are sent verbatim, so they must not carry the "{...}"
    // placeholder braces the original had around them.
    curl_setopt($curl, CURLOPT_HTTPHEADER, array(
        "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
        "Accept-Language: en-us,en;q=0.5"
    ));
    // Only the body is parsed, so response headers are not requested
    // (CURLOPT_HEADER is left at its default of off).
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $result = curl_exec($curl);
    // Read the error text while the handle is still open, then release the
    // handle before any return so it is closed on every path.
    $error = ($result === false) ? curl_error($curl) : '';
    curl_close($curl);

    if ($result === false) {
        echo "Error: " . $error;
        return '';
    }

    $match = array();
    $found = preg_match_all($event_date, $result, $match);
    // $found is false on a regex engine error and 0 when nothing matched;
    // both cases yield an empty result instead of an undefined variable.
    if (!$found) {
        return '';
    }

    $title = '';
    foreach ($match[4] as $date) {
        $title .= $date . '<br>';
    }

    return $title;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment