Last active
March 17, 2023 05:58
-
-
Save ichigo92/065c839dcc7cfbac9be65f5d0353350e to your computer and use it in GitHub Desktop.
PHP Crawler using CURL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Address of the events listing page that gets scraped.
$redirect = "http://www.eyeofriyadh.com/events/";

// getEvents() hands back the scraped dates as a single string.
$event = getEvents($redirect);

// Print the heading first, then whatever the scraper produced.
echo "Dates are : <br>", $event;

// To open the source site instead and compare against the scraped data,
// enable the redirect below:
//header("Location: $redirect");
/**
 * Download a page with cURL and pull the event dates out of its markup.
 *
 * The response is scanned for <div> elements whose inline style contains
 * "color:#666A73;" followed by "padding:0px 10px 3px 10px;". The inner text
 * of every such <div> is collected.
 *
 * If the transfer fails, "Error: <cURL message>" is echoed and an empty
 * string is returned.
 *
 * @param string $url Address of the page to fetch.
 * @return string Each captured date followed by '<br>', concatenated in the
 *                order found; '' when nothing matched or the request failed.
 */
function getEvents($url){
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // Header values are sent verbatim. The previous values were wrapped in
    // literal "{...}" braces, which made both headers malformed.
    curl_setopt($curl, CURLOPT_HTTPHEADER, array(
        "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
        "Accept-Language: en-us,en;q=0.5"
    ));
    // Response headers are included in the returned string as well.
    curl_setopt($curl, CURLOPT_HEADER, 1);
    // Return the response as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $result = curl_exec($curl);
    // Read the error text before the handle is released.
    $error = ($result === FALSE) ? curl_error($curl) : '';
    // Release the handle on every path; previously the success branch
    // returned before this call was reached.
    curl_close($curl);

    if ($result === FALSE) {
        echo "Error: " . $error;
        return '';
    }

    // Capture group 4 is the text between the opening tag and </div>.
    $event_date = '/<div(.*?)style=\"color:#666A73;(.*?)padding:0px 10px 3px 10px;\"(.*?)>(.*?)<\/div>/i';

    // preg_match_all() yields 0 for "no match" and false on a PCRE error;
    // in both cases there is nothing to report.
    if (!preg_match_all($event_date, $result, $match)) {
        return '';
    }

    // Start from an empty string so the result is always defined.
    $title = '';
    foreach ($match[4] as $date) {
        $title .= $date . '<br>';
    }

    return $title;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment