Last active
March 31, 2023 20:15
-
-
Save afzafri/6240ba32fafc3a549545abe256fc98d6 to your computer and use it in GitHub Desktop.
Fetch/scrape the contents of a Facebook Page's posts without using the Facebook API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/* Facebook Page Posts Scraper API created by Afif Zafri.
   Fetch/scrape the post contents of a page without using the Facebook API.
   Usage: http://site.com/api.php?username=CODE , where CODE is the page username

   The page is fetched over HTTP and parsed with regular expressions, so the
   result depends entirely on the markup Facebook returns for the request.
   Output: an HTML fragment with the page title, the page link, the number of
   matched posts and a table containing the tag-stripped text of each post.
   Everything that originates from the request or from the fetched page is
   HTML-escaped before it is echoed.
*/

// Require a plain string: "?username[]=x" would otherwise arrive as an array.
if (isset($_GET['username']) && is_string($_GET['username']))
{
    $username = $_GET['username'];

    // Percent-encode every path segment so the value cannot smuggle control
    // characters, spaces or a query string into the request URL. Slashes are
    // kept so multi-segment page paths still work.
    $encodedPath = implode('/', array_map('rawurlencode', explode('/', $username)));
    $url = "https://www.facebook.com/" . $encodedPath;

    // The URL contains user input, so it must be escaped before being echoed.
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
    $safeUrl = htmlspecialchars($url, $escapeFlags, 'UTF-8');

    // header option for file_get_contents
    // need to set user agent, because facebook will check user agent
    // NOTE(review): the timestamp at the end of the User-Agent value looks like
    // a copy-paste artifact; it is left untouched because changing it may
    // change what the remote server returns.
    $options = array(
        'http' => array(
            'method' => "GET",
            'header' => "Accept-language: en\r\n" .
                "Cookie: foo=bar\r\n" . // check function.stream-context-create on php.net
                "User-Agent: Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.102011-10-16 20:23:10\r\n" // i.e. An iPad
        )
    );

    // fetch webpage content
    $context = stream_context_create($options);
    $result = file_get_contents($url, false, $context);

    if ($result === false)
    {
        // The request failed (network error, HTTP error, ...): report it
        // instead of running the regexes against a boolean.
        if (!headers_sent())
        {
            http_response_code(502);
        }
        echo "<h2>Unable to fetch page: " . $safeUrl . "</h2>";
    }
    else
    {
        // Use regex to parse the html content, to only what we want

        // get title; stays empty when the page has no matching <title> element
        $paterntitle = '#<title id="pageTitle">([\w\W]*?)</title>#';
        $title = '';
        if (preg_match($paterntitle, $result, $titleout))
        {
            $title = strip_tags(str_replace(" | Facebook", "", $titleout[0]));
        }

        // get posts: every <div class="_5pbx userContent" ...> up to its first </div>
        $patern = '#<div class="_5pbx userContent"([\w\W]*?)</div>#';
        preg_match_all($patern, $result, $out);
        $posts = isset($out[0]) ? $out[0] : array();
        $total = count($posts);

        // double_encode = false keeps entities already present in the fetched
        // markup (e.g. &amp;) from being encoded a second time.
        echo "<h2>Page Title: " . htmlspecialchars($title, $escapeFlags, 'UTF-8', false) . "</h2>";
        echo "<h2>Page Link: " . $safeUrl . "</h2>";
        echo "<h2>Number of posts: " . $total . "</h2>";
        echo "<h2>Output:</h2>";
        echo "
<table border='1' style='border-collapse: collapse;'>
<tr>
<th>No #</th>
<th>Posts</th>
</tr>
";
        foreach ($posts as $index => $rawPost)
        {
            // Drop the markup, then escape whatever strip_tags left behind.
            $post = htmlspecialchars(strip_tags($rawPost), $escapeFlags, 'UTF-8', false);
            echo "
<tr>
<td>" . ($index + 1) . "</td>
<td>
" . $post . "
</td>
</tr>
";
        }
        echo "</table>";
    }
}
?>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/* Facebook Page Posts Scraper API created by Afif Zafri.
   Fetch/scrape the post contents of a page without using the Facebook API.
   This version fetches the post IDs and uses the Facebook embed code.
   Usage: http://site.com/api.php?username=CODE , where CODE is the page username

   The page is fetched over HTTP and parsed with regular expressions, so the
   result depends entirely on the markup Facebook returns for the request.
   Output: an HTML fragment with the page title, the page link, the number of
   posts and one embed <iframe> per extracted post id.
   Everything that originates from the request or from the fetched page is
   HTML-escaped before it is echoed.
*/

// Require a plain string: "?username[]=x" would otherwise arrive as an array.
if (isset($_GET['username']) && is_string($_GET['username']))
{
    $username = $_GET['username'];

    // Percent-encode every path segment so the value cannot smuggle control
    // characters, spaces or a query string into the request URL. Slashes are
    // kept so multi-segment page paths still work.
    $encodedPath = implode('/', array_map('rawurlencode', explode('/', $username)));
    $url = "https://www.facebook.com/" . $encodedPath;

    // The URL contains user input, so it must be escaped before being echoed.
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
    $safeUrl = htmlspecialchars($url, $escapeFlags, 'UTF-8');

    // header option for file_get_contents
    // need to set user agent, because facebook will check user agent
    // NOTE(review): the timestamp at the end of the User-Agent value looks like
    // a copy-paste artifact; it is left untouched because changing it may
    // change what the remote server returns.
    $options = array(
        'http' => array(
            'method' => "GET",
            'header' => "Accept-language: en\r\n" .
                "Cookie: foo=bar\r\n" . // check function.stream-context-create on php.net
                "User-Agent: Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.102011-10-16 20:23:10\r\n" // i.e. An iPad
        )
    );

    // fetch webpage content
    $context = stream_context_create($options);
    $result = file_get_contents($url, false, $context);

    if ($result === false)
    {
        // The request failed (network error, HTTP error, ...): report it
        // instead of running the regexes against a boolean.
        if (!headers_sent())
        {
            http_response_code(502);
        }
        echo "<h2>Unable to fetch page: " . $safeUrl . "</h2>";
    }
    else
    {
        // Use regex to parse the html content, to only what we want

        // get title; stays empty when the page has no matching <title> element
        $paterntitle = '#<title id="pageTitle">([\w\W]*?)</title>#';
        $title = '';
        if (preg_match($paterntitle, $result, $titleout))
        {
            $title = strip_tags(str_replace(" | Facebook", "", $titleout[0]));
        }

        // get fb script code
        $patern = '#<script([\w\W]*?)</script>#';
        preg_match_all($patern, $result, $out);

        // NOTE(review): the post ids are looked up in the sixth <script> block
        // only; that index presumably matched Facebook's markup when this was
        // written - confirm against a current page. A page with fewer script
        // blocks now simply yields zero posts instead of an undefined offset.
        $script = isset($out[0][5]) ? $out[0][5] : '';

        // get posts id only
        $paternid = '#"post_fbid":([\w\W]*?)}#';
        preg_match_all($paternid, $script, $ids);
        $matches = isset($ids[0]) ? $ids[0] : array();

        // Only the first half (rounded up) of the matches is used.
        // NOTE(review): presumably every id occurs twice in the script - confirm.
        $limit = (int) ceil(count($matches) / 2);

        // double_encode = false keeps entities already present in the fetched
        // markup (e.g. &amp;) from being encoded a second time.
        echo "<h2>Page Title: " . htmlspecialchars($title, $escapeFlags, 'UTF-8', false) . "</h2>";
        echo "<h2>Page Link: " . $safeUrl . "</h2>";
        echo "<h2>Number of posts: " . $limit . "</h2>";
        echo "<h2>Output:</h2>";

        for ($i = 0; $i < $limit; $i++)
        {
            // The id is the first run of digits in the match; skip matches
            // without one rather than emitting an iframe for an empty id.
            if (!preg_match('/\d+/', $matches[$i], $numonly))
            {
                continue;
            }
            $postid = $numonly[0];

            // The href parameter is already percent-encoded, so the username
            // has to be encoded to the same level before being inserted.
            $embdedlink = "https://www.facebook.com/plugins/post.php?href=https%3A%2F%2Fwww.facebook.com%2F" . rawurlencode($username) . "%2Fposts%2F" . $postid . "&width=500";

            // ENT_QUOTES matters here: the src attribute is single-quoted.
            echo "
<iframe src='" . htmlspecialchars($embdedlink, $escapeFlags, 'UTF-8') . "' width='500' height='224' style='border:none;overflow:hidden' scrolling='no' frameborder='0' allowTransparency='true'></iframe>
";
        }
    }
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment