Skip to content

Instantly share code, notes, and snippets.

@afzafri
Last active March 31, 2023 20:15
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save afzafri/6240ba32fafc3a549545abe256fc98d6 to your computer and use it in GitHub Desktop.
Save afzafri/6240ba32fafc3a549545abe256fc98d6 to your computer and use it in GitHub Desktop.
Fetch/scrape the post contents of a Facebook Page without using the Facebook API
<?php
/* Facebook Page Posts Scraper API created by Afif Zafri.
   Fetches/scrapes the post contents of a page without using the Facebook API.
   Usage: http://site.com/api.php?username=CODE , where CODE is the page username

   Flow: read the "username" query parameter, download the page HTML with a
   custom User-Agent, pull the title and the post blocks out with regular
   expressions, and print them as an HTML table.

   All request-derived and scraped text is HTML-escaped before being echoed,
   and a failed download or a missing title no longer triggers undefined
   offset errors.
*/
if (isset($_GET['username']) && is_string($_GET['username']))
{
    $username = $_GET['username'];

    // Escapes text for HTML output. double_encode is off so entities that are
    // already present in the scraped markup (e.g. &amp;) are not escaped twice.
    $escape = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    };

    // Encode the username as a single path segment so characters such as
    // "?", "#" or "/" cannot change which URL is requested.
    $url = "https://www.facebook.com/" . rawurlencode($username);

    // header option for file_get_contents
    // need to set user agent, because facebook will check user agent
    // NOTE(review): the User-Agent value ends with what looks like a pasted
    // timestamp; it is kept unchanged here - confirm whether it can be cleaned up.
    $options = array(
        'http' => array(
            'method' => "GET",
            'header' => "Accept-language: en\r\n" .
                "Cookie: foo=bar\r\n" . // check function.stream-context-create on php.net
                "User-Agent: Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.102011-10-16 20:23:10\r\n" // i.e. An iPad
        )
    );

    // fetch webpage content
    $context = stream_context_create($options);
    $result = file_get_contents($url, false, $context);

    if ($result === false)
    {
        // file_get_contents() returns false on failure; stop here instead of
        // running the regexes against a non-string.
        echo "<h2>Unable to fetch page: " . $escape($url) . "</h2>";
    }
    else
    {
        // Use regex to parse the html content, to only what we want

        // get title (stays empty when the page has no matching <title> element)
        $title = '';
        if (preg_match('#<title id="pageTitle">([\w\W]*?)</title>#', $result, $titleout) === 1)
        {
            $title = strip_tags(str_replace(" | Facebook", "", $titleout[1]));
        }

        // get posts: every <div class="_5pbx userContent" ...> up to its first </div>
        preg_match_all('#<div class="_5pbx userContent"([\w\W]*?)</div>#', $result, $out);
        $posts = isset($out[0]) ? $out[0] : array();

        echo "<h2>Page Title: " . $escape($title) . "</h2>";
        echo "<h2>Page Link: " . $escape($url) . "</h2>";
        echo "<h2>Number of posts: " . count($posts) . "</h2>";
        echo "<h2>Output:</h2>";
        echo "
<table border='1' style='border-collapse: collapse;'>
<tr>
<th>No #</th>
<th>Posts</th>
</tr>
";
        $number = 0;
        foreach ($posts as $rawPost)
        {
            $number++;
            // strip_tags() removes the markup; the result is still escaped
            // because strip_tags() alone is not an HTML sanitiser.
            $post = $escape(strip_tags($rawPost));
            echo "
<tr>
<td>" . $number . "</td>
<td>
" . $post . "
</td>
</tr>
";
        }
        echo "</table>";
    }
}
?>
<?php
/* Facebook Page Posts Scraper API created by Afif Zafri.
   Fetches/scrapes the posts of a page without using the Facebook API.
   This version fetches the post IDs and renders each post with the Facebook embed code.
   Usage: http://site.com/api.php?username=CODE , where CODE is the page username

   Flow: read the "username" query parameter, download the page HTML with a
   custom User-Agent, extract the "post_fbid" values from one of the page's
   <script> blocks, and print one embed <iframe> per post ID.

   All request-derived and scraped text is escaped/encoded before being
   echoed, and a failed download, a missing title, a missing script block or
   an ID without digits no longer triggers undefined offset errors.
*/
if (isset($_GET['username']) && is_string($_GET['username']))
{
    $username = $_GET['username'];

    // Escapes text for HTML output. double_encode is off so entities that are
    // already present in the scraped markup (e.g. &amp;) are not escaped twice.
    $escape = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    };

    // Encode the username as a single path segment so characters such as
    // "?", "#" or "/" cannot change which URL is requested.
    $pagePath = rawurlencode($username);
    $url = "https://www.facebook.com/" . $pagePath;

    // header option for file_get_contents
    // need to set user agent, because facebook will check user agent
    // NOTE(review): the User-Agent value ends with what looks like a pasted
    // timestamp; it is kept unchanged here - confirm whether it can be cleaned up.
    $options = array(
        'http' => array(
            'method' => "GET",
            'header' => "Accept-language: en\r\n" .
                "Cookie: foo=bar\r\n" . // check function.stream-context-create on php.net
                "User-Agent: Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.102011-10-16 20:23:10\r\n" // i.e. An iPad
        )
    );

    // fetch webpage content
    $context = stream_context_create($options);
    $result = file_get_contents($url, false, $context);

    if ($result === false)
    {
        // file_get_contents() returns false on failure; stop here instead of
        // running the regexes against a non-string.
        echo "<h2>Unable to fetch page: " . $escape($url) . "</h2>";
    }
    else
    {
        // Use regex to parse the html content, to only what we want

        // get title (stays empty when the page has no matching <title> element)
        $title = '';
        if (preg_match('#<title id="pageTitle">([\w\W]*?)</title>#', $result, $titleout) === 1)
        {
            $title = strip_tags(str_replace(" | Facebook", "", $titleout[1]));
        }

        // get fb script code
        preg_match_all('#<script([\w\W]*?)</script>#', $result, $out);

        // The IDs are read from the sixth <script> block only (index 5).
        // NOTE(review): this position is hard-coded; confirm it still matches
        // the markup being scraped. A missing block is treated as "no posts".
        $scriptIndex = 5;
        $script = isset($out[0][$scriptIndex]) ? $out[0][$scriptIndex] : '';

        // get posts id only
        preg_match_all('#"post_fbid":([\w\W]*?)}#', $script, $ids);
        $matches = isset($ids[0]) ? $ids[0] : array();

        // Only the first half of the matches is used (rounded up).
        // NOTE(review): presumably each ID is listed twice in the script - confirm.
        $limit = (int) ceil(count($matches) / 2);

        // Keep the first run of digits from each match; matches that contain
        // no digits are skipped rather than producing a broken embed.
        $postids = array();
        foreach (array_slice($matches, 0, $limit) as $match)
        {
            if (preg_match('/\d+/', $match, $numonly) === 1)
            {
                $postids[] = $numonly[0];
            }
        }

        echo "<h2>Page Title: " . $escape($title) . "</h2>";
        echo "<h2>Page Link: " . $escape($url) . "</h2>";
        echo "<h2>Number of posts: " . count($postids) . "</h2>";
        echo "<h2>Output:</h2>";

        foreach ($postids as $postid)
        {
            // The post URL is percent-encoded as a whole because it is passed
            // as the value of the plugin's "href" query parameter.
            $posturl = "https://www.facebook.com/" . $pagePath . "/posts/" . $postid;
            $embdedlink = "https://www.facebook.com/plugins/post.php?href=" . rawurlencode($posturl) . "&width=500";
            echo "
<iframe src='" . $escape($embdedlink) . "' width='500' height='224' style='border:none;overflow:hidden' scrolling='no' frameborder='0' allowTransparency='true'></iframe>
";
        }
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment