Last active
July 26, 2017 09:02
-
-
Save onlurking/61e56a4a5ce63b0c94c9386f18b95c66 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import json | |
from urllib.request import Request, urlopen | |
import time | |
def request_until_succeed(url): | |
req = Request(url) | |
success = False | |
while success is False: | |
try: | |
response = urlopen(req) | |
if response.getcode() == 200: | |
success = True | |
except Exception as error: | |
time.sleep(1) | |
print("Error for URL {url}: {error}".format( | |
url=url, error=error)) | |
print("Retrying.") | |
return response.read() | |
def feed_data(url):
    """Download *url* (retrying until it succeeds) and decode it as JSON."""
    raw = request_until_succeed(url)
    return json.loads(raw)
def save_json(posts, path='./result.json'):
    """Serialize *posts* as JSON to *path* (default: ./result.json).

    The original opened the file without ever closing it, so the write
    was not guaranteed to be flushed; a context manager fixes that.
    The new *path* parameter defaults to the original hard-coded target,
    so existing callers are unaffected.
    """
    with open(path, 'w+') as out:
        json.dump(posts, out)
def scrap_group(url):
    """Walk a Graph API feed starting at *url*, collecting every post.

    Follows the 'paging' -> 'next' cursor until the API stops providing
    one, then persists the accumulated posts via save_json(). Also
    returns the list of posts (the original returned None; returning the
    data is backward compatible and makes the function testable).
    """
    posts = []
    num_processed = 0
    statuses = feed_data(url)
    while True:
        page = statuses['data']
        posts.extend(page)
        # BUG FIX: the original added len(page) once PER POST inside the
        # loop, inflating the counter quadratically. Count each post once.
        num_processed += len(page)
        if 'paging' not in statuses:
            save_json(posts)
            return posts
        print("{num} processed posts.".format(num=num_processed))
        # The original inlined json.loads(request_until_succeed(...)),
        # which is exactly what feed_data() does — reuse it.
        statuses = feed_data(statuses['paging']['next'])
        time.sleep(0.3)  # small pause between page fetches
if __name__ == '__main__':
    # CLI entry point: assemble the Graph API feed URL and scrape it.
    cli = argparse.ArgumentParser()
    cli.add_argument('id', help='ID of Graph API Group')
    cli.add_argument('-o', '--out', default="dump.json", help='Output file')
    cli.add_argument('-t', '--token', help='Authentication token')
    opts = cli.parse_args()

    # Field selector for the feed endpoint (kept byte-identical).
    fields = ("feed?fields=from,comments.limit(1500)"
              "{from,message,comment_count,comments.limit(1500)"
              "{from,message,like_count},like_count},"
              "message,created_time")

    address = ("https://graph.facebook.com/v2.10/"
               "{id}/{query}&access_token={token}").format(
        id=opts.id, query=fields, token=opts.token)

    scrap_group(address)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage
python fb-dl.py -t accesstoken groupid
Example
python fb-dl.py -t EAACEdEose0cBAEnIzWHRHjK1gzlJ4G6N4MdbkAeu7qgswvP2XHR1YBw64NhDHAHjn1ZAmCTdHFLEnr5b8OTu7nVVU3iT3IMxZBvnR4DPkZAuZBhzxyHYb9MP80GW06mlvZBX7voVPZBVOikZCXF65cYYaYYaSHBWamcFEyvZCSQKCG5FSL74C4LaOm4XJc26yLYZD 142918099147059