Skip to content

Instantly share code, notes, and snippets.

@onlurking
Last active July 26, 2017 09:02
Show Gist options
  • Save onlurking/61e56a4a5ce63b0c94c9386f18b95c66 to your computer and use it in GitHub Desktop.
Save onlurking/61e56a4a5ce63b0c94c9386f18b95c66 to your computer and use it in GitHub Desktop.
import argparse
import json
from urllib.request import Request, urlopen
import time
def request_until_succeed(url, max_retries=None, delay=1):
    """Fetch *url* and return the raw response body, retrying on failure.

    Args:
        url: Address to request.
        max_retries: Number of retries allowed after the first attempt.
            ``None`` (the default) retries forever.
        delay: Seconds to wait before each retry.

    Returns:
        The body (``bytes``) of the first response answered with HTTP 200.

    Raises:
        Exception: The last failure, re-raised once ``max_retries`` retries
            have been used up. Never raised when ``max_retries`` is ``None``.
    """
    req = Request(url)
    retries = 0
    while True:
        try:
            # The context manager closes the connection on every path,
            # including attempts that end with an unexpected status.
            with urlopen(req) as response:
                status = response.getcode()
                if status == 200:
                    return response.read()
            failure = OSError(
                "unexpected status {status}".format(status=status))
        except Exception as error:
            # Deliberately broad: any failure of the request is retried.
            failure = error
        if max_retries is not None and retries >= max_retries:
            raise failure
        retries += 1
        print("Error for URL {url}: {error}".format(
            url=url, error=failure))
        print("Retrying.")
        # Sleep on every failed attempt (not only on exceptions) so a
        # non-200 answer cannot turn into a busy loop.
        time.sleep(delay)
def feed_data(url):
    """Download *url* and return its body decoded from JSON."""
    raw_body = request_until_succeed(url)
    return json.loads(raw_body)
def save_json(posts, path='./result.json'):
    """Write *posts* to *path* as a JSON document.

    Args:
        posts: JSON-serializable object to store.
        path: Destination file; it is created or truncated.
    """
    # ``with`` guarantees the file is flushed and closed even if
    # serialization or the write fails.
    with open(path, 'w') as result:
        result.write(json.dumps(posts))
def scrap_group(url):
    """Download every page of a feed and save the collected posts.

    Starting at *url*, the entries under each page's ``data`` key are
    accumulated and the ``paging.next`` link is followed until a page
    provides none. The complete list is then passed to ``save_json``.

    Args:
        url: Address of the first page of the feed.
    """
    posts = []
    statuses = feed_data(url)
    while True:
        posts.extend(statuses.get('data', []))
        # A final page may carry a 'paging' object without a 'next' link;
        # treat that the same as having no 'paging' at all.
        next_url = statuses.get('paging', {}).get('next')
        if not next_url:
            break
        # len(posts) is the true running total; the count is per post,
        # not per page squared.
        print("{num} processed posts.".format(num=len(posts)))
        statuses = feed_data(next_url)
        # Short pause between pages.
        time.sleep(0.3)
    save_json(posts)
if __name__ == '__main__':
    # Command-line entry point: build the Graph API feed URL for the given
    # group and download it with scrap_group.
    parser = argparse.ArgumentParser()
    parser.add_argument('id', help='ID of Graph API Group')
    # NOTE(review): --out is parsed but not used anywhere below; the output
    # location is decided by save_json. Kept so existing invocations still
    # parse.
    parser.add_argument('-o', '--out', default="dump.json", help='Output file')
    # The token is interpolated into the URL, so it must be present;
    # without it the URL would contain the literal text "None".
    parser.add_argument('-t', '--token', required=True,
                        help='Authentication token')
    args = parser.parse_args()
    # Fields requested for each post, including two levels of comments.
    query = ("feed?fields=from,comments.limit(1500)"
             "{from,message,comment_count,comments.limit(1500)"
             "{from,message,like_count},like_count},"
             "message,created_time")
    address = ("https://graph.facebook.com/v2.10/"
               "{id}/{query}&access_token={token}"
               .format(id=args.id, query=query,
                       token=args.token))
    scrap_group(address)
@onlurking
Copy link
Author

Usage

python fb-dl.py -t accesstoken groupid

Example

python fb-dl.py -t EAACEdEose0cBAEnIzWHRHjK1gzlJ4G6N4MdbkAeu7qgswvP2XHR1YBw64NhDHAHjn1ZAmCTdHFLEnr5b8OTu7nVVU3iT3IMxZBvnR4DPkZAuZBhzxyHYb9MP80GW06mlvZBX7voVPZBVOikZCXF65cYYaYYaSHBWamcFEyvZCSQKCG5FSL74C4LaOm4XJc26yLYZD 142918099147059

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment