Last active
July 26, 2017 09:02
-
-
Save onlurking/61e56a4a5ce63b0c94c9386f18b95c66 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import json | |
from urllib.request import Request, urlopen | |
import time | |
def request_until_succeed(url): | |
req = Request(url) | |
success = False | |
while success is False: | |
try: | |
response = urlopen(req) | |
if response.getcode() == 200: | |
success = True | |
except Exception as error: | |
time.sleep(1) | |
print("Error for URL {url}: {error}".format( | |
url=url, error=error)) | |
print("Retrying.") | |
return response.read() | |
def feed_data(url):
    """Download *url* (retrying until it succeeds) and decode it as JSON."""
    raw = request_until_succeed(url)
    return json.loads(raw)
def save_json(posts, path='./result.json'):
    """Serialize *posts* as JSON to *path* (default: ./result.json).

    The original opened the file without ever closing it, so the write
    was not guaranteed to be flushed; a context manager fixes that.
    The new *path* parameter defaults to the original hard-coded target,
    so existing callers are unaffected.
    """
    with open(path, 'w+') as out:
        json.dump(posts, out)
def scrap_group(url):
    """Walk a Graph API feed starting at *url*, collecting every post.

    Follows the 'paging' -> 'next' cursor until the API stops providing
    one, then persists the accumulated posts via save_json(). Also
    returns the list of posts (the original returned None; returning the
    data is backward compatible and makes the function testable).
    """
    posts = []
    num_processed = 0
    statuses = feed_data(url)
    while True:
        page = statuses['data']
        posts.extend(page)
        # BUG FIX: the original added len(page) once PER POST inside the
        # loop, inflating the counter quadratically. Count each post once.
        num_processed += len(page)
        if 'paging' not in statuses:
            save_json(posts)
            return posts
        print("{num} processed posts.".format(num=num_processed))
        # The original inlined json.loads(request_until_succeed(...)),
        # which is exactly what feed_data() does — reuse it.
        statuses = feed_data(statuses['paging']['next'])
        time.sleep(0.3)  # small pause between page fetches
if __name__ == '__main__':
    # CLI entry point: assemble the Graph API feed URL and scrape it.
    cli = argparse.ArgumentParser()
    cli.add_argument('id', help='ID of Graph API Group')
    cli.add_argument('-o', '--out', default="dump.json", help='Output file')
    cli.add_argument('-t', '--token', help='Authentication token')
    opts = cli.parse_args()

    # Field selector for the feed endpoint (kept byte-identical).
    fields = ("feed?fields=from,comments.limit(1500)"
              "{from,message,comment_count,comments.limit(1500)"
              "{from,message,like_count},like_count},"
              "message,created_time")

    address = ("https://graph.facebook.com/v2.10/"
               "{id}/{query}&access_token={token}").format(
        id=opts.id, query=fields, token=opts.token)

    scrap_group(address)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage
python fb-dl.py -t accesstoken groupid
Example
python fb-dl.py -t EAACEdEose0cBAEnIzWHRHjK1gzlJ4G6N4MdbkAeu7qgswvP2XHR1YBw64NhDHAHjn1ZAmCTdHFLEnr5b8OTu7nVVU3iT3IMxZBvnR4DPkZAuZBhzxyHYb9MP80GW06mlvZBX7voVPZBVOikZCXF65cYYaYYaSHBWamcFEyvZCSQKCG5FSL74C4LaOm4XJc26yLYZD 142918099147059