Skip to content

Instantly share code, notes, and snippets.

@JamesTheHacker
Created August 12, 2016 09:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JamesTheHacker/f47a75e3f17fe93794a014ea8fc6f66c to your computer and use it in GitHub Desktop.
Save JamesTheHacker/f47a75e3f17fe93794a014ea8fc6f66c to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import argparse
import json
from login import login, getEnv
from pyquery import PyQuery as pq
import re
import requests
import sys
def scrapeGroup(session, gid, limit, uid):
    '''
    Scrape member user IDs from the Facebook group `gid`.

    session -- a logged-in requests session
    gid     -- the group ID to scrape
    limit   -- stop paging once at least this many IDs are collected
               (string or int; the final list may exceed it by part of a page)
    uid     -- the logged-in user's numeric ID (required by the AJAX endpoint)

    Returns a list of user ID strings.
    '''
    # NOTE: the original built this URL with a triple-quoted string, which
    # embedded literal newlines and indentation into the query string.
    # Rebuilt here as a single continuous URL.
    url = ('https://www.facebook.com/ajax/browser/list/group_members/'
           '?id={0}&gid={0}&edge=groups%3Amembers&order=default&view=grid'
           '&start={1}&dpr=1&__user={2}&__a=1').format(gid, 0, uid)
    r = session.get(url)
    # Facebook prefixes its AJAX JSON with a 9-character anti-hijacking
    # guard ("for (;;);") — strip it before parsing.
    j = json.loads(r.content[9:])
    dom = pq(j['domops'][0][3]['__html'])
    moreURL = parseMoreURL(dom, uid)
    ids = parseUsers(dom)
    limit = int(limit)  # cast once instead of on every loop iteration
    # Follow the "See More" pager until we have enough IDs or run out of pages.
    # (The loop condition already handles a missing moreURL; the original's
    # extra `if not moreURL: break` was redundant.)
    while moreURL and len(ids) < limit:
        r = session.get(moreURL)
        j = json.loads(r.content[9:])
        dom = pq(j['domops'][0][3]['__html'])
        moreURL = parseMoreURL(dom, uid)
        ids.extend(parseUsers(dom))
    return ids
def parseUsers(dom):
    '''
    Extract the numeric user IDs from every hovercard link in `dom`.

    Each member row contains an <a data-hovercard="...?id=NNN..."> anchor;
    the id=NNN query parameter is that member's user ID.

    Returns a de-duplicated list of ID strings (order not preserved).
    '''
    id_pattern = re.compile(r'id=([0-9]+)')  # hoisted/compiled once
    ids = set()
    for link in dom('a[data-hovercard]').items():
        match = id_pattern.search(link.attr('data-hovercard'))
        # Skip malformed hovercard URLs instead of crashing: the original
        # called .group(1) unconditionally and raised AttributeError when
        # the regex did not match.
        if match:
            ids.add(match.group(1))
    return list(ids)
def parseMoreURL(dom, uid):
    '''
    Extract the "See More" pager link from `dom`.

    The member list is paginated; the a.uiMorePagerPrimary anchor carries
    the relative URL of the next page.  Returns the absolute AJAX URL for
    that page (with the __user and __a parameters appended), or False when
    no further pages exist.
    '''
    href = dom('a.uiMorePagerPrimary').attr('href')
    if href:
        return 'https://www.facebook.com{0}&__user={1}&__a=1'.format(href, uid)
    return False
if __name__ == "__main__":
    # Command-line entry point: log in, scrape the group, print one ID per line.
    parser = argparse.ArgumentParser()
    parser.add_argument('email', help='Facebook email address')
    parser.add_argument('password', help='Facebook password')
    parser.add_argument('gid', help='Group ID')
    parser.add_argument('limit', help='Number of IDs to scrape')
    parser.add_argument('useragent', help='Browser useragent')
    args = parser.parse_args()

    try:
        # Attempt to log the user in; getEnv() extracts the dtsg token and
        # the numeric user id from the logged-in session.
        session = login(args.email, args.password, args.useragent)
        dtsg, uid = getEnv(session)
    except KeyError:
        # A KeyError here means the expected fields were absent from the
        # response — presumably because the login was rejected.
        # print(...) with a single argument behaves identically under
        # Python 2 and 3 (the original Py2-only `print` statement did not).
        print('Login Failed!')
        sys.exit(1)

    # `member_id` avoids shadowing the `id` builtin, which the original did.
    member_ids = scrapeGroup(session, args.gid, args.limit, uid)
    for member_id in member_ids:
        print(member_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment