Skip to content

Instantly share code, notes, and snippets.

@kutschkem
Forked from gelendir/infb.css
Last active December 29, 2015 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kutschkem/7690411 to your computer and use it in GitHub Desktop.
Save kutschkem/7690411 to your computer and use it in GitHub Desktop.
infb fork; updated to use python 3 API. You now need a facebook API token; updated to crawl the about pages
#!/usr/bin/env python
#
# Copyright 2010 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Python client library for the Facebook Platform.
This client library is designed to support the Graph API and the
official Facebook JavaScript SDK, which is the canonical way to
implement Facebook authentication. Read more about the Graph API at
http://developers.facebook.com/docs/api. You can download the Facebook
JavaScript SDK at http://github.com/facebook/connect-js/.
If your application is using Google AppEngine's webapp framework, your
usage of this module might look like this:
    user = facebook.get_user_from_cookie(self.request.cookies, key, secret)
    if user:
        graph = facebook.GraphAPI(user["access_token"])
        profile = graph.get_object("me")
        friends = graph.get_connections("me", "friends")
"""
import cgi
import time
import urllib.request, urllib.parse, urllib.error
import urllib.request, urllib.error, urllib.parse
import http.client
import hashlib
import hmac
import base64
import logging
import socket
import collections
# Find a JSON parser
try:
import simplejson as json
except ImportError:
try:
from django.utils import simplejson as json
except ImportError:
import json
_parse_json = json.loads
# Find a query string parser
try:
from urllib.parse import parse_qs
except ImportError:
from cgi import parse_qs
class GraphAPI(object):
    """A client for the Facebook Graph API.

    See http://developers.facebook.com/docs/api for complete
    documentation for the API.

    The Graph API is made up of the objects in Facebook (e.g., people,
    pages, events, photos) and the connections between them (e.g.,
    friends, photo tags, and event RSVPs). This client provides access
    to those primitive types in a generic way. For example, given an
    OAuth access token, this will fetch the profile of the active user
    and the list of the user's friends:

        graph = facebook.GraphAPI(access_token)
        user = graph.get_object("me")
        friends = graph.get_connections(user["id"], "friends")

    You can see a list of all of the objects and connections supported
    by the API at http://developers.facebook.com/docs/reference/api/.

    You can obtain an access token via OAuth or by using the Facebook
    JavaScript SDK. See
    http://developers.facebook.com/docs/authentication/ for details.

    If you are using the JavaScript SDK, you can use the
    get_user_from_cookie() method below to get the OAuth access token
    for the active user from the cookie saved by the SDK.
    """

    def __init__(self, access_token=None, timeout=None):
        """Store the OAuth token and the socket timeout used by requests."""
        self.access_token = access_token
        self.timeout = timeout

    def get_object(self, id, **args):
        """Fetches the given object from the graph."""
        return self.request(id, args)

    def get_objects(self, ids, **args):
        """Fetches all of the given objects from the graph.

        The IDs are sent as one comma-separated "ids" query argument and
        the parsed reply of that single request is returned.
        """
        args["ids"] = ",".join(ids)
        return self.request("", args)

    def get_connections(self, id, connection_name, **args):
        """Fetches the connections for the given object."""
        return self.request(id + "/" + connection_name, args)

    def put_object(self, parent_object, connection_name, **data):
        """Writes the given object to the graph, connected to the given parent.

        For example,

            graph.put_object("me", "feed", message="Hello, world")

        writes "Hello, world" to the active user's wall. Likewise, this
        will comment on the first post of the active user's feed:

            feed = graph.get_connections("me", "feed")
            post = feed["data"][0]
            graph.put_object(post["id"], "comments", message="First!")

        See http://developers.facebook.com/docs/api#publishing for all
        of the supported writeable objects.

        Certain write operations require extended permissions. For
        example, publishing to a user's feed requires the
        "publish_actions" permission. See
        http://developers.facebook.com/docs/publishing/ for details
        about publishing permissions.

        Raises AssertionError when no access token is configured.
        """
        # Raised explicitly (rather than via ``assert``) so the check is
        # not stripped under ``python -O``; the exception type is unchanged.
        if not self.access_token:
            raise AssertionError("Write operations require an access token")
        return self.request(parent_object + "/" + connection_name,
                            post_args=data)

    def put_wall_post(self, message, attachment=None, profile_id="me"):
        """Writes a wall post to the given profile's wall.

        We default to writing to the authenticated user's wall if no
        profile_id is specified.

        attachment adds a structured attachment to the status message
        being posted to the Wall. It should be a dictionary of the form:

            {"name": "Link name",
             "link": "http://www.example.com/",
             "caption": "{*actor*} posted a new review",
             "description": "This is a longer description of the attachment",
             "picture": "http://www.example.com/thumbnail.jpg"}
        """
        # None instead of a shared mutable ``{}`` default.
        attachment = {} if attachment is None else attachment
        return self.put_object(profile_id, "feed", message=message,
                               **attachment)

    def put_comment(self, object_id, message):
        """Writes the given comment on the given post."""
        return self.put_object(object_id, "comments", message=message)

    def put_like(self, object_id):
        """Likes the given post."""
        return self.put_object(object_id, "likes")

    def delete_object(self, id):
        """Deletes the object with the given ID from the graph."""
        self.request(id, post_args={"method": "delete"})

    def delete_request(self, user_id, request_id):
        """Deletes the Request with the given ID for the given user.

        Raises GraphAPIError when the reply is a dict carrying "error".
        """
        conn = http.client.HTTPSConnection('graph.facebook.com')
        url = '/%s_%s?%s' % (
            request_id,
            user_id,
            urllib.parse.urlencode({'access_token': self.access_token}),
        )
        # Close the connection on every path, including errors.
        try:
            conn.request('DELETE', url)
            data = conn.getresponse().read()
        finally:
            conn.close()
        response = _parse_json(data.decode("utf-8"))
        # Raise an error if we got one, but not if Facebook just gave us
        # a Bool value.
        if response and isinstance(response, dict) and response.get("error"):
            raise GraphAPIError(response)

    def put_photo(self, image, message=None, album_id=None, **kwargs):
        """Uploads an image using multipart/form-data.

        image=File like object for the image
        message=Caption for your image
        album_id=None posts to /me/photos which uses or creates and uses
        an album for your application.

        Returns the parsed JSON reply, or the raw reply bytes when the
        body is not valid JSON. Raises GraphAPIError when the reply is a
        dict carrying "error".
        """
        object_id = album_id or "me"
        # It would have been nice to reuse self.request, but multipart is
        # messy in urllib.
        post_args = {
            'access_token': self.access_token,
            'source': image,
            'message': message,
        }
        post_args.update(kwargs)
        content_type, body = self._encode_multipart_form(post_args)
        req = urllib.request.Request(
            "https://graph.facebook.com/%s/photos" % object_id, data=body)
        req.add_header('Content-Type', content_type)
        try:
            with urllib.request.urlopen(req) as reply:
                data = reply.read()
        except urllib.error.HTTPError as e:
            # Facebook sends OAuth errors as HTTP 400; read the body so it
            # can be turned into a GraphAPIError below.
            data = e.read()
        try:
            response = _parse_json(data.decode("utf-8"))
        except ValueError:
            return data
        # Raise an error if we got one, but not if Facebook just gave us
        # a Bool value.
        if response and isinstance(response, dict) and response.get("error"):
            raise GraphAPIError(response)
        return response

    # based on: http://code.activestate.com/recipes/146306/
    def _encode_multipart_form(self, fields):
        """Encode fields as a 'multipart/form-data' request body.

        fields is a dict of form name -> value. A value with a callable
        ``read`` attribute is treated as a file: its ``name`` attribute
        (or "<key>.jpg" when absent) is sent as the filename and its
        content as image/jpeg. Falsy values are skipped.

        Returns (content_type, body) where body is ``bytes``.
        """
        boundary = '----------ThIs_Is_tHe_bouNdaRY_$'

        def as_bytes(chunk):
            # Header lines are str and file content is usually bytes;
            # normalise so the final join never mixes the two types.
            if isinstance(chunk, (bytes, bytearray)):
                return bytes(chunk)
            return str(chunk).encode('utf-8')

        parts = []
        for key, value in fields.items():
            logging.debug("Encoding %s, (%s)%s", key, type(value), value)
            if not value:
                continue
            parts.append('--' + boundary)
            if callable(getattr(value, 'read', None)):
                filename = getattr(value, 'name', '%s.jpg' % key)
                parts.append('Content-Disposition: form-data; '
                             'name="%s"; filename="%s"' % (key, filename))
                parts.append('Content-Type: image/jpeg')
                value = value.read()
                logging.debug(type(value))
            else:
                parts.append('Content-Disposition: form-data; name="%s"' % key)
            parts.append('')  # blank line separates headers from content
            parts.append(value)
        parts.append('--' + boundary + '--')
        parts.append('')
        body = b'\r\n'.join(as_bytes(part) for part in parts)
        content_type = 'multipart/form-data; boundary=%s' % boundary
        return content_type, body

    def _prepare_arguments(self, args, post_args):
        """Return (query_args, post_data) with the access token attached.

        The token goes into the POST arguments when there are any,
        otherwise into the query arguments. Both dicts are copied so the
        caller's objects are not modified. post_data is url-encoded bytes,
        or None when post_args is None.
        """
        query_args = dict(args) if args else {}
        if post_args is not None:
            post_args = dict(post_args)
        if self.access_token:
            if post_args is not None:
                post_args["access_token"] = self.access_token
            else:
                query_args["access_token"] = self.access_token
        if post_args is None:
            return query_args, None
        # urlopen() requires bytes for the request body on Python 3.
        return query_args, urllib.parse.urlencode(post_args).encode("ascii")

    def request(self, path, args=None, post_args=None):
        """Fetches the given path in the Graph API.

        We translate args to a valid query string. If post_args is
        given, we send a POST request to the given path with the given
        arguments.

        Returns the parsed JSON for text / application/json replies, or a
        dict with "data", "mime-type" and "url" for image replies.
        Raises GraphAPIError on an HTTP error status, on any other
        content type, or when the parsed reply is a dict carrying "error".
        """
        args, post_data = self._prepare_arguments(args, post_args)
        url = ("https://graph.facebook.com/" + path + "?" +
               urllib.parse.urlencode(args))
        try:
            file = urllib.request.urlopen(url, post_data,
                                          timeout=self.timeout)
        except urllib.error.HTTPError as e:
            response = _parse_json(bytes.decode(e.read()))
            raise GraphAPIError(response)
        try:
            file_info = file.info()
            maintype = file_info.get_content_maintype()
            is_json = file_info.get_content_type() == 'application/json'
            if maintype == 'text' or is_json:
                response = _parse_json(bytes.decode(file.read()))
            elif maintype == 'image':
                response = {
                    "data": file.read(),
                    "mime-type": file_info['content-type'],
                    "url": file.url,
                }
            else:
                raise GraphAPIError('Maintype was not text or image')
        finally:
            file.close()
        if response and isinstance(response, dict) and response.get("error"):
            # GraphAPIError takes the whole result and extracts the
            # message itself.
            raise GraphAPIError(response)
        return response

    def fql(self, query, args=None, post_args=None):
        """FQL query.

        Example query: "SELECT affiliations FROM user WHERE uid = me()"

        Returns the parsed JSON reply. Raises GraphAPIError when the
        reply is a dict containing "error_code".
        """
        args, post_data = self._prepare_arguments(args, post_args)
        args["q"] = query
        args["format"] = "json"
        file = urllib.request.urlopen(
            "https://graph.facebook.com/fql?" + urllib.parse.urlencode(args),
            post_data, timeout=self.timeout)
        try:
            response = _parse_json(bytes.decode(file.read()))
        finally:
            file.close()
        # Success yields a list; failure yields a dictionary.
        if isinstance(response, dict) and "error_code" in response:
            raise GraphAPIError(response)
        return response

    def extend_access_token(self, app_id, app_secret):
        """Extends the expiration time of a valid OAuth access token.

        See <https://developers.facebook.com/roadmap/offline-access-removal/
        #extend_token>

        Returns a dict with "access_token" and, when present in the
        reply, "expires". Raises GraphAPIError when the reply has no
        access token.
        """
        args = {
            "client_id": app_id,
            "client_secret": app_secret,
            "grant_type": "fb_exchange_token",
            "fb_exchange_token": self.access_token,
        }
        url = ("https://graph.facebook.com/oauth/access_token?" +
               urllib.parse.urlencode(args))
        with urllib.request.urlopen(url) as reply:
            # Decode first: parse_qs() on bytes yields bytes keys, so the
            # str key checks below would never match.
            response = reply.read().decode("utf-8")
        query_str = parse_qs(response)
        if "access_token" in query_str:
            result = {"access_token": query_str["access_token"][0]}
            if "expires" in query_str:
                result["expires"] = query_str["expires"][0]
            return result
        raise GraphAPIError(json.loads(response))
class GraphAPIError(Exception):
    """Error built from a Graph API / OAuth result.

    Attributes:
        result: the object passed to the constructor, unchanged.
        type: ``result["error_code"]`` when available, otherwise "".
        message: the first of ``result["error_description"]``,
            ``result["error"]["message"]`` and ``result["error_msg"]``
            that can be looked up; otherwise ``result`` itself.
    """

    # Marks "lookup failed", so a stored None is still a valid value.
    _MISSING = object()

    def __init__(self, result):
        self.result = result

        error_code = self._lookup(result, "error_code")
        self.type = "" if error_code is self._MISSING else error_code

        self.message = result
        candidates = (
            ("error_description",),  # OAuth 2.0 Draft 10
            ("error", "message"),    # OAuth 2.0 Draft 00
            ("error_msg",),          # REST server style
        )
        for path in candidates:
            found = self._lookup(result, *path)
            if found is not self._MISSING:
                self.message = found
                break

        Exception.__init__(self, self.message)

    @classmethod
    def _lookup(cls, container, *keys):
        """Follow keys through nested containers; return _MISSING on failure."""
        try:
            for key in keys:
                container = container[key]
        except (KeyError, TypeError, IndexError):
            # Only lookup failures are swallowed; the original bare
            # ``except:`` also hid KeyboardInterrupt and SystemExit.
            return cls._MISSING
        return container
def get_user_from_cookie(cookies, app_id, app_secret):
    """Parses the cookie set by the official Facebook JavaScript SDK.

    cookies should be a dictionary-like object mapping cookie names to
    cookie values.

    If the user is logged in via Facebook, we return a dictionary with
    the keys "uid" and "access_token". The former is the user's
    Facebook ID, and the latter can be used to make authenticated
    requests to the Graph API. If the user is not logged in, we
    return None.

    None is also returned when the "fbsr_<app_id>" cookie is missing or
    empty, when the signed request does not verify, when it lacks "code"
    or "user_id", or when exchanging the code raises GraphAPIError.

    Download the official Facebook JavaScript SDK at
    http://github.com/facebook/connect-js/. Read more about Facebook
    authentication at
    http://developers.facebook.com/docs/authentication/.
    """
    cookie = cookies.get("fbsr_" + app_id, "")
    if not cookie:
        return None
    parsed_request = parse_signed_request(cookie, app_secret)
    if not parsed_request:
        return None
    # Treat a signed request without these fields as "not logged in"
    # instead of letting a KeyError escape.
    code = parsed_request.get("code")
    user_id = parsed_request.get("user_id")
    if not code or user_id is None:
        return None
    try:
        result = get_access_token_from_code(code, "", app_id, app_secret)
    except GraphAPIError:
        return None
    result["uid"] = user_id
    return result
def parse_signed_request(signed_request, app_secret):
    """Return dictionary with signed request data.

    We return a dictionary containing the information in the
    signed_request. This includes a user_id if the user has authorised
    your application, as well as any information requested.

    signed_request has the form "<base64url signature>.<base64url JSON>".
    False is returned when it is malformed or corrupted, when the payload
    is not a JSON object, when its "algorithm" is not HMAC-SHA256, or
    when the signature does not match the HMAC-SHA256 of the encoded
    payload keyed with app_secret.
    """
    def pad(encoded):
        # base64 input must be a multiple of 4 characters long.
        return encoded + "=" * (-len(encoded) % 4)

    try:
        encoded_sig, payload = signed_request.split('.', 1)
        sig = base64.urlsafe_b64decode(pad(encoded_sig))
        raw = base64.urlsafe_b64decode(pad(payload))
        data = json.loads(raw.decode('utf-8'))
    except (ValueError, TypeError):
        # ValueError covers a missing '.', bad base64 (binascii.Error),
        # undecodable bytes and invalid JSON on Python 3.
        return False

    if not isinstance(data, dict):
        return False
    if str(data.get('algorithm', '')).upper() != 'HMAC-SHA256':
        return False

    # HMAC can only handle ascii (byte) strings
    # http://bugs.python.org/issue5285
    expected_sig = hmac.new(app_secret.encode('ascii'),
                            msg=payload.encode('ascii'),
                            digestmod=hashlib.sha256).digest()
    # Constant-time comparison of the two digests.
    if not hmac.compare_digest(sig, expected_sig):
        return False
    return data
def auth_url(app_id, canvas_url, perms=None, **kwargs):
    """Build the URL of the Facebook OAuth dialog.

    app_id is sent as "client_id" and canvas_url as "redirect_uri".
    perms, when given, becomes the comma-separated "scope"; it may be an
    iterable of permission names or a single ready-made string. Any
    extra keyword arguments are added to the query string as-is.

    Returns the complete URL as a string.
    """
    url = "https://www.facebook.com/dialog/oauth?"
    kvps = {'client_id': app_id, 'redirect_uri': canvas_url}
    if perms:
        # Joining a bare string would split it into single characters.
        kvps['scope'] = perms if isinstance(perms, str) else ",".join(perms)
    kvps.update(kwargs)
    return url + urllib.parse.urlencode(kvps)
def get_access_token_from_code(code, redirect_uri, app_id, app_secret):
    """Get an access token from the "code" returned from an OAuth dialog.

    Returns a dict containing the user-specific access token and its
    expiration date (if applicable), under the keys "access_token" and
    "expires".

    Raises GraphAPIError when the reply contains no access token.
    """
    args = {
        "code": code,
        "redirect_uri": redirect_uri,
        "client_id": app_id,
        "client_secret": app_secret,
    }
    # We would use GraphAPI.request() here, except for the fact that the
    # response is a key-value pair, and not JSON.
    url = ("https://graph.facebook.com/oauth/access_token" +
           "?" + urllib.parse.urlencode(args))
    with urllib.request.urlopen(url) as reply:
        # Decode first: parse_qs() on bytes yields bytes keys, so the str
        # key checks below would never match.
        response = reply.read().decode("utf-8")
    query_str = parse_qs(response)
    if "access_token" in query_str:
        result = {"access_token": query_str["access_token"][0]}
        if "expires" in query_str:
            result["expires"] = query_str["expires"][0]
        return result
    raise GraphAPIError(json.loads(response))
def get_app_access_token(app_id, app_secret):
    """Get the access_token for the app.

    This token can be used for insights and creating test users.

    app_id = retrieved from the developer page
    app_secret = retrieved from the developer page

    Returns the application access_token: the text after the first "="
    in the reply body.
    """
    # Get an app access token
    args = {'grant_type': 'client_credentials',
            'client_id': app_id,
            'client_secret': app_secret}
    url = ("https://graph.facebook.com/oauth/access_token?" +
           urllib.parse.urlencode(args))
    with urllib.request.urlopen(url) as reply:
        # read() returns bytes; splitting bytes on a str separator raises
        # TypeError, so decode before splitting.
        body = reply.read().decode("utf-8")
    # maxsplit=1 keeps a token intact even if it contains "=" itself.
    return body.split("=", 1)[1]
/*
    Ruel Pagayon (c) 2010 - ruel@ruel.me
    Cascading Style Sheet for InFB Log Output.
*/

/* Dark page with light, very small base text. */
body {
    background-color: #3C3C3C;
    color: #FFF;
    margin-top: 50px;
    margin-left: 25px;
    font-size: xx-small;
    /* "sans-serif" is the generic family keyword; "sans" is not one. */
    font-family: Calibri, Arial, sans-serif;
}

/* Wrapper around the results table. */
.rby {
    text-align: center;
    font-size: xx-small;
}

table {
    text-align: center;
}

td {
    padding-top: 0.5em;
    padding-bottom: 0.5em;
    padding-left: 1em;
    padding-right: 1em;
    text-align: left;
    font-size: small;
}

/* Row number column. */
td.num {
    color: #CCC;
}

/* Contact (phone) number column. */
td.cnum {
    color: #AFAFAF;
}

a:active, a:visited, a:link {
    color: #FFF;
    font-weight: bold;
    text-decoration: none;
}

a:hover {
    color: #FFF;
    font-weight: bold;
    text-decoration: underline;
}
#!/usr/bin/python
#
# InFB - Information Facebook
# Usage: infb.py auth_token user@domain.tld [password]
# http://ruel.me
#
# Copyright (c) 2010, Ruel Pagayon - ruel@ruel.me
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of ruel.me nor the names of its contributors
# may be used to endorse or promote products derived from this
# script without specific prior written permission.
#
# THIS SCRIPT IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL RUEL PAGAYON BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SCRIPT, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys, re,json, facebook, urllib.parse, urllib.request as urllib2, urllib.error, http.cookiejar as cookielib, html.parser as HTMLParser, getpass
class FormScraper(HTMLParser.HTMLParser):
    """
    Scrapes the Facebook login page for form values that need to be submitted on login.
    Necessary because the form values change each time the login page is loaded.

    Only hidden <input> elements inside the form whose id is "login_form"
    are collected, as (name, value) tuples in document order. Inputs
    without a name are skipped; a missing value is recorded as "".

    Usage:
        form_scraper = FormScraper()
        form_scraper.feed(html_from_facebook)
        form_values = form_scraper.values
    """

    def __init__(self, *args, **kwargs):
        HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
        self.in_form = False  # True while inside the login form
        self.values = []      # collected (name, value) pairs

    def handle_starttag(self, tag, attrs):
        tag = tag.lower()
        attrs = dict(attrs)
        # .get() throughout: a form without an id, or an input without a
        # type or value, must not abort parsing with a KeyError.
        if tag == 'form' and attrs.get('id') == 'login_form':
            self.in_form = True
        elif self.in_form and tag == 'input':
            if (attrs.get('type') or '').lower() != 'hidden':
                return
            name = attrs.get('name')
            if name is not None:
                self.values.append((name, attrs.get('value') or ''))

    def handle_endtag(self, tag):
        if tag.lower() == 'form' and self.in_form:
            self.in_form = False
class FriendReader:
    """Fetches a user's "friends" connection through facebook.GraphAPI."""

    def __init__(self):
        # Defined up front so reading ``friends`` before getFriends() gives
        # None instead of raising AttributeError.
        self.friends = None

    def getFriends(self, oauth_access_token, user):
        """Load the friends of ``user`` into ``self.friends`` and return them.

        oauth_access_token is passed to facebook.GraphAPI; user is the
        graph id whose "friends" connection is requested.
        """
        graph = facebook.GraphAPI(oauth_access_token)
        # The result is unused; the call is kept because it is an extra
        # request the original made (presumably to fail early on a bad
        # user or token -- TODO confirm).
        graph.get_object(user)
        self.friends = graph.get_connections(user, "friends")
        return self.friends
def _first_group(pattern, text):
    """Return group 1 of the first match of compiled pattern in text, or ''."""
    found = pattern.search(text)
    return found.group(1) if found else ''


def main():
    """Log in to Facebook and write the friends' contact details to HTML.

    Command line: auth_token user [password]; the password is prompted
    for when it is not given. The friends list is fetched through the
    Graph API with the token, the site is logged into with the user name
    and password, each friend's "/about" page is downloaded, and the
    address, phone and e-mail found there are written as one table row
    per friend to "<user>.html".

    Exits with status 1 on bad usage and 2 when the login fails.
    """
    # Local import: the file-level import line only binds html.parser.
    from html import escape

    if len(sys.argv) < 3:
        usage()
    access_token = sys.argv[1]
    user = sys.argv[2]
    if len(sys.argv) < 4:
        passw = getpass.getpass("Enter password: ")
    else:
        passw = sys.argv[3]

    # Cookie-aware opener, installed globally for later urlopen() calls.
    cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    browser = urllib2.build_opener(cookie_handler)
    browser.addheaders = [('User-agent', 'InFB - ruel@ruel.me - http://ruel.me')]
    urllib2.install_opener(browser)

    # Retrieve login form data and initialize the cookies
    print('Initializing..')
    res = browser.open('https://www.facebook.com/login.php')
    try:
        # Charset from the Content-Type header, defaulting to UTF-8.
        encoding = res.info().get_content_charset() or 'utf-8'
        login_page = res.read().decode(encoding)
    finally:
        res.close()

    # Scrape the form for hidden inputs, add email and password to values
    form_scraper = FormScraper()
    form_scraper.feed(login_page)
    form_data = [(name, '' if value is None else value)
                 for name, value in form_scraper.values]
    form_data.extend([('email', user), ('pass', passw)])
    # urlencode() percent-encodes with the page's charset; the request
    # body itself must be bytes.
    data = urllib.parse.urlencode(form_data, encoding=encoding).encode('ascii')

    friend_reader = FriendReader()
    friend_reader.getFriends(access_token, user)
    friends = friend_reader.friends

    # Login
    print('Logging in to account ' + user)
    res = browser.open('https://login.facebook.com/login.php?login_attempt=1', data)
    try:
        print(res.code)
        print(res.url)
        # Plain substring test: as a regex, the unescaped "?" and "."
        # meant the pattern could never match this URL.
        login_failed = '/login.php?login_attempt=1' in res.url
    finally:
        res.close()
    if login_failed:
        print('Login Failed')
        sys.exit(2)

    # Get Emails and Phone Numbers
    print("Getting Info..\n")
    address_re = re.compile(r'Address</th><td class="_480u"><span class="fsm"><ul class="uiList _4kg">(.*?)</ul>')
    phone_re = re.compile(r'<td class="_51m- contactInfoPhone"><span dir="ltr">(.*?)</span>')
    email_re = re.compile(r'Email</th><td class="_480u"><ul class="uiList _4kg"><li class="prs"><div class="clearfix"><div class="lfloat">(.*?)<\/div>')

    # The document declares UTF-8, so write it as UTF-8.
    with open(user + '.html', 'w', encoding='utf-8') as flog:
        flog.write("<html>\n\t<head>\n\t\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n\t\t<title>InFB - " + escape(user) + "</title>\n\t\t<link href=\"infb.css\" rel=\"stylesheet\" type=\"text/css\" />\n\t</head>\n\t<body>\n\t\t<div class=\"rby\">\n\t\t\t<table class=\"flist\">\n\t\t\t\t")
        for i, friend in enumerate(friends['data'], 1):
            print(friend)
            # The first request is made only to learn the profile's
            # final URL after redirects.
            res = browser.open('http://facebook.com/' + str(friend['id']))
            prof = res.url + '/about'
            res.close()
            res = browser.open(prof)
            try:
                cont = res.read().decode('utf-8', errors='replace')
            finally:
                res.close()

            name = friend['name']
            addr = _first_group(address_re, cont)
            tel = _first_group(phone_re, cont)
            email = _first_group(email_re, cont).replace('<br />', ', ').replace('&#64;', '@')
            print(name + ' : ' + email + ' ' + tel)

            # The name is plain text and is escaped; email/tel/addr are
            # HTML fragments cut from the page and are written as-is.
            safe_name = escape(name)
            flog.write("<tr class=\"lbreak\">\n\t\t\t\t\t<td class=\"num\">" + str(i) + "</td><td class=\"fname\"><a href=\"" + escape(prof) + "\" title=\"" + safe_name + "\">" + safe_name + "</a></td><td class=\"fmail\">" + email + "</td><td class=\"cnum\">" + tel + "</td><td class=\"addr\">" + addr + "</td>\n\t\t\t\t\t</tr>\n\t\t\t\t")
        flog.write("\n\t\t\t</table>\n\t\t</div>\n\t</body>\n</html>")
def usage():
    """Print the command-line synopsis to stderr and exit with status 1."""
    # Diagnostics go to stderr so they do not mix with regular output.
    print('Usage: ' + sys.argv[0] + ' auth_token user@domain.tld [password]',
          file=sys.stderr)
    sys.exit(1)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment