Skip to content

Instantly share code, notes, and snippets.

@axiak
Created July 28, 2010 21:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save axiak/496433 to your computer and use it in GitHub Desktop.
Save axiak/496433 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
contactinfo.py is a simple script to get contact information from
Facebook.
To use, first install mechanize [1]. Then use the friendstocsv application [2]
on Facebook to export a csv file with all of your friends. Be sure
to include at least the profile URL.
Afterwards, edit this file to put your email, password, and user agent
string at the top.
Then, run it as follows:
$ python contactinfo.py <INPUT> <OUTPUT>
Where INPUT and OUTPUT are each either a filename or '-'. A '-' selects
the standard input (for INPUT) or the standard output (for OUTPUT);
OUTPUT may also be omitted, which likewise selects the standard output.
So the following is acceptable:
$ python contactinfo.py - - < friendstocsvoutput.csv > myinfo.csv
1: http://pypi.python.org/pypi/mechanize/
2: http://apps.facebook.com/friendstocsv/
"""
# Version of this script as a tuple; presumably (major, minor, patch).
__VERSION__ = (0, 0, 1)
# Facebook credentials that facebook_login() types into the login form.
# Edit these before running.
# NOTE(review): the password lives in plain text in this file -- avoid
# committing or sharing a copy that contains a real one.
FB_EMAIL = "axiak@mit.edu"
FB_PASSWORD = ""
# Value of the User-agent request header that facebook_login() installs on
# the browser.
FB_USERAGENT = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.38 Safari/533.4"
import csv
import sys
import random
import time
import re
import mechanize
# Matches a <br ...> tag; callers replace it with a separator character
# before the remaining markup is stripped.
br_re = re.compile(r"<br[^>]*>")
# Matches any HTML tag; substituting '' removes markup from extracted text.
html_strip = re.compile(r"<[^>]+>")
def debug(s):
    """Write a diagnostic message to standard error, followed by a newline.

    Args:
        s: The message. Any object is accepted; it is rendered with ``%s``.
    """
    # Wrap the argument in a 1-tuple so that a tuple message is printed as-is
    # instead of being unpacked as several format arguments (TypeError).
    sys.stderr.write("%s\n" % (s,))
class PersonRecord(dict):
    """A dict describing one person, pre-populated with every known field.

    Every name listed in ``fields`` is always present as a key (defaulting
    to the empty string), so a record can be turned into a complete CSV row
    even when only some values were supplied.
    """

    # Column names, in the order they are emitted by to_row().
    fields = (
        'uid',
        'last_name',
        'first_name',
        'name',
        'birthday_date',
        'hometown_location',
        'state',
        'country',
        'zip',
        'profile_url',
        'emails',
        'phone',
        'aim',
        'skype',
        'yahoo',
        'address',
        'website',
    )

    def __init__(self, **kwargs):
        """Create a record with all fields empty, then apply ``kwargs``.

        Args:
            **kwargs: Initial values; keys outside ``fields`` are stored too
                but are not included in to_row().
        """
        super(PersonRecord, self).__init__()
        self.update(dict.fromkeys(PersonRecord.fields, ''))
        self.update(kwargs)

    def to_row(self):
        """Return the record's values as a list ordered like ``fields``."""
        # Build a real list: map() is a lazy, one-shot iterator on Python 3,
        # which would break callers that index or re-read the row.
        return [self[field] for field in PersonRecord.fields]
def main():
    """Log in, then write one enriched CSV row per person in the input.

    Reads the input CSV named by ``sys.argv[1]``, downloads the contact
    information for each person and writes the resulting row to the output
    selected by open_csv_output(). The output is flushed after every row.

    Raises:
        SystemExit: If no INPUT argument was given (usage message), or when
            a callee aborts the run.
    """
    if len(sys.argv) < 2:
        # Fail fast with a usage message rather than hitting an IndexError
        # on sys.argv[1] after the login has already happened.
        sys.exit("Usage: python contactinfo.py <INPUT> [OUTPUT]")
    browser = facebook_login()
    outcsv, outfile = open_csv_output()
    try:
        for person in read_csv(sys.argv[1]):
            contact_info(browser, person)
            outcsv.writerow(person.to_row())
            # Flush per row so finished rows survive a later failure.
            outfile.flush()
    finally:
        # Close the output even when a download or parse error ends the run.
        outfile.close()
def facebook_login():
    """Log in to Facebook and return the mechanize browser used to do so.

    Fills the first form on the Facebook front page with FB_EMAIL and
    FB_PASSWORD and submits it. If the response contains a
    ``standard_error`` element, the error text is reported on standard
    error and the script exits.

    Returns:
        The ``mechanize.Browser`` instance after the login form was submitted.

    Raises:
        SystemExit: With status 2 when the response reports a login error.
    """
    br = mechanize.Browser()
    # Install the headers before the first request so that the initial page
    # load, and not only the form submission, carries the configured
    # User-agent.
    br.addheaders = [
        ("User-agent", FB_USERAGENT),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Accept-Language", "en-us,en;q=0.5"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.5"),
    ]
    br.open("http://www.facebook.com/")
    # NOTE(review): assumes the first form on the page is the login form.
    br.select_form(nr=0)
    br["email"] = FB_EMAIL
    br["pass"] = FB_PASSWORD
    response = br.submit()
    content = response.read()
    if '"standard_error"' in content:
        error_re = re.compile(r'id="standard_error">(.+?)</h2>')
        m = error_re.search(content)
        if m:
            # Turn <br> into newlines, then drop any remaining markup.
            msg = html_strip.sub('', br_re.sub('\n', m.group(1)))
        else:
            msg = "Invalid username and or password."
        debug("ERROR: %s" % msg)
        sys.exit(2)
    return br
def random_wait(mean=1):
    """Sleep for a random, exponentially distributed number of seconds.

    Args:
        mean: Average delay in seconds (default 1). A value of zero or less
            disables the wait.
    """
    mean = float(mean)
    if mean <= 0:
        # A zero mean would divide by zero and a negative one would produce
        # a negative sleep time; treat both as "do not wait".
        return
    time.sleep(random.expovariate(1 / mean))
def read_csv(arg):
    """Yield a PersonRecord for every data row of a CSV file.

    The first row is treated as the header; each later row becomes a record
    whose keys are the header names of the corresponding columns.

    Args:
        arg: Path of the CSV file to read, or '-' to read standard input.

    Yields:
        One PersonRecord per data row. Nothing is yielded for empty input.
    """
    source = sys.stdin if arg == '-' else open(arg, 'r')
    try:
        reader = csv.reader(source)
        # next() with a default works on Python 2.6+ and 3, and lets empty
        # input end the generator cleanly instead of leaking StopIteration.
        header = next(reader, None)
        if header is None:
            return
        for row in reader:
            person = PersonRecord()
            # zip() stops at the shorter sequence, so surplus cells that have
            # no header name are ignored rather than raising KeyError.
            for name, value in zip(header, row):
                person[name] = value
            yield person
    finally:
        # Runs on exhaustion, on error and when the generator is discarded,
        # so an opened file is never leaked. Standard input is left open.
        if source is not sys.stdin:
            source.close()
def open_csv_output():
    """Open the output destination and write the CSV header row to it.

    The destination is ``sys.argv[2]``; when that argument is absent or is
    '-', standard output is used instead.

    Returns:
        A ``(writer, stream)`` pair: the csv writer (quoting every value)
        and the underlying file object it writes to.
    """
    use_stdout = len(sys.argv) <= 2 or sys.argv[2] == '-'
    if use_stdout:
        stream = sys.stdout
    else:
        stream = open(sys.argv[2], 'w')
    writer = csv.writer(stream, quoting=csv.QUOTE_ALL)
    # The header row is the record field names, in output order.
    writer.writerow(PersonRecord.fields)
    return writer, stream
def contact_info(browser, person):
    """Download a person's info page and merge the parsed data into them.

    The page requested is the person's ``profile_url`` with a ``v=info``
    query parameter added. Up to ten download attempts are made, with a
    random wait before the first attempt and after each failure.

    Args:
        browser: Object whose ``open(url)`` returns a response with a
            ``read()`` method (the browser returned by facebook_login()).
        person: Record to update in place; must contain 'profile_url'.

    Raises:
        SystemExit: With status 3 when all ten download attempts fail.
    """
    url = person['profile_url']
    if not url:
        # Without a profile URL the request would be the bare "?v=info",
        # which cannot identify this person's page; leave the record as is.
        debug("INFO: Skipping %s: no profile URL" % person.get('name', ''))
        return
    url += '&v=info' if '?' in url else '?v=info'
    random_wait()
    debug("INFO: Downloading %s" % url)
    for _ in range(10):
        try:
            response = browser.open(url)
        # "as" form is valid on Python 2.6+ and 3; the comma form is not.
        except Exception as e:
            debug("ERROR: Browser download error %s" % e)
            debug("INFO: Waiting to redownload...")
            random_wait()
        else:
            break
    else:
        # The loop ended without a successful download.
        debug("ERROR: Was unable to download from facebook too many times.")
        sys.exit(3)
    person.update(get_data(response.read()))
def get_data(info):
    """Extract known contact fields from the text of an info page.

    Scans ``info`` for label/data cell pairs whose markup appears with
    backslash-escaped quotes and slashes, normalises each label, and keeps
    the pairs whose label is one of ``PersonRecord.fields``.

    Args:
        info: The downloaded page content as a string.

    Returns:
        A dict mapping field names to their text values, with <br> tags
        turned into '|' and all other tags removed. When a field occurs
        more than once, the last occurrence wins.
    """
    row_pattern = re.compile(r"class=\\\"label\\\">(.+?)<\\\/th.*?class=\\\"data\\\">(.+?)<\\\/td>")
    # Page labels that correspond to a differently named record field.
    aliases = {
        'mobile number': 'phone',
        'email': 'emails',
        'contact info': 'emails',
    }
    extracted = {}
    for found in row_pattern.finditer(info):
        raw_label = found.group(1)
        raw_value = found.group(2)
        key = raw_label.rstrip(':').strip().lower()
        key = aliases.get(key, key)
        if key not in PersonRecord.fields:
            continue
        value = html_strip.sub('', br_re.sub('|', raw_value))
        if key == 'emails' and value.startswith('Email:'):
            # Drop the leading "Email:" caption (6 characters).
            value = value[6:]
        extracted[key] = value
    return extracted
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment