Created
August 2, 2012 00:51
-
-
Save ewindisch/3232043 to your computer and use it in GitHub Desktop.
LinkedIn OpenStack Foundation Members Mapping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
print "Importing." | |
from pprint import pprint | |
from urllib2 import urlopen | |
from bs4 import BeautifulSoup | |
# LinkedIn API code | |
import re | |
from ConfigParser import SafeConfigParser | |
import oauth2 as oauth | |
import urlparse | |
from urllib import urlencode | |
# Load the LinkedIn OAuth credentials from the [linkedin] section of
# linkedin.conf.  Each option is given a default of None; the script later
# treats an empty token secret as "no access token yet".
print("Loading configuration.")
cp = SafeConfigParser(dict.fromkeys(
    ("token_key", "token_secret", "consumer_key", "consumer_secret")))
cp.read(["linkedin.conf"])
(token_key, token_secret, consumer_key, consumer_secret) = [
    cp.get("linkedin", option)
    for option in ("token_key", "token_secret",
                   "consumer_key", "consumer_secret")
]
# Build an OAuth client for the LinkedIn API.  When the configuration holds
# no token secret, first run the interactive handshake: fetch a request
# token, have the user authorize it in a browser and type in the PIN, then
# exchange it for an access token.
print("Connecting application to LinkedIn.")
consumer = oauth.Consumer(consumer_key, consumer_secret)
client = oauth.Client(consumer)
token = None

if not token_secret:
    # Step 1: obtain a temporary request token.
    request_token_url = 'https://api.linkedin.com/uas/oauth/requestToken'
    resp, content = client.request(request_token_url, "POST")
    if resp['status'] != '200':
        raise Exception("Invalid response %s." % resp['status'])
    request_token = dict(urlparse.parse_qsl(content))

    # Step 2: the user authorizes the request token and reports the PIN.
    authorize_url = 'https://api.linkedin.com/uas/oauth/authorize'
    print("Go to the following link in your browser:")
    print("%s?oauth_token=%s" % (authorize_url, request_token['oauth_token']))
    accepted = 'n'
    # Keep asking while the answer is "n"; any other answer continues.
    while accepted.lower() == 'n':
        accepted = raw_input('Have you authorized me? (y/n) ')
    oauth_verifier = raw_input('What is the PIN? ')

    # Step 3: exchange the verified request token for an access token.
    access_token_url = 'https://api.linkedin.com/uas/oauth/accessToken'
    token = oauth.Token(request_token['oauth_token'],
                        request_token['oauth_token_secret'])
    token.set_verifier(oauth_verifier)
    client = oauth.Client(consumer, token)
    resp, content = client.request(access_token_url, "POST")
    # Fail with the HTTP status (as for the request token) rather than
    # with a KeyError on the missing 'oauth_token' field further down.
    if resp['status'] != '200':
        raise Exception("Invalid response %s." % resp['status'])
    access_token = dict(urlparse.parse_qsl(content))

    print("Access Token:")
    print(" - oauth_token = %s" % access_token['oauth_token'])
    print(" - oauth_token_secret = %s" % access_token['oauth_token_secret'])
    print("You may now access protected resources using the access tokens above.")
    token_key = access_token['oauth_token']
    token_secret = access_token['oauth_token_secret']

# From here on every request is signed with the access token, whether it
# came from the configuration file or from the handshake above.
print("Authenticating with LinkedIn")
token = oauth.Token(key=token_key, secret=token_secret)
client = oauth.Client(consumer, token)
def unwrap(line):
    """Parse *line* with BeautifulSoup and unwrap its ``headline`` tag.

    Returns the result of calling ``unwrap()`` on the tag reached through
    the parsed document's ``headline`` attribute.
    """
    parsed = BeautifulSoup(line)
    headline_tag = parsed.headline
    return headline_tag.unwrap()
# Non-greedy pattern matching one angle-bracketed tag such as "<b>" or "</b>".
strip_html = re.compile(r"<.*?>")
def get_positions(name_aff):
    """Search LinkedIn for a member and print their first headline.

    Args:
        name_aff: ``(name, affiliation)`` pair of text strings.

    Returns:
        ``(name, headlines)`` where ``name`` is the member name with
        non-ASCII characters dropped and ``headlines`` is the list of
        ``headline`` elements found in the API response (empty when the
        response contains none).
    """
    # Drop non-ASCII characters from both search terms.
    name = name_aff[0].encode('ascii', 'ignore')
    affiliation = name_aff[1].encode('ascii', 'ignore')

    # Percent-encode the search terms so that characters such as "&", "#"
    # or "=" in a name or affiliation cannot corrupt the query string.
    keywords = urlencode([("keywords", "%s %s" % (name, affiliation))])
    url = ("http://api.linkedin.com/v1/people-search:(people:(headline))"
           "?%s&count=1&facet=network,F,S,A" % keywords)
    resp, content = client.request(url)

    possible_positions = BeautifulSoup(content).find_all("headline")
    position = ""
    if possible_positions:
        # ``.string`` may be None; fall back to "" so the join cannot fail.
        position = possible_positions[0].string or ""

    pprint(", ".join([name, affiliation, position]))
    return (name, possible_positions)
# Spellings that all mean "no affiliation given".
_NO_AFFILIATION = ("none", "non", "na", "n/a", "", "0", "none.")

# (canonical name, substrings that must ALL occur), checked in this order.
_SUBSTRING_RULES = (
    (u"canonical", ("canonical",)),
    (u"alcatel lucent", ("alcatel", "lucent")),
    (u"wikimedia", ("wikimedia",)),
    (u"inktank", ("inktank",)),
    (u"yahoo", ("yahoo",)),
    (u"suse", ("suse",)),
    (u"tipit", ("tipit",)),
    (u"cloudscaling", ("cloudscaling",)),
    (u"comcast", ("comcast",)),
)

# (canonical name, accepted prefixes), checked in this order once no
# substring rule has matched.  Known misspellings are listed on purpose.
_PREFIX_RULES = (
    (u"aptira", ("aptira",)),
    (u"ntt", ("ntt",)),
    (u"target", ("target",)),
    (u"swiftstack", ("switfstack", "swiftstack")),
    (u"red hat", ("redhat", "red hat")),
    (u"piston", ("piston cloud", "pistoncloud")),
    (u"opscode", ("opscode",)),
    (u"citrix", ("citrix",)),
    (u"dey storage", ("dey storage",)),
    (u"dell", ("dell",)),
    (u"zhaw", ("zhaw",)),
    (u"nicira", ("nicira",)),
    (u"nexenta", ("nexenta",)),
    (u"nebula", ("nebula",)),
    (u"morphlabs", ("morphlabs",)),
    (u"maldivica", ("maldivica",)),
    (u"cisco", ("cisco",)),
    (u"samsung", ("samsung",)),
    (u"emc", ("emc",)),
)


def map_affiliations(aff):
    """Normalise a free-form affiliation string to a canonical name.

    Args:
        aff: affiliation text, expected to be lower-cased and stripped
            already, or ``None``.

    Returns:
        The canonical organisation name when a rule matches, ``u"none"``
        when no affiliation was given, and otherwise ``aff`` itself with
        non-ASCII characters dropped and lower-cased.
    """
    # Compare by value, not identity: ``aff is "n/a"`` is only true for
    # the very same string object, which scraped text never is.  ``None``
    # is handled first because the substring tests below would raise on it.
    if aff is None or aff in _NO_AFFILIATION:
        return u"none"

    if (aff == "rax" or "rackspace" in aff or "racksapce" in aff
            or "cloudbuilders" in aff):
        return u"rackspace"

    for canonical, needles in _SUBSTRING_RULES:
        if all(needle in aff for needle in needles):
            return canonical

    for canonical, prefixes in _PREFIX_RULES:
        if aff.startswith(prefixes):
            return canonical

    if (aff.startswith("hp") or " hp" in aff
            or "hewllet" in aff or "hewlett" in aff):
        return u"hp"

    return aff.encode('ascii', 'ignore').lower()
# Main program: scrape the OpenStack members page, normalise each member's
# affiliation and look every member up on LinkedIn.
print("Fetching members list.")
members_html = urlopen("http://www.openstack.org/community/members/").read()
soup = BeautifulSoup(members_html)
bullets = soup.find_all("li")

# Only list items containing "(" are used; the affiliation is the text
# between the first "(" and the following ")".
print("Scraping affiliations.")
affiliations = []
for x in bullets:
    if "(" in x.text:
        inside_parens = x.text.split("(")[1].split(")")[0]
        affiliations.append(inside_parens.lower().strip())

# The name is whatever precedes the first "(" in the same list items.
print("Scraping names.")
names = []
for x in bullets:
    if "(" in x.text:
        names.append(x.text.split("(")[0].strip())

print("Mapping affiliations...")
affiliations = [map_affiliations(raw_aff) for raw_aff in affiliations]

print("Zipping to names...")
name_affs = zip(names, affiliations)

print("Getting positions.")
name_map = [get_positions(pair) for pair in name_affs]
#print "Printing big list:" | |
#for k, aff in zip(name_map, affiliations): | |
# print "%s, %s (%s): %s" % (k[0][1], k[0][0], aff, k[1]) | |
#counts = {} | |
#for aff in affiliations: | |
# ign = counts.setdefault(aff, 0) | |
# counts[aff] += 1 | |
# | |
## display the count data, number of members per organization | |
#pprint(counts) | |
## list all the affiliations, sorted by highest representation | |
#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True)) | |
## give the total number of affiliations recorded | |
#print(len(counts)) | |
## display the top 20 | |
#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True)[:20]) | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment