Skip to content

Instantly share code, notes, and snippets.

@ewindisch
Created August 2, 2012 00:51
Show Gist options
  • Save ewindisch/3232043 to your computer and use it in GitHub Desktop.
Save ewindisch/3232043 to your computer and use it in GitHub Desktop.
LinkedIn OpenStack Foundation Members Mapping
print "Importing."
from pprint import pprint
from urllib2 import urlopen
from bs4 import BeautifulSoup
# LinkedIn API code
import re
from ConfigParser import SafeConfigParser
import oauth2 as oauth
import urlparse
from urllib import urlencode
print("Loading configuration.")

# All credentials live in one section of linkedin.conf.
_CONFIG_SECTION = "linkedin"

# Each option defaults to None, so an option missing from the section reads
# back as None instead of raising.
# NOTE(review): defaults only cover missing *options*; confirm what should
# happen when the file or the whole section is absent.
cp = SafeConfigParser(dict.fromkeys(
    ('token_key', 'token_secret', 'consumer_key', 'consumer_secret')))
cp.read(["linkedin.conf"])

# Access token (may be None; checked before the OAuth handshake below).
token_key = cp.get(_CONFIG_SECTION, "token_key")
token_secret = cp.get(_CONFIG_SECTION, "token_secret")
# Application (consumer) credentials.
consumer_key = cp.get(_CONFIG_SECTION, "consumer_key")
consumer_secret = cp.get(_CONFIG_SECTION, "consumer_secret")
print("Connecting application to LinkedIn.")
consumer = oauth.Consumer(consumer_key, consumer_secret)
client = oauth.Client(consumer)
token = None

# Run the interactive authorization whenever either half of the stored access
# token is missing: a key without a secret (or the reverse) cannot be used to
# build an oauth.Token below.
if not (token_key and token_secret):
    # Step 1: obtain a temporary request token.
    request_token_url = 'https://api.linkedin.com/uas/oauth/requestToken'
    resp, content = client.request(request_token_url, "POST")
    if resp['status'] != '200':
        raise Exception("Invalid response %s." % resp['status'])
    request_token = dict(urlparse.parse_qsl(content))

    # Step 2: send the user to LinkedIn to approve the request token.
    authorize_url = 'https://api.linkedin.com/uas/oauth/authorize'
    print("Go to the following link in your browser:")
    print("%s?oauth_token=%s" % (authorize_url, request_token['oauth_token']))

    # Keep asking until the user answers anything other than "n".
    accepted = 'n'
    while accepted.lower() == 'n':
        accepted = raw_input('Have you authorized me? (y/n) ')
    oauth_verifier = raw_input('What is the PIN? ')

    # Step 3: exchange the verified request token for an access token.
    access_token_url = 'https://api.linkedin.com/uas/oauth/accessToken'
    token = oauth.Token(request_token['oauth_token'],
                        request_token['oauth_token_secret'])
    token.set_verifier(oauth_verifier)
    client = oauth.Client(consumer, token)
    resp, content = client.request(access_token_url, "POST")
    # Fail with the HTTP status, as for the request token, instead of a
    # KeyError on the missing 'oauth_token' field further down.
    if resp['status'] != '200':
        raise Exception("Invalid response %s." % resp['status'])
    access_token = dict(urlparse.parse_qsl(content))

    # Shown so the values can be copied into linkedin.conf for later runs.
    print("Access Token:")
    print(" - oauth_token = %s" % access_token['oauth_token'])
    print(" - oauth_token_secret = %s" % access_token['oauth_token_secret'])
    print("")
    print("You may now access protected resources using the access tokens above.")
    print("")
    token_key = access_token['oauth_token']
    token_secret = access_token['oauth_token_secret']

# From here on every request made through `client` is signed with the
# access token (either loaded from the config or just obtained).
print("Authenticating with LinkedIn")
token = oauth.Token(key=token_key, secret=token_secret)
client = oauth.Client(consumer, token)
def unwrap(line):
    """Parse *line* as markup and unwrap its first ``<headline>`` tag.

    Returns whatever bs4's ``Tag.unwrap()`` returns for that tag.
    Raises AttributeError when *line* contains no ``<headline>`` element,
    because the attribute lookup then yields None.
    """
    parsed = BeautifulSoup(line)
    headline_tag = parsed.headline
    return headline_tag.unwrap()
# Non-greedy pattern matching a single "<...>" span on one line (tag-like
# markup). Not referenced by any code visible in this file.
strip_html = re.compile(r'<.*?>')
def get_positions(name_aff):
    """Search LinkedIn for a member and print the first headline found.

    Args:
        name_aff: ``(name, affiliation)`` pair of text strings.

    Returns:
        ``(name, headlines)`` where ``name`` is the ASCII-only form of the
        member name and ``headlines`` is the list of ``<headline>`` tags
        parsed from the API response (empty when nothing matched).

    Side effects:
        Issues one signed request through the module-level ``client`` and
        pretty-prints ``"name, affiliation, headline"``.
    """
    # Non-ASCII characters are dropped rather than transliterated.
    name = name_aff[0].encode('ascii', 'ignore')
    affiliation = name_aff[1].encode('ascii', 'ignore')

    # Escape the free-text keywords so characters such as '&', '#' or '='
    # in a name or affiliation cannot break the query string apart.
    keywords = urlencode({'keywords': "%s %s" % (name, affiliation)})
    url = ("http://api.linkedin.com/v1/people-search:(people:(headline))"
           "?%s&count=1&facet=network,F,S,A" % keywords)

    resp, content = client.request(url)
    linked_soup = BeautifulSoup(content)
    possible_positions = linked_soup.find_all("headline")

    position = ""
    if possible_positions:
        # .string is None for an empty tag or one with several children;
        # fall back to "" so the join below cannot raise TypeError.
        position = possible_positions[0].string or ""

    pprint(", ".join([name, affiliation, position]))
    return (name, possible_positions)
# Raw answers that all mean "no affiliation given".
_NO_AFFILIATION = frozenset(["none", "non", "na", "n/a", "", "0", "none."])

# (canonical name, fragments): matches when EVERY fragment occurs somewhere
# in the raw affiliation. Checked in order; first match wins.
_SUBSTRING_RULES = (
    (u"canonical", ("canonical",)),
    (u"alcatel lucent", ("alcatel", "lucent")),
    (u"wikimedia", ("wikimedia",)),
    (u"inktank", ("inktank",)),
    (u"yahoo", ("yahoo",)),
    (u"suse", ("suse",)),
    (u"tipit", ("tipit",)),
    (u"cloudscaling", ("cloudscaling",)),
    (u"comcast", ("comcast",)),
)

# (canonical name, prefixes): matches when the raw affiliation starts with
# ANY of the prefixes (known misspellings included). Checked in order, after
# the substring rules.
_PREFIX_RULES = (
    (u"aptira", ("aptira",)),
    (u"ntt", ("ntt",)),
    (u"target", ("target",)),
    (u"swiftstack", ("switfstack", "swiftstack")),
    (u"red hat", ("redhat", "red hat")),
    (u"piston", ("piston cloud", "pistoncloud")),
    (u"opscode", ("opscode",)),
    (u"citrix", ("citrix",)),
    (u"dey storage", ("dey storage",)),
    (u"dell", ("dell",)),
    (u"zhaw", ("zhaw",)),
    (u"nicira", ("nicira",)),
    (u"nexenta", ("nexenta",)),
    (u"nebula", ("nebula",)),
    (u"morphlabs", ("morphlabs",)),
    (u"maldivica", ("maldivica",)),
    (u"cisco", ("cisco",)),
    (u"samsung", ("samsung",)),
    (u"emc", ("emc",)),
)


def map_affiliations(aff):
    """Collapse a free-text affiliation into a canonical organization name.

    Matching is case-sensitive against lowercase patterns, so callers should
    pass already-lowercased text (the scraping code in this file does).

    Args:
        aff: raw affiliation text, or None.

    Returns:
        The canonical lowercase name as text: a known organization,
        ``"none"`` for blank / not-applicable answers (and for None), or the
        input itself with non-ASCII characters dropped and lowercased when
        no rule matches.
    """
    # None must be handled before any substring test: `"x" in None` raises.
    # Equality (not identity) is required to recognise the "none" spellings.
    if aff is None or aff in _NO_AFFILIATION:
        return u"none"

    if ("rackspace" in aff or aff == "rax" or "racksapce" in aff
            or "cloudbuilders" in aff):
        return u"rackspace"

    for canonical, fragments in _SUBSTRING_RULES:
        if all(fragment in aff for fragment in fragments):
            return canonical

    for canonical, prefixes in _PREFIX_RULES:
        if aff.startswith(prefixes):
            return canonical

    # " hp" catches names such as "foo hp labs"; "hewllet" is a common typo.
    if (aff.startswith("hp") or " hp" in aff
            or "hewllet" in aff or "hewlett" in aff):
        return u"hp"

    # Unknown organization: keep it, minus non-ASCII characters. Decoding
    # back keeps the return type consistent with the branches above.
    return aff.encode('ascii', 'ignore').decode('ascii').lower()
# Return to your normally scheduled program
print("Fetching members list.")
members_page = urlopen("http://www.openstack.org/community/members/")
try:
    members_html = members_page.read()
finally:
    # Release the HTTP connection even if the read fails.
    members_page.close()
soup = BeautifulSoup(members_html)
bullets = soup.find_all("li")

# Only list items containing "(" are treated as "Name (Affiliation)" member
# entries; extract each item's text once and reuse it for both columns so
# the two lists stay aligned.
member_texts = [x.text for x in bullets if "(" in x.text]

print("Scraping affiliations.")
# Text between the first "(" and the following ")".
affiliations = [entry.split("(")[1].split(")")[0].lower().strip()
                for entry in member_texts]

print("Scraping names.")
# Everything before the first "(".
names = [entry.split("(")[0].strip() for entry in member_texts]

print("Mapping affiliations...")
affiliations = map(map_affiliations, affiliations)

print("Zipping to names...")
name_affs = zip(names, affiliations)

# One LinkedIn search request per member.
print("Getting positions.")
name_map = map(get_positions, name_affs)
#print "Printing big list:"
#for k, aff in zip(name_map, affiliations):
# print "%s, %s (%s): %s" % (k[0][1], k[0][0], aff, k[1])
#counts = {}
#for aff in affiliations:
# ign = counts.setdefault(aff, 0)
# counts[aff] += 1
#
## display the count data, number of members per organization
#pprint(counts)
## list all the affiliations, sorted by highest representation
#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True))
## give the total number of affiliations recorded
#print(len(counts))
## display the top 20
#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True)[:20])
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment