# ewindisch/openstack_foundation_affiliations.py

## openstack_foundation_affiliations.py
print "Importing."
from pprint import pprint
from urllib2 import urlopen

from bs4 import BeautifulSoup

# LinkedIn API code
import re
from ConfigParser import SafeConfigParser
import oauth2 as oauth
import urlparse
from urllib import urlencode

print "Loading configuration."
cp = SafeConfigParser({
    'token_key': None,
    'token_secret': None,
    'consumer_key': None,
    'consumer_secret': None
})
cp.read([ "linkedin.conf" ])
token_key = cp.get("linkedin", "token_key")
token_secret = cp.get("linkedin", "token_secret")
consumer_key = cp.get("linkedin", "consumer_key")
consumer_secret = cp.get("linkedin", "consumer_secret")

print "Connecting application to LinkedIn."
consumer = oauth.Consumer(consumer_key, consumer_secret)
client = oauth.Client(consumer)

token=None
if not token_secret:
    request_token_url      = 'https://api.linkedin.com/uas/oauth/requestToken'
    resp, content = client.request(request_token_url, "POST")
    if resp['status'] != '200':
        raise Exception("Invalid response %s." % resp['status'])
    request_token = dict(urlparse.parse_qsl(content))

    authorize_url =      'https://api.linkedin.com/uas/oauth/authorize'
    print "Go to the following link in your browser:"
    print "%s?oauth_token=%s" % (authorize_url, request_token['oauth_token'])

    accepted = 'n'
    while accepted.lower() == 'n':
        accepted = raw_input('Have you authorized me? (y/n) ')
    oauth_verifier = raw_input('What is the PIN? ')

    access_token_url = 'https://api.linkedin.com/uas/oauth/accessToken'
    token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
    token.set_verifier(oauth_verifier)
    client = oauth.Client(consumer, token)

    resp, content = client.request(access_token_url, "POST")
    access_token = dict(urlparse.parse_qsl(content))

    print "Access Token:"
    print "    - oauth_token        = %s" % access_token['oauth_token']
    print "    - oauth_token_secret = %s" % access_token['oauth_token_secret']
    print
    print "You may now access protected resources using the access tokens above."
    print
    token_key = access_token['oauth_token']
    token_secret = access_token['oauth_token_secret']

print "Authenticating with LinkedIn"
token = oauth.Token(key=token_key, secret=token_secret)
client = oauth.Client(consumer, token)

def unwrap(line):
    """Parse ``line`` as markup and unwrap its ``headline`` element.

    The text is handed to BeautifulSoup, the ``headline`` tag is looked up
    by attribute access, and whatever that tag's ``unwrap()`` call returns
    is passed back to the caller.
    """
    parsed = BeautifulSoup(line)
    headline_tag = parsed.headline
    return headline_tag.unwrap()

# Non-greedy pattern matching a single "<...>" span (i.e. one markup tag).
# Nothing in the visible part of this file references it.
strip_html = re.compile(r'<.*?>')

def get_positions(name_aff):
    """Search LinkedIn for a member and print the first headline found.

    Args:
        name_aff: ``(name, affiliation)`` pair of text strings.  Both are
            reduced to ASCII (non-ASCII characters are dropped) before use.

    Returns:
        ``(name, possible_positions)``: the ASCII-encoded name and the
        result of ``find_all("headline")`` on the parsed API response.
    """
    name = name_aff[0].encode('ascii', 'ignore')
    affiliation = name_aff[1].encode('ascii', 'ignore')

    # Encode the search terms so spaces, '&', '#', etc. inside a name or
    # affiliation cannot corrupt the query string.
    keywords = urlencode({'keywords': "%s %s" % (name, affiliation)})
    url = ("http://api.linkedin.com/v1/people-search:(people:(headline))"
           "?%s&count=1&facet=network,F,S,A" % keywords)
    resp, content = client.request(url)
    linked_soup = BeautifulSoup(content)

    possible_positions = linked_soup.find_all("headline")
    if possible_positions:
        # bs4's .string can be None (e.g. a tag with several children);
        # fall back to "" so the join below never sees None.
        position = possible_positions[0].string or ""
    else:
        position = ""
    pprint(", ".join([name, affiliation, position]))
    return (name, possible_positions)

# Affiliation strings that mean "no affiliation given".
_NO_AFFILIATION = ("none", "non", "na", "n/a", "", "0", "none.")

# (required substrings, canonical name); every substring must occur
# somewhere in the affiliation.  Checked in order.
_AFFILIATION_SUBSTRINGS = (
    (("canonical",), u"canonical"),
    (("alcatel", "lucent"), u"alcatel lucent"),
    (("wikimedia",), u"wikimedia"),
    (("inktank",), u"inktank"),
    (("yahoo",), u"yahoo"),
    (("suse",), u"suse"),
    (("tipit",), u"tipit"),
    (("cloudscaling",), u"cloudscaling"),
    (("comcast",), u"comcast"),
)

# (accepted prefixes, canonical name); the affiliation must start with one
# of the prefixes.  Checked in order, after the substring rules.
_AFFILIATION_PREFIXES = (
    (("aptira",), u"aptira"),
    (("ntt",), u"ntt"),
    (("target",), u"target"),
    (("switfstack", "swiftstack"), u"swiftstack"),
    (("redhat", "red hat"), u"red hat"),
    (("piston cloud", "pistoncloud"), u"piston"),
    (("opscode",), u"opscode"),
    (("citrix",), u"citrix"),
    (("dey storage",), u"dey storage"),
    (("dell",), u"dell"),
    (("zhaw",), u"zhaw"),
    (("nicira",), u"nicira"),
    (("nexenta",), u"nexenta"),
    (("nebula",), u"nebula"),
    (("morphlabs",), u"morphlabs"),
    (("maldivica",), u"maldivica"),
    (("cisco",), u"cisco"),
    (("samsung",), u"samsung"),
    (("emc",), u"emc"),
)


def map_affiliations(aff):
    """Collapse a free-form affiliation string to a canonical company name.

    Args:
        aff: Affiliation text; the rules compare against lowercase
            literals, so callers are expected to lowercase and strip it
            first.  ``None`` is treated as "no affiliation".

    Returns:
        The canonical name (e.g. ``u"rackspace"``, ``u"hp"``, ``u"none"``)
        when a rule matches; otherwise the input encoded to ASCII
        (non-ASCII characters dropped) and lowercased.
    """
    # Must come first: the substring tests below raise TypeError on None.
    if aff is None:
        return u"none"
    if ("rackspace" in aff or aff == "rax" or "racksapce" in aff
            or "cloudbuilders" in aff):
        return u"rackspace"
    # Equality, not identity: ``is`` only matched when the interpreter
    # happened to share the string object.
    if aff in _NO_AFFILIATION:
        return u"none"
    for required, canonical in _AFFILIATION_SUBSTRINGS:
        if all(part in aff for part in required):
            return canonical
    for prefixes, canonical in _AFFILIATION_PREFIXES:
        if aff.startswith(prefixes):
            return canonical
    # "hewllet" is a misspelling deliberately matched alongside "hewlett".
    if (aff.startswith("hp") or " hp" in aff
            or "hewllet" in aff or "hewlett" in aff):
        return u"hp"
    return aff.encode('ascii', 'ignore').lower()


# Return to your normally scheduled program
print "Fetching members list."
soup = BeautifulSoup(urlopen("http://www.openstack.org/community/members/").read())
bullets = soup.find_all("li")

print "Scraping affiliations."
affiliations = [x.text.split("(")[1].split(")")[0].lower().strip()
                for x in bullets if "(" in x.text]
print "Scraping names."
names = [x.text.split("(")[0].strip()
                for x in bullets if "(" in x.text]

print "Mapping affiliations..."
affiliations = map(map_affiliations, affiliations)
print "Zipping to names..."
name_affs = zip(names, affiliations)

print "Getting positions."
name_map = map(get_positions, name_affs)

#print "Printing big list:"
#for k, aff in zip(name_map, affiliations):
#    print "%s, %s (%s): %s" % (k[0][1], k[0][0], aff, k[1])

#counts = {}
#for aff in affiliations:
#  ign = counts.setdefault(aff, 0)
#  counts[aff] += 1
#
## display the count data, number of members per organization
#pprint(counts)
## list all the affiliations, sorted by highest representation
#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True))
## give the total number of affiliations recorded
#print(len(counts))
## display the top 20
#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True)[:20])
#
	print "Importing."
	from pprint import pprint
	from urllib2 import urlopen

	from bs4 import BeautifulSoup

	# LinkedIn API code
	import re
	from ConfigParser import SafeConfigParser
	import oauth2 as oauth
	import urlparse
	from urllib import urlencode

	print "Loading configuration."
	cp = SafeConfigParser({
	'token_key': None,
	'token_secret': None,
	'consumer_key': None,
	'consumer_secret': None
	})
	cp.read([ "linkedin.conf" ])
	token_key = cp.get("linkedin", "token_key")
	token_secret = cp.get("linkedin", "token_secret")
	consumer_key = cp.get("linkedin", "consumer_key")
	consumer_secret = cp.get("linkedin", "consumer_secret")

	print "Connecting application to LinkedIn."
	consumer = oauth.Consumer(consumer_key, consumer_secret)
	client = oauth.Client(consumer)

	token=None
	if not token_secret:
	request_token_url = 'https://api.linkedin.com/uas/oauth/requestToken'
	resp, content = client.request(request_token_url, "POST")
	if resp['status'] != '200':
	raise Exception("Invalid response %s." % resp['status'])
	request_token = dict(urlparse.parse_qsl(content))

	authorize_url = 'https://api.linkedin.com/uas/oauth/authorize'
	print "Go to the following link in your browser:"
	print "%s?oauth_token=%s" % (authorize_url, request_token['oauth_token'])

	accepted = 'n'
	while accepted.lower() == 'n':
	accepted = raw_input('Have you authorized me? (y/n) ')
	oauth_verifier = raw_input('What is the PIN? ')

	access_token_url = 'https://api.linkedin.com/uas/oauth/accessToken'
	token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
	token.set_verifier(oauth_verifier)
	client = oauth.Client(consumer, token)

	resp, content = client.request(access_token_url, "POST")
	access_token = dict(urlparse.parse_qsl(content))

	print "Access Token:"
	print " - oauth_token = %s" % access_token['oauth_token']
	print " - oauth_token_secret = %s" % access_token['oauth_token_secret']
	print
	print "You may now access protected resources using the access tokens above."
	print
	token_key = access_token['oauth_token']
	token_secret = access_token['oauth_token_secret']

	print "Authenticating with LinkedIn"
	token = oauth.Token(key=token_key, secret=token_secret)
	client = oauth.Client(consumer, token)

	def unwrap(line):
	    """Parse ``line`` as markup and unwrap its ``headline`` element.

	    The text is handed to BeautifulSoup, the ``headline`` tag is looked
	    up by attribute access, and whatever that tag's ``unwrap()`` call
	    returns is passed back to the caller.
	    """
	    # The body must be indented under the ``def``; the flattened original
	    # could not be parsed.
	    return BeautifulSoup(line).headline.unwrap()

	# Non-greedy pattern matching a single "<...>" span (i.e. one markup tag).
	# Nothing in the visible part of this file references it.
	strip_html = re.compile(r'<.*?>')

	def get_positions(name_aff):
	    """Search LinkedIn for a member and print the first headline found.

	    Args:
	        name_aff: ``(name, affiliation)`` pair of text strings.  Both
	            are reduced to ASCII (non-ASCII characters are dropped)
	            before use.

	    Returns:
	        ``(name, possible_positions)``: the ASCII-encoded name and the
	        result of ``find_all("headline")`` on the parsed API response.
	    """
	    name = name_aff[0].encode('ascii', 'ignore')
	    affiliation = name_aff[1].encode('ascii', 'ignore')

	    # Encode the search terms so spaces, '&', '#', etc. inside a name or
	    # affiliation cannot corrupt the query string.
	    keywords = urlencode({'keywords': "%s %s" % (name, affiliation)})
	    url = ("http://api.linkedin.com/v1/people-search:(people:(headline))"
	           "?%s&count=1&facet=network,F,S,A" % keywords)
	    resp, content = client.request(url)
	    linked_soup = BeautifulSoup(content)

	    possible_positions = linked_soup.find_all("headline")
	    if possible_positions:
	        # bs4's .string can be None (e.g. a tag with several children);
	        # fall back to "" so the join below never sees None.
	        position = possible_positions[0].string or ""
	    else:
	        position = ""
	    pprint(", ".join([name, affiliation, position]))
	    return (name, possible_positions)

	# Affiliation strings that mean "no affiliation given".
	_NO_AFFILIATION = ("none", "non", "na", "n/a", "", "0", "none.")

	# (required substrings, canonical name); every substring must occur
	# somewhere in the affiliation.  Checked in order.
	_AFFILIATION_SUBSTRINGS = (
	    (("canonical",), u"canonical"),
	    (("alcatel", "lucent"), u"alcatel lucent"),
	    (("wikimedia",), u"wikimedia"),
	    (("inktank",), u"inktank"),
	    (("yahoo",), u"yahoo"),
	    (("suse",), u"suse"),
	    (("tipit",), u"tipit"),
	    (("cloudscaling",), u"cloudscaling"),
	    (("comcast",), u"comcast"),
	)

	# (accepted prefixes, canonical name); the affiliation must start with
	# one of the prefixes.  Checked in order, after the substring rules.
	_AFFILIATION_PREFIXES = (
	    (("aptira",), u"aptira"),
	    (("ntt",), u"ntt"),
	    (("target",), u"target"),
	    (("switfstack", "swiftstack"), u"swiftstack"),
	    (("redhat", "red hat"), u"red hat"),
	    (("piston cloud", "pistoncloud"), u"piston"),
	    (("opscode",), u"opscode"),
	    (("citrix",), u"citrix"),
	    (("dey storage",), u"dey storage"),
	    (("dell",), u"dell"),
	    (("zhaw",), u"zhaw"),
	    (("nicira",), u"nicira"),
	    (("nexenta",), u"nexenta"),
	    (("nebula",), u"nebula"),
	    (("morphlabs",), u"morphlabs"),
	    (("maldivica",), u"maldivica"),
	    (("cisco",), u"cisco"),
	    (("samsung",), u"samsung"),
	    (("emc",), u"emc"),
	)


	def map_affiliations(aff):
	    """Collapse a free-form affiliation string to a canonical company name.

	    Args:
	        aff: Affiliation text; the rules compare against lowercase
	            literals, so callers are expected to lowercase and strip it
	            first.  ``None`` is treated as "no affiliation".

	    Returns:
	        The canonical name (e.g. ``u"rackspace"``, ``u"hp"``,
	        ``u"none"``) when a rule matches; otherwise the input encoded to
	        ASCII (non-ASCII characters dropped) and lowercased.
	    """
	    # Must come first: the substring tests below raise TypeError on None.
	    if aff is None:
	        return u"none"
	    if ("rackspace" in aff or aff == "rax" or "racksapce" in aff
	            or "cloudbuilders" in aff):
	        return u"rackspace"
	    # Equality, not identity: ``is`` only matched when the interpreter
	    # happened to share the string object.
	    if aff in _NO_AFFILIATION:
	        return u"none"
	    for required, canonical in _AFFILIATION_SUBSTRINGS:
	        if all(part in aff for part in required):
	            return canonical
	    for prefixes, canonical in _AFFILIATION_PREFIXES:
	        if aff.startswith(prefixes):
	            return canonical
	    # "hewllet" is a misspelling deliberately matched alongside "hewlett".
	    if (aff.startswith("hp") or " hp" in aff
	            or "hewllet" in aff or "hewlett" in aff):
	        return u"hp"
	    return aff.encode('ascii', 'ignore').lower()


	# Return to your normally scheduled program
	print "Fetching members list."
	soup = BeautifulSoup(urlopen("http://www.openstack.org/community/members/").read())
	bullets = soup.find_all("li")

	print "Scraping affiliations."
	affiliations = [x.text.split("(")[1].split(")")[0].lower().strip()
	for x in bullets if "(" in x.text]
	print "Scraping names."
	names = [x.text.split("(")[0].strip()
	for x in bullets if "(" in x.text]

	print "Mapping affiliations..."
	affiliations = map(map_affiliations, affiliations)
	print "Zipping to names..."
	name_affs = zip(names, affiliations)

	print "Getting positions."
	name_map = map(get_positions, name_affs)

	#print "Printing big list:"
	#for k, aff in zip(name_map, affiliations):
	# print "%s, %s (%s): %s" % (k[0][1], k[0][0], aff, k[1])

	#counts = {}
	#for aff in affiliations:
	# ign = counts.setdefault(aff, 0)
	# counts[aff] += 1
	#
	## display the count data, number of members per organization
	#pprint(counts)
	## list all the affiliations, sorted by highest representation
	#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True))
	## give the total number of affiliations recorded
	#print(len(counts))
	## display the top 20
	#pprint(sorted([(v,k) for k, v in counts.items()], reverse=True)[:20])
	#