Last active
August 29, 2015 14:02
-
-
Save pamplifier/a1276b4f9be7927d7f0d to your computer and use it in GitHub Desktop.
Meetup.com Group's Members' Names Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
import json | |
""" | |
FUNCTION: | |
scrapeMemberNames | |
PARAMETERS: | |
url : str | |
do not forget quotation marks | |
Utilize meetup.com's members API to access members database of groups. | |
Input whatever parameters into API console | |
group_urlname: PyLadies (unique name in group's URL) | |
page: 200 (number of entries to include) | |
only: name (only return member name) | |
Utilize returned signed URL | |
RETURNS: | |
listonames : List of names to traverse through | |
EXAMPLE: | |
scrapeMemberNames('http://api.meetup.com/2/members?order=name&group_urlname=PyLadiesTC&offset=0&format=json&only=name&page=40&sig_id=152684422&sig=86764dd082256d269eff311bf15fa8f7e3a865bd')
output: [u'Alison Link', | |
u'Angie', | |
u'Anna Mandy', .....] | |
""" | |
def scrapeMemberNames(url):
    """Return the member names served by a Meetup members-API URL.

    The URL is fetched with an HTTP GET and its body is decoded as JSON.
    The decoded object must have a 'results' list whose entries each carry
    a 'name' key; those names are returned in response order.

    PARAMETERS:
        url : str
            Signed members-API URL that requests JSON output, e.g. one
            produced by the Meetup API console with format=json and
            only=name.

    RETURNS:
        list : one name per entry of the response's 'results' list.

    RAISES:
        requests.exceptions.RequestException : the request failed, timed
            out, or the server answered with an HTTP error status.
        ValueError : the response body is not valid JSON.
        KeyError : the payload has no 'results' key, or an entry has no
            'name' key.
    """
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP failures here instead of as a confusing KeyError below.
    response.raise_for_status()
    # Decode the body as JSON directly. Routing it through an HTML parser
    # and reading the first <p> only works with parsers that wrap bare text
    # in <p>, and lets '<' or '&' inside a name be reinterpreted as markup.
    payload = response.json()
    return [member['name'] for member in payload['results']]
# Example: request member names for the PyLadiesTC group (page=40 entries).
# The returned list is not stored, so it is only visible in an interactive
# session.
scrapeMemberNames(
    'http://api.meetup.com/2/members'
    '?order=name&group_urlname=PyLadiesTC'
    '&offset=0&format=json&only=name&page=40'
    '&sig_id=152684422&sig=86764dd082256d269eff311bf15fa8f7e3a865bd'
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to list the PyMNtos members: we have to call the scraper twice, because a single request at offset 0 only lets us view 200 people at a time.
# First page of PyMNtos member names (offset=0).
list1 = scrapeMemberNames(
    'http://api.meetup.com/2/members?order=name'
    '&group_urlname=PyMNtos-Twin-Cities-Python-User-Group'
    '&offset=0&format=json&only=name&page=400'
    '&sig_id=152684422&sig=ba4d85c4e5011daf022e75b9c7c59eecc9725eea'
)
# Second page (offset=1); each signed URL is used exactly as issued.
list2 = scrapeMemberNames(
    'http://api.meetup.com/2/members?order=name'
    '&group_urlname=PyMNtos-Twin-Cities-Python-User-Group'
    '&offset=1&format=json&only=name&page=200'
    '&sig_id=152684422&sig=51d8211c832624d64b177633ba4e402826df0fb2'
)
# Append the second page onto the first, in place.
list1.extend(list2)
# Bare expression: displays the combined list in an interactive session.
list1