Skip to content

Instantly share code, notes, and snippets.

@atdt
Created August 16, 2016 16:56
Show Gist options
  • Save atdt/f8317b8ded7a38feb216decd9cefbb42 to your computer and use it in GitHub Desktop.
Save atdt/f8317b8ded7a38feb216decd9cefbb42 to your computer and use it in GitHub Desktop.
import urllib.request
import re
import bs4
def soup_url(url, timeout=30.0):
    """Fetch *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on blocking network operations; passed
            straight through to ``urllib.request.urlopen``.

    Returns:
        A ``bs4.BeautifulSoup`` object built from the raw response bytes
        using the ``'html.parser'`` backend.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request
            (a timeout may also surface as a socket timeout error).
    """
    req = urllib.request.Request(url)
    # Bound the wait so a stalled server cannot hang the script forever.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return bs4.BeautifulSoup(resp.read(), 'html.parser')
# Collect the alt text of every image inside a ``.gallerybox`` element on the
# WMF staff page (presumably each alt text is a person's name -- confirm
# against the live page markup).
soup = soup_url('https://wikimediafoundation.org/wiki/Staff_and_contractors')
wmf_folks = set(img['alt'] for img in soup.select('.gallerybox img'))

# Collect the link text of every anchor whose href matches 'contributor' on
# the Debian contributors page.
soup = soup_url('https://contributors.debian.org/')
deb_folks = set(
    a.text for a in soup.find_all('a', href=re.compile('contributor'))
)

# Print the strings that occur in both sets.
print(wmf_folks.intersection(deb_folks))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment