Skip to content

Instantly share code, notes, and snippets.

@magnunleno
Forked from marciomazza/propostas_pybr12.py
Last active July 20, 2016 22:38
Show Gist options
  • Save magnunleno/68346ed0013a9012cda7b1e407da62cd to your computer and use it in GitHub Desktop.
Save magnunleno/68346ed0013a9012cda7b1e407da62cd to your computer and use it in GitHub Desktop.
Scraping das propostas no site da pybr 12
#!/usr/bin/env python
import re
from bs4 import BeautifulSoup
from requests import get
from collections import defaultdict
# Non-greedy on purpose: a greedy ``.*`` would span from the first "[" to the
# last "]" and swallow the text between two separate bracket groups.
assuntos_re = re.compile(r"\[(.*?)\]")

EVENT_URL = 'http://speakerfight.com/events/python-brasil12-apresentacoes/'
SEM_ASSUNTO = 'Sem assunto'


def split_topics(titulo):
    """Split a proposal title into its topic tags and the remaining title.

    The first ``[...]`` group in the title is taken as a comma-separated
    list of topics. Only that group is removed from the title; any later
    bracketed text is kept as part of the title.

    Args:
        titulo: Proposal title, e.g. ``"[Web, Django] My talk"``.

    Returns:
        A ``(topics, clean_title)`` tuple. ``topics`` is a list of stripped,
        non-empty topic names (empty when the title has no usable tag), and
        ``clean_title`` is the title without the tag group, stripped.
    """
    match = assuntos_re.search(titulo)
    if match is None:
        return [], titulo
    assuntos = [parte.strip() for parte in match.group(1).split(',')]
    # "[]" or "[Web, ]" would otherwise yield an empty-string topic.
    assuntos = [assunto for assunto in assuntos if assunto]
    limpo = (titulo[:match.start()] + titulo[match.end():]).strip()
    return assuntos, limpo


def group_proposals(propostas):
    """Group proposal titles by author and by topic.

    Args:
        propostas: Iterable of ``(author, title)`` pairs.

    Returns:
        A ``(by_author, by_topic)`` tuple of plain dicts. ``by_author`` maps
        each author to the list of their full titles. ``by_topic`` maps each
        topic to the list of tag-stripped titles; titles with no usable topic
        tag are listed unmodified under ``'Sem assunto'``.
    """
    data = defaultdict(list)
    topicos = defaultdict(list)
    for autor, titulo in propostas:
        data[autor].append(titulo)
        assuntos, limpo = split_topics(titulo)
        if assuntos:
            for assunto in assuntos:
                topicos[assunto].append(limpo)
        else:
            topicos[SEM_ASSUNTO].append(titulo)
    # Plain dicts so that later lookups cannot silently create new keys.
    return dict(data), dict(topicos)


def fetch_proposals(url=EVENT_URL, timeout=30):
    """Download the event page and extract ``(author, title)`` pairs.

    Args:
        url: Address of the event page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(author, title)`` tuples in page order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the page has no ``div#event-proposals`` element.
    """
    res = get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the listing.
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')
    div_event_proposals = soup.find('div', id='event-proposals')
    if div_event_proposals is None:
        raise RuntimeError(
            "could not find div#event-proposals at {}".format(url))
    propostas = []
    for p in div_event_proposals.find_all(attrs={'class': 'panel-body'}):
        # NOTE(review): assumes every panel-body holds an <h3><a> title and a
        # p.proposal-metadata <a> author link - confirm against the live page.
        titulo = p.h3.a.text.strip()
        autor = p.find('p', attrs={'class': 'proposal-metadata'}).a.text
        propostas.append((autor, titulo))
    return propostas


def main():
    """Scrape the proposals and print them by speaker and by topic."""
    propostas = fetch_proposals()
    for autor, titulo in propostas:
        print(autor)
        print(' ', titulo)

    data, topicos = group_proposals(propostas)

    # Speakers with the most proposals come first.
    ranking = sorted(data.items(), key=lambda x: len(x[1]), reverse=True)
    print("\nPalestrantes com mais palestras...")
    for autor, titulos in ranking:
        print("{}: {} palestras".format(autor, len(titulos)))
        for titulo in titulos:
            print(" {}".format(titulo))
        print("")

    print("\nResumo de topicos...")
    for assunto, titulos in topicos.items():
        print("[{}]".format(assunto))
        for titulo in titulos:
            print(" ", titulo)
        print("")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment