Skip to content

Instantly share code, notes, and snippets.

@marciomazza
Last active July 20, 2016 22:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save marciomazza/ed51a7e9b2e6f7c6b5bce9c78afccd19 to your computer and use it in GitHub Desktop.
Save marciomazza/ed51a7e9b2e6f7c6b5bce9c78afccd19 to your computer and use it in GitHub Desktop.
Scraping das propostas no site da pybr 12
#!/usr/bin/env python
# requirements:
#
# beautifulsoup4
# requests
# unidecode
from itertools import groupby
from bs4 import BeautifulSoup
from requests import get
from unidecode import unidecode
def ler_propostas(
        url='http://speakerfight.com/events/python-brasil12-apresentacoes/',
        timeout=30):
    """Yield ``(autor, titulo)`` pairs scraped from a Speakerfight event page.

    Args:
        url: Address of the event page to download. Defaults to the
            Python Brasil 12 talks event.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Yields:
        One ``(autor, titulo)`` tuple of stripped strings per element with
        class ``panel-body`` found inside the ``div`` whose id is
        ``event-proposals``, in document order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Without a timeout a stalled server would hang the script forever.
    res = get(url, timeout=timeout)
    # Fail loudly instead of trying to parse an error page.
    res.raise_for_status()

    soup = BeautifulSoup(res.content, 'html.parser')
    div_event_proposals = soup.find('div', id='event-proposals')
    propostas = div_event_proposals.find_all(attrs={'class': 'panel-body'})
    for p in propostas:
        titulo = p.h3.a.text.strip()
        # Strip the author as well: callers group by this value, so stray
        # whitespace in the markup must not turn one author into two.
        autor = p.find('p', attrs={'class': 'proposal-metadata'}).a.text.strip()
        yield autor, titulo
def _chave_ordenacao(proposta):
    """Return the sort key for one ``(autor, titulo)`` pair.

    The key orders by author ignoring accents and case, then by the raw
    author string, then by title ignoring accents and case. Keeping the raw
    author in the key guarantees that all entries with the exact same author
    end up adjacent, which ``groupby`` below relies on.
    """
    autor, titulo = proposta
    return unidecode(autor).lower(), autor, unidecode(titulo).lower()


# All proposals, ordered by author and then title.
propostas = sorted(ler_propostas(), key=_chave_ordenacao)

# groupby only merges *adjacent* items with equal keys; the sort above makes
# every author's proposals contiguous. Result: [(autor, [titulo, ...]), ...].
propostas_por_autor = [
    (autor, [titulo for _, titulo in grupo])
    for autor, grupo in groupby(propostas, key=lambda p: p[0])
]

print('#### PROPOSTAS POR AUTOR ####\n')
for autor, titulos in propostas_por_autor:
    print(autor)
    for titulo in titulos:
        print(' ', titulo)

# (count, autor) pairs for authors with at least two proposals, highest
# count first (ties fall back to the author name, also descending).
autores_com_mais_de_uma_proposta = sorted(
    ((len(titulos), autor)
     for autor, titulos in propostas_por_autor
     if len(titulos) > 1),
    reverse=True,
)

print('\n#### AUTORES COM MAIS DE UMA PROPOSTA ####\n')
for num, autor in autores_com_mais_de_uma_proposta:
    print(num, autor)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment