Skip to content

Instantly share code, notes, and snippets.

@fmasanori
Last active February 2, 2020 05:59
Show Gist options
  • Save fmasanori/34d6741c7eff951fb39dae9c3973721f to your computer and use it in GitHub Desktop.
Baixa as Emendas da ALESP de 01.01.2010 até 31.12.2014 (uma legislatura)
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlretrieve
# Search URL split around the page number: the request URL is u1 + page + u2.
# Queries the ALESP "pesquisa-proposicoes" endpoint for amendments
# (natureId=4005) filed between 01/01/2010 and 31/12/2014; the result set
# spans 5167 pages (lastPage=5167).
u1 = 'https://www.al.sp.gov.br/alesp/pesquisa-proposicoes/?direction=acima&lastPage=5167&currentPage='
u2 = '&act=detalhe&idDocumento=&rowsPerPage=10&currentPageDetalhe=1&tpDocumento=&method=search&text=&natureId=4005&legislativeNumber=&legislativeYear=&natureIdMainDoc=loa&anoDeExercicio=&legislativeNumberMainDoc=&legislativeYearMainDoc=&strInitialDate=01%2F01%2F2010&strFinalDate=31%2F12%2F2014&author=&supporter=&politicalPartyId=&tipoDocumento=&stageId=&strVotedInitialDate=&strVotedFinalDate='
# Site root, prepended to the relative detail-page links found in the results.
base = 'https://www.al.sp.gov.br'
def baixa_pdf(u, nome):
    """Download the PDF attached to an ALESP proposition detail page.

    u: relative URL (path) of the proposition's detail page on the ALESP
       site; it is joined with the module-level ``base``.
    nome: raw caption text of the result row; a slice of it becomes the
       local file name.

    Saves ``<derived name>.pdf`` in the current directory, or prints a
    notice and returns when the page has no attached document.
    """
    url = base + u
    # Compute the end of the name slice before replacing '/' — replace()
    # preserves length, so the index found here stays valid afterwards.
    fim = nome.find('-') - 1
    nome = nome.replace('/', '-')  # '/' is illegal in file names
    # NOTE(review): magic offset 16 assumes a fixed caption prefix format
    # on the results page — confirm against the live site.
    nome = nome[16:fim]
    p = requests.get(url)
    s = bs(p.content, 'html.parser')
    x = s.find('table', class_='tabelaDados')
    # Fix: also bail out when the table is missing entirely. The original
    # passed this check with x = None (str(None) lacks the marker text)
    # and then crashed with AttributeError on x.find('a').
    if x is None or '(não existe documento)' in str(x):
        print('Sem documento:', nome)
        return
    pdf = x.find('a')['href']
    urlretrieve(pdf, nome + '.pdf')
# Walk every search-results page and download each amendment's PDF.
for k in range(5167):
    url = u1 + str(k) + u2
    p = requests.get(url)
    print('Página:', k)
    s = bs(p.content, 'html.parser')
    x = s.find('table', class_='tabela')
    # Fix: skip pages whose results table is missing (e.g. an error page)
    # instead of crashing with AttributeError on None.find_all.
    if x is None:
        print('Sem tabela:', k)
        continue
    emendas = x.find_all('tr')
    # emendas[0] is the header row; each remaining row links to one
    # proposition's detail page.
    for e in emendas[1:]:
        baixa_pdf(e.find('a')['href'],
                  e.find('strong').get_text().strip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment