Skip to content

Instantly share code, notes, and snippets.

@fmasanori
Created April 27, 2018 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fmasanori/0e46fb03bfe1af375555e1c22bf35dfc to your computer and use it in GitHub Desktop.
Save fmasanori/0e46fb03bfe1af375555e1c22bf35dfc to your computer and use it in GitHub Desktop.
BNMP scraping (by Felipe Koblinger)
import requests
import json
import time
import threading
from redis import Redis
from rq import Queue
from bnmp_scraping_court_orders import download_state_page
# Browser-like HTTP headers sent along with every POST to the CNJ search
# endpoint (passed to ``request`` by ``run``).
headers = {
    'Host': 'www.cnj.jus.br',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'Origin': 'http://www.cnj.jus.br',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'Content-Type': 'application/json;charset=UTF-8',
    'Referer': 'http://www.cnj.jus.br/bnmp/',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7,es;q=0.6',
}
def request(url, payload, headers, timeout=60):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    The call is retried indefinitely: whenever an attempt fails, a message
    is printed, the function sleeps 10 seconds and then tries again.

    Args:
        url: Endpoint to POST to.
        payload: JSON-serialisable object sent as the request body.
        headers: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before the attempt is
            treated as failed; forwarded to ``requests.post``.

    Returns:
        The response body as parsed by ``Response.json()``.
    """
    while True:
        try:
            # Without a timeout a stalled connection would block forever and
            # the retry logic below would never get a chance to run.
            post = requests.post(url, json=payload, headers=headers,
                                 timeout=timeout)
            return post.json()
        # RequestException covers connection/timeout/transport failures;
        # ValueError covers a response body that is not valid JSON.
        # (The previous ``except e:`` evaluated an undefined name and raised
        # NameError instead of retrying.)
        except (requests.RequestException, ValueError):
            print('Something wrong happened... sleeping 10 seconds')
            time.sleep(10)
# URL that ``run`` POSTs its search payload to (via ``request``).
search = 'http://www.cnj.jus.br/bnmp/rest/pesquisar'
def run(UF):
    """Enqueue one download job per result page for the state ``UF``.

    Posts the search payload for page 1 of ``UF``, reads the total page
    count from ``response['paginador']['totalPaginas']`` and then enqueues
    ``download_state_page(UF, page)`` on the ``court_orders`` RQ queue for
    every page from 1 through that total (inclusive).

    Args:
        UF: State code placed in the ``orgaoJulgador.uf`` search criterion.
    """
    # NOTE(review): 'null', 'true' and 'false' are sent as JSON *strings*,
    # not as null/booleans -- confirm that the endpoint expects this.
    criteria = {
        "orgaoJulgador": {"uf": UF, "municipio": "", "descricao": ""},
        "orgaoJTR": {},
        "parte": {"documentos": [{"identificacao": 'null'}]},
    }
    payload_json = {
        "criterio": criteria,
        "paginador": {"paginaAtual": 1},
        "fonetica": "true",
        "ordenacao": {"porNome": 'false', "porData": 'true'},
    }

    print('Downloading %s state' % UF)
    first_page = request(search, payload_json, headers)
    total_pages = first_page['paginador']['totalPaginas']
    print('== %s pages' % total_pages)

    # One job per page; the jobs are executed by whatever RQ workers
    # consume the 'court_orders' queue.
    court_orders = Queue('court_orders', connection=Redis())
    for page in range(1, total_pages + 1):
        court_orders.enqueue(download_state_page, UF, page)
# Two-letter state codes to process, in the order they are enqueued.
# NOTE(review): 'SP' does not appear in this list -- confirm whether that
# omission is intentional.
states = '''AC AL AP AM BA CE DF ES
GO MA MT MS MG PA PB PR
PE PI RJ RN RS RO RR SC
SE TO'''.split()

# Runs at import time: query each state and enqueue its page downloads.
for state in states:
    run(state)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment