Skip to content

Instantly share code, notes, and snippets.

@fmasanori
Created April 27, 2018 14:08
Show Gist options
  • Save fmasanori/1451408dae1d0a8cdcaf255a9b8a32b1 to your computer and use it in GitHub Desktop.
Save fmasanori/1451408dae1d0a8cdcaf255a9b8a32b1 to your computer and use it in GitHub Desktop.
BNMP Scraping Court Orders (by Felipe Koblinger)
import pymongo
import requests
from redis import Redis
from rq import Queue
from bnmp_scraping_detail import download_court_order_detail
# HTTP headers sent with requests to the CNJ BNMP service. The values present
# the client as a desktop Chrome browser coming from the public BNMP page.
headers = {
    "Host": "www.cnj.jus.br",
    "Connection": "keep-alive",
    "Accept": "application/json, text/plain, */*",
    "Origin": "http://www.cnj.jus.br",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.132 Safari/537.36"
    ),
    "Content-Type": "application/json;charset=UTF-8",
    "Referer": "http://www.cnj.jus.br/bnmp/",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7,es;q=0.6",
}
def request(url, payload, headers, timeout=30):
    """POST *payload* as JSON to *url* and return the decoded JSON reply.

    Args:
        url: Address the POST request is sent to.
        payload: JSON-serialisable object sent as the request body.
        headers: Mapping of HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The response body as parsed by ``Response.json()``.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
    """
    response = requests.post(
        url, json=payload, headers=headers, timeout=timeout
    )
    # Surface HTTP failures here rather than trying to decode an error page
    # as JSON further down.
    response.raise_for_status()
    return response.json()
def download_state_page(state, page):
    """Download one result page for a state, store it and queue detail jobs.

    Posts a search to the BNMP ``pesquisar`` endpoint, upserts every entry of
    the reply's ``'mandados'`` list into the ``bnmp.mandados`` MongoDB
    collection (keyed by the entry's own ``id``), and enqueues a
    ``download_court_order_detail`` job for each entry on the
    ``'court_order_detail'`` RQ queue.

    Args:
        state: Value sent as the ``uf`` search criterion.
        page: Value sent as ``paginaAtual`` (the page to fetch).

    Raises:
        KeyError: The reply has no ``'mandados'`` key, or an entry has no
            ``'id'`` key.
    """
    # NOTE(review): 'null', 'true' and 'false' are sent as JSON *strings*,
    # not as null/booleans. Kept exactly as written; confirm the endpoint
    # really expects strings here.
    payload_json = {
        "criterio": {
            "orgaoJulgador": {"uf": state, "municipio": "", "descricao": ""},
            "orgaoJTR": {},
            "parte": {"documentos": [{"identificacao": 'null'}]},
        },
        "paginador": {"paginaAtual": page},
        "fonetica": "true",
        "ordenacao": {"porNome": 'false', "porData": 'true'},
    }
    response = request('http://www.cnj.jus.br/bnmp/rest/pesquisar',
                       payload_json, headers)

    # One queue (and one Redis client) serves every entry on the page; there
    # is no need to rebuild them per court order.
    queue = Queue('court_order_detail', connection=Redis())

    connection = pymongo.MongoClient('mongodb://localhost')
    try:
        court_orders = connection.bnmp.mandados
        for court_order in response['mandados']:
            # Reuse the service's identifier as MongoDB's primary key.
            court_order['_id'] = court_order.pop('id')
            # Collection.save() is deprecated in PyMongo 3 and removed in
            # PyMongo 4; an upserting replace_one() is its equivalent for a
            # document that already carries an _id.
            court_orders.replace_one(
                {'_id': court_order['_id']}, court_order, upsert=True
            )
            queue.enqueue(download_court_order_detail, court_order['_id'])
    finally:
        # Release the client's sockets even if a store/enqueue step fails.
        connection.close()
#download_state_page('SP', 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment