Skip to content

Instantly share code, notes, and snippets.

@tcurvelo
Created January 12, 2019 20:09
Show Gist options
  • Save tcurvelo/e92d75fa1ec69777a46e87586267bec0 to your computer and use it in GitHub Desktop.
Save tcurvelo/e92d75fa1ec69777a46e87586267bec0 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from datetime import date
from urllib.parse import urlencode
import scrapy
class TjpbSpider(scrapy.Spider):
    """Crawl decision listings from the juris.tjpb.jus.br search endpoint.

    One search is issued per class name listed in ``start_requests``. Every
    result page is scraped for its ``.result-item`` entries, and the ">"
    pagination link is followed until a page no longer has one.
    """

    name = "tjpb"
    allowed_domains = ["juris.tjpb.jus.br"]

    def start_requests(self):
        """Yield one search request per class name over a fixed date range.

        The range (2013-01-01 to 2018-12-31) is embedded in the ``as_q``
        parameter as an ``inmeta:ORD_DATA_JULGAMENTO:daterange`` filter.
        """
        start_date = date(2013, 1, 1).strftime("%Y-%m-%d")
        end_date = date(2018, 12, 31).strftime("%Y-%m-%d")
        classes = [
            "suspensão de segurança",
            "suspensão de liminar",
            "antecipação de tutela",
        ]
        # NOTE(review): these look like Google Search Appliance query options;
        # confirm against the site before changing any of them.
        base_params = {
            "q": "",
            "as_oq": "",
            "as_eq": "",
            "as_epq": "",
            "decisao": "Decisao",
            "output": "xml_no_dtd",
            "proxystylesheet": "tjpb_index",
            "oe": "UTF-8",
            "ie": "UTF-8",
            "ud": "1",
            "lr": "lang_pt",
            "getfields": "*",
            "requiredfields": "BASE:Decisao",
            "sort": "date:D:S:d1",
            "as_q": "inmeta:ORD_DATA_JULGAMENTO:daterange:{}..{}".format(
                start_date, end_date
            ),
            "ulang": "en",
            "access": "p",
            "entqr": "3",
            "entqrm": "0",
            "client": "tjpb_index",
            "filter": "0",
            "start": "0",
            "site": "jurisp_digitalizada",
        }
        for classe in classes:
            # Build a per-request copy instead of mutating the shared dict.
            # "q" keeps its original (first) position, so the encoded URL is
            # unchanged. The class name is sent wrapped in double quotes.
            params = dict(base_params, q='"{}"'.format(classe))
            url = "http://juris.tjpb.jus.br/search?{}".format(urlencode(params))
            yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Yield one item per ``.result-item`` and follow the next-page link.

        Each item has the keys ``url``, ``processo``, ``relator``,
        ``julgamento`` and ``ementa``. A field whose selector matches nothing
        is emitted as ``None`` rather than aborting the whole page.
        """
        for decisao in response.css(".result-item"):
            yield {
                "url": self._clean(decisao.css("a::attr(href)").get()),
                "processo": self._clean(decisao.css("a::text").get()),
                # First text node after the <b> label inside the result item.
                "relator": self._clean(
                    decisao.xpath(
                        ".//b[contains(text(),'Relator:')]/following-sibling::text()"
                    ).get()
                ),
                "julgamento": self._clean(
                    decisao.xpath(
                        ".//b[contains(text(),'Data de Julgamento:')]/"
                        "following-sibling::text()"
                    ).get()
                ),
                "ementa": self._clean(decisao.css(".td_ementa::text").get()),
            }

        next_page = response.css(".pagination-link:contains('>')::attr(href)").get()
        # The last page has no ">" link; following None would raise, so the
        # crawl for this query simply ends here.
        if next_page:
            yield response.follow(next_page, callback=self.parse)

    @staticmethod
    def _clean(value):
        """Return ``value`` without surrounding whitespace, or None if absent."""
        return value.strip() if value is not None else None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment