Skip to content

Instantly share code, notes, and snippets.

@jjesusfilho
Last active June 28, 2017 17:01
Show Gist options
  • Save jjesusfilho/2bca202084df26577963c5a83faeabae to your computer and use it in GitHub Desktop.
Save jjesusfilho/2bca202084df26577963c5a83faeabae to your computer and use it in GitHub Desktop.
função para extrair jurisprudência do TJRJ
#' Download case-law search metadata from the TJRJ search endpoint
#'
#' Sends a free-text query to the TJRJ search URL, reads the total number of
#' hits from the first result page and then walks the result pages ten hits
#' at a time, collecting one row per hit.
#'
#' Requires `httr`, `xml2`, `stringr` and `magrittr` to be attached by the
#' caller (the function uses `GET`, `content`, `xml_*`, `str_replace_all`
#' and `%>%` unqualified).
#'
#' @param BuscaLivre Character string with the free-text search expression.
#' @param quoted If `TRUE` (default), runs of whitespace in `BuscaLivre` are
#'   replaced by `+` and the result is wrapped in double quotes before being
#'   sent as the `q` parameter.
#' @return A data frame with the columns `numero`, `classe`, `ementa`,
#'   `data.julgamento`, `url` and `pagina` (the `start` offset of the page the
#'   row came from). An empty `data.frame()` is returned when the hit counter
#'   cannot be read or no page could be parsed. Pages that fail are skipped
#'   with a warning.
tjrjSG_meta <- function(BuscaLivre, quoted = TRUE) {
  if (isTRUE(quoted)) {
    # Wrap the expression in literal double quotes. paste0() is used instead
    # of deparse() because deparse() also escapes backslashes/quotes and may
    # escape non-ASCII characters depending on the locale.
    # NOTE(review): httr percent-encodes query values, so the "+" inserted
    # here presumably reaches the server as a literal plus sign rather than a
    # space -- confirm this is what the search engine expects.
    BuscaLivre <- BuscaLivre %>%
      str_replace_all("\\s+", "+")
    BuscaLivre <- paste0("\"", BuscaLivre, "\"")
  }

  url <- "http://www.tjrj.jus.br/search?site=juris&client=juris&output=xml_no_dtd&proxystylesheet=juris"
  query <- list(
    q = BuscaLivre,
    btnG = "Pesquisar",
    processType = "cnj",
    site = "juris",
    client = "juris",
    output = "xml_no_dtd",
    proxystylesheet = "juris",
    entqrm = 0,
    oe = "UTF-8",
    ie = "UTF-8",
    ud = 1,
    exclude_apps = 1,
    filter = 0,
    getfields = "*",
    ulang = "pt-BR",
    lr = "lang_pt",
    sort = "date:D:S:d1",
    partialfields = "(ctd:1|ctd:2)",
    as_q = "",
    start = 0L
  )
  page_size <- 10L

  # Fetch and parse the result page that starts at offset `start`.
  fetch_page <- function(start) {
    query$start <- start
    url %>%
      GET(query = query) %>%
      content("parsed")
  }

  # Turn one parsed result page into a data frame with one row per hit.
  parse_page <- function(page, start) {
    numero <- xml_text(xml_find_all(page, xpath = '//*[@class="featured hoverLink"]'))
    classe <- xml_text(xml_find_all(page, '//*[@class="cla"]'))
    ementa <- xml_text(xml_find_all(page, '//*[@class="firstEmenta"]'))
    n <- length(numero)

    # Every third matched text node is kept as the judgment date. The index
    # is derived from the number of hits on this page instead of a fixed
    # 3, 6, ..., 30 so that a short last page does not produce padding NAs.
    datas <- xml_text(xml_find_all(page, "//*[@id='table_resultado']//tr/td/text()[2]"))
    data.julgamento <- unlist(datas)[seq_len(n) * 3L]

    # Positions of the "outros" cells that mention "segredo"; those hits are
    # treated as having no link, so their url stays NA and the links found
    # on the page are assigned, in order, to the remaining positions.
    notas <- xml_text(xml_find_all(page, '//*[@class="outros" and @colspan=3]'))
    segredo <- grep("segredo", tolower(notas), fixed = TRUE)
    hrefs <- xml_find_all(page, "//tr[@class='dl'][1]/td[@class='outros']/a/@href") %>%
      xml_text()
    hrefs <- unlist(hrefs)

    links <- rep(NA_character_, n)
    abertos <- setdiff(seq_len(n), segredo)
    if (length(hrefs) == length(abertos)) {
      links[abertos] <- hrefs
    } else {
      warning(
        "tjrjSG_meta: ", length(hrefs), " links for ", length(abertos),
        " linkable results at start = ", start, "; url left as NA.",
        call. = FALSE
      )
    }

    data.frame(
      numero = numero,
      classe = classe,
      ementa = ementa,
      data.julgamento = data.julgamento,
      url = links,
      pagina = rep(start, n),
      stringsAsFactors = FALSE
    )
  }

  first_page <- fetch_page(0L)
  # NOTE(review): xml_integer() is applied to the raw counter text; if the
  # site ever prints a thousands separator this will be wrong -- confirm.
  num <- first_page %>%
    xml_find_first(xpath = "//*[@id='descResultadoPesquisa']/span[2]/b[3]") %>%
    xml_integer()
  if (length(num) != 1L || is.na(num) || num < 1L) {
    return(data.frame())
  }

  # Integer offsets 0, 10, ... covering hits 1..num. Stopping at num - 1
  # avoids requesting an empty page when num is a multiple of the page size,
  # and integers are never rendered in scientific notation in the query.
  offsets <- seq.int(0L, num - 1L, by = page_size)
  pages <- vector("list", length(offsets))

  for (k in seq_along(offsets)) {
    start <- offsets[[k]]
    # The first page is already in memory; throttle before every new request.
    if (k > 1L) {
      Sys.sleep(0.5)
    }
    pages[[k]] <- tryCatch(
      {
        page <- if (k == 1L) first_page else fetch_page(start)
        parse_page(page, start)
      },
      error = function(e) {
        warning(
          "tjrjSG_meta: skipping page at start = ", start, ": ",
          conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
  }

  pages <- Filter(Negate(is.null), pages)
  if (length(pages) == 0L) {
    return(data.frame())
  }
  do.call(rbind, pages)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment