Last active
May 6, 2019 14:07
-
-
Save JoaoCarabetta/8b97501e7d32bbac00df0c89084a9fae to your computer and use it in GitHub Desktop.
Identifica os parlamentares nas tramitações
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TEMP FUNCTION accent2latin(word STRING) AS | |
(( | |
WITH lookups AS ( | |
SELECT | |
'ã,ç,æ,œ,á,é,í,ó,ú,à,è,ì,ò,ù,ä,ë,ï,ö,ü,ÿ,â,ê,î,ô,û,å,ø,Ø,Å,Á,À,Â,Ä,È,É,Ê,Ë,Í,Î,Ï,Ì,Ò,Ó,Ô,Ö,Ú,Ù,Û,Ü,Ÿ,Ç,Æ,Œ,ñ' AS accents, | |
'a,c,ae,oe,a,e,i,o,u,a,e,i,o,u,a,e,i,o,u,y,a,e,i,o,u,a,o,O,A,A,A,A,A,E,E,E,E,I,I,I,I,O,O,O,O,U,U,U,U,Y,C,AE,OE,n' AS latins | |
), | |
pairs AS ( | |
SELECT accent, latin FROM lookups, | |
UNNEST(SPLIT(accents)) AS accent WITH OFFSET AS p1, | |
UNNEST(SPLIT(latins)) AS latin WITH OFFSET AS p2 | |
WHERE p1 = p2 | |
) | |
SELECT STRING_AGG(IFNULL(latin, char), '') | |
FROM UNNEST(SPLIT(word, '')) char | |
LEFT JOIN pairs | |
ON char = accent | |
)); | |
SELECT * | |
FROM ( | |
SELECT | |
w1.*, | |
w2.* | |
FROM ( | |
SELECT t1.*, | |
t2.id as id_parlamentar, t2.nome_parlamentar, t2.sigla_partido, t2.sigla_uf | |
FROM (SELECT * FROM `gabinete-compartilhado.congresso.tramitacao_` WHERE data_hora BETWEEN DATETIME '2019-02-01' AND DATETIME '2019-12-31') t1 | |
JOIN (SELECT * FROM `gabinete-compartilhado.congresso.camara_deputado_` WHERE ultima_legislatura = 56) t2 | |
ON accent2latin(lower(t1.despacho)) LIKE CONCAT('%', accent2latin(lower(t2.nome_parlamentar)), '%')) w1 | |
JOIN `gabinete-compartilhado.congresso.support_date_legislaturas` w2 | |
ON EXTRACT(DATE FROM w1.data_hora) = w2.aday) k1 | |
JOIN `gabinete-compartilhado.congresso.camara_deputados_semihomonimos` k2 | |
ON lower(k1.nome_parlamentar) = k2.nome | |
AND NOT REGEXP_CONTAINS(accent2latin(lower(k1.nome_parlamentar)), accent2latin(k2.regex)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment