Fabien Antoine rhanka

## ccass_to_json.sh
for file in $(find . -iname '*xml');do echo $file; (cat $file | xq '. | { chamber: (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.FORMATION | sub(".*_(?<x>...).*"; "\(.x)") | ascii_downcase), decision_date: .TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.DATE_DEC, ecli: (if (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.ECLI != null) then .TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.ECLI else .TEXTE_JURI_JUDI.META.META_COMMUN.ID end), jurisdiction: (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.JURIDICTION | sub("Cour de cassation"; "cc")), number: (if (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.NUMEROS_AFFAIRES.NUMERO_AFFAIRE | type=="string") then .TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.NUMEROS_AFFAIRES.NUMERO_AFFAIRE else .TEXTE_JURI_JUDI.META.META_COMMUN.ID end), publication: ["b","c"], solution: (if (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.SOLUTION != null) then (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.SOLUTION | ascii_downcase) else "aucune" end), update_date: .TEXTE_JURI_JUDI.META.META_SPEC.ME

## Raw deces.matchid.io to wikidata
#first run
cat wikidata_dead_french_deces_INSEE_1st.csv  | sed 's/{.*}//;s/"//g' | awk -F, '{print $1 "," $10*100 "," $13}' | sort -t, +0d -1 +2n -3 -u -k1,1 | awk -F ',' '{print $1 "," $3}' > wikidata_dead_french_deces_INSEE_1st_request.csv
cat wikidata_dead_french_deces_INSEE_1st_request.csv | tail -n +2 | awk -F ',' '{print $1 "\tP9058\t" "\"" $2 "\""}'> wikidata_dead_french_deces_INSEE_1st_request.qs
curl https://quickstatements.toolforge.org/api.php -d action=import -d submit=1 -d username=Rhanka -d "batchname=test" --data-raw "token=$QS_TOKEN" --data-urlencode data@wikidata_dead_french_deces_INSEE_1st_request.qs

#second run
cat 20200302_wikidata_dead_french_unmatched_deces_INSEE.csv | sed 's/{.*}//;s/"[^"]*"//g' | awk -F, '($NF ~ /auto|true|check/){print $1 "," $11*100 "," $14 "," $NF }' | sort -t, -k1,1 -k2,2nr | sort -t, -u -k1,1  | awk -F, '{print $1 "\tP9058\t" "\"" $3 "\""}' > 20200302_wikidata_dead_french_unmatched_deces_INSEE.qs
curl https://quickstatements.toolforge.org/api.php -d action=impor

## wikidata dead french people
select ?person  ?personLabel ?firstnameLabel  ?lastnameLabel ?birthdateLabel ?birthplaceLabel ?citizenshipLabel ?diedLabel where {
  ?person wdt:P27 wd:Q142.
  ?person wdt:P734 ?lastname.
  ?person wdt:P735 ?firstname.
  ?person wdt:P569 ?birthdate.
  ?person wdt:P27 ?citizenship.
  ?person wdt:P19 ?birthplace.
  ?person wdt:P570 ?died;
  FILTER((?died >= "1970-01-01T00:00:00Z"^^xsd:dateTime))
  service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }

## Projection rapprochée départements français
-proj webmercator \
-affine where="code.indexOf('971')==0" shift=6355000,3130000 scale=3 \
-affine where="code.indexOf('972')==0" shift=6680000,3305000 scale=3 \
-affine where="code.indexOf('973')==0" shift=6160000,4520000 scale=0.7 \
-affine where="code.indexOf('974')==0" shift=-5570000,7360000 scale=3 \
-affine where="code.indexOf('976')==0" shift=-4085000,6390000 scale=3

## cours_cass_xml2json.sh
#!/bin/bash

sudo apt-get install python pip wget
pip install jq xq

wget -rl1 https://echanges.dila.gouv.fr/OPENDATA/CASS/
find -iname '*.tar.gz' | xargs tar xvzf
nxml=`find -iname '*.xml'| wc -l`;(find -iname '*.xml' | xargs -n 1 -P 6 -I {} bash -c 'file={};cat $file | xq "." > ${file/xml/json}' &) && while(true);do find -iname '*.json' | wc -l | awk -v nxml=$nxml '{printf  "\rconversion xml>json en cours : " $1 "/" nxml}';sleep 1;done

## download_brpp_s3.sh
#!/bin/bash

for file in `curl -s https://fichier-des-personnes-decedees.s3.fr-par.scw.cloud/ | \
              sed 's/<Key>/\n/g' | sed 's/<.*//' | egrep -i 'deces-.*.txt.gz'`;\
                do \
                  (curl -s https://fichier-des-personnes-decedees.s3.fr-par.scw.cloud/echo $file > $file ) && \
                  echo $file downloaded;\
                done

## download_brpp_opendata.sh
#!/bin/bash
# ce fichier permet le téléchargement de l'intégralité des données insee et de les compresser
# il necessite l'installation de jq pour parser les json de l'api data.gouv.fr

curl -s https://www.data.gouv.fr/api/1/datasets/fichier-des-personnes-decedees/ | \
  jq '.resources[].url' | sed 's/^/curl -s /;s:/\(deces-.*\)":/\1" | gzip > \1.gz:' | sh

## download_brpp.sh
#!/bin/bash
#login et mdp doivent être url encoded (uriComponent et pas simplement uri)
set -e
set echo off
first_year=1970
login=MYBRPPLOGIN
password=MYURLENCODEDPASSWORD

# étape 1: création d'un jeton de session (stockage dans cookie1)
curl -s -c cookie1 https://echanges.insee.fr/ihm/download/brpp-deces -o session.html && echo initialisation de session || echo initialisation de session en échec

## dataiku_leboncoin.py
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

import hashlib
import re
import json
import requests

## dataiku_twittersearcher.py
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
# from birdy.twitter import UserClient, StreamClient
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
TWITTER_CONSUMER_KEY = "xxx"
	#first run
	cat wikidata_dead_french_deces_INSEE_1st.csv \| sed 's/{.}//;s/"//g' \| awk -F, '{print $1 "," $10100 "," $13}' \| sort -t, +0d -1 +2n -3 -u -k1,1 \| awk -F ',' '{print $1 "," $3}' > wikidata_dead_french_deces_INSEE_1st_request.csv
	cat wikidata_dead_french_deces_INSEE_1st_request.csv \| tail -n +2 \| awk -F ',' '{print $1 "\tP9058\t" "\"" $2 "\""}'> wikidata_dead_french_deces_INSEE_1st_request.qs
	curl https://quickstatements.toolforge.org/api.php -d action=import -d submit=1 -d username=Rhanka -d "batchname=test" --data-raw "token=$QS_TOKEN" --data-urlencode data@wikidata_dead_french_deces_INSEE_1st_request.qs

	#second run
	cat 20200302_wikidata_dead_french_unmatched_deces_INSEE.csv \| sed 's/{.}//;s/"[^"]"//g' \| awk -F, '($NF ~ /auto\|true\|check/){print $1 "," $11*100 "," $14 "," $NF }' \| sort -t, -k1,1 -k2,2nr \| sort -t, -u -k1,1 \| awk -F, '{print $1 "\tP9058\t" "\"" $3 "\""}' > 20200302_wikidata_dead_french_unmatched_deces_INSEE.qs
	curl https://quickstatements.toolforge.org/api.php -d action=impor
	select ?person ?personLabel ?firstnameLabel ?lastnameLabel ?birthdateLabel ?birthplaceLabel ?citizenshipLabel ?diedLabel where {
	?person wdt:P27 wd:Q142.
	?person wdt:P734 ?lastname.
	?person wdt:P735 ?firstname.
	?person wdt:P569 ?birthdate.
	?person wdt:P27 ?citizenship.
	?person wdt:P19 ?birthplace.
	?person wdt:P570 ?died;
	FILTER((?died >= "1970-01-01T00:00:00Z"^^xsd:dateTime))
	service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
	-proj webmercator \
	-affine where="code.indexOf('971')==0" shift=6355000,3130000 scale=3 \
	-affine where="code.indexOf('972')==0" shift=6680000,3305000 scale=3 \
	-affine where="code.indexOf('973')==0" shift=6160000,4520000 scale=0.7 \
	-affine where="code.indexOf('974')==0" shift=-5570000,7360000 scale=3 \
	-affine where="code.indexOf('976')==0" shift=-4085000,6390000 scale=3
	#!/bin/bash

	sudo apt-get install python pip wget
	pip install jq xq

	wget -rl1 https://echanges.dila.gouv.fr/OPENDATA/CASS/
	find -iname '*.tar.gz' \| xargs tar xvzf
	nxml=`find -iname '.xml'\| wc -l`;(find -iname '.xml' \| xargs -n 1 -P 6 -I {} bash -c 'file={};cat $file \| xq "." > ${file/xml/json}' &) && while(true);do find -iname '*.json' \| wc -l \| awk -v nxml=$nxml '{printf "\rconversion xml>json en cours : " $1 "/" nxml}';sleep 1;done
	#!/bin/bash

	for file in `curl -s https://fichier-des-personnes-decedees.s3.fr-par.scw.cloud/ \| \
	sed 's/<Key>/\n/g' \| sed 's/<.//' \| egrep -i 'deces-..txt.gz'`;\
	do \
	(curl -s https://fichier-des-personnes-decedees.s3.fr-par.scw.cloud/echo $file > $file ) && \
	echo $file downloaded;\
	done
	#!/bin/bash
	# ce fichier permet le téléchargement de l'intégralité des données insee et de les compresser
	# il necessite l'installation de jq pour parser les json de l'api data.gouv.fr

	curl -s https://www.data.gouv.fr/api/1/datasets/fichier-des-personnes-decedees/ \| \
	jq '.resources[].url' \| sed 's/^/curl -s /;s:/\(deces-.*\)":/\1" \| gzip > \1.gz:' \| sh
	#!/bin/bash
	#login et mdp doivent être url encoded (uriComponent et pas simplement uri)
	set -e
	set echo off
	first_year=1970
	login=MYBRPPLOGIN
	password=MYURLENCODEDPASSWORD

	# étape 1: création d'un jeton de session (stockage dans cookie1)
	curl -s -c cookie1 https://echanges.insee.fr/ihm/download/brpp-deces -o session.html && echo initialisation de session \|\| echo initialisation de session en échec
	# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
	# -- coding: utf-8 --
	import dataiku
	import pandas as pd, numpy as np
	from dataiku import pandasutils as pdu

	import hashlib
	import re
	import json
	import requests