Skip to content

Instantly share code, notes, and snippets.

View rhanka's full-sized avatar

Fabien Antoine rhanka

  • French Administration
  • Paris
View GitHub Profile
@rhanka
rhanka / ccass_to_json.sh
Last active June 16, 2021 23:03
cass to json
for file in $(find . -iname '*xml');do echo $file; (cat $file | xq '. | { chamber: (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.FORMATION | sub(".*_(?<x>...).*"; "\(.x)") | ascii_downcase), decision_date: .TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.DATE_DEC, ecli: (if (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.ECLI != null) then .TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.ECLI else .TEXTE_JURI_JUDI.META.META_COMMUN.ID end), jurisdiction: (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.JURIDICTION | sub("Cour de cassation"; "cc")), number: (if (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.NUMEROS_AFFAIRES.NUMERO_AFFAIRE | type=="string") then .TEXTE_JURI_JUDI.META.META_SPEC.META_JURI_JUDI.NUMEROS_AFFAIRES.NUMERO_AFFAIRE else .TEXTE_JURI_JUDI.META.META_COMMUN.ID end), publication: ["b","c"], solution: (if (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.SOLUTION != null) then (.TEXTE_JURI_JUDI.META.META_SPEC.META_JURI.SOLUTION | ascii_downcase) else "aucune" end), update_date: .TEXTE_JURI_JUDI.META.META_SPEC.ME
#first run
# Step 1: reduce the match export to two comma-separated columns.
#   - sed strips any {...} span and every double quote so awk can split on ",".
#   - awk keeps columns 1, 10 (multiplied by 100) and 13.
#   - sort: the historical "+0d -1 +2n -3" keys are rejected by current
#     POSIX/GNU sort (they are read as file names); they are written here in
#     the equivalent -k form: field 1 in dictionary order, field 3 numeric.
#   - the final awk keeps the first and third columns of the sorted output.
sed 's/{.*}//;s/"//g' wikidata_dead_french_deces_INSEE_1st.csv \
  | awk -F, '{print $1 "," $10*100 "," $13}' \
  | sort -t, -k1,1d -k3,3n -u -k1,1 \
  | awk -F ',' '{print $1 "," $3}' \
  > wikidata_dead_french_deces_INSEE_1st_request.csv
# Step 2: skip the first line and emit one tab-separated statement per row:
#   <column 1> TAB P9058 TAB "<column 2>"
tail -n +2 wikidata_dead_french_deces_INSEE_1st_request.csv \
  | awk -F ',' '{print $1 "\tP9058\t" "\"" $2 "\""}' \
  > wikidata_dead_french_deces_INSEE_1st_request.qs
# Step 3: submit the batch; the API token is read from $QS_TOKEN.
curl https://quickstatements.toolforge.org/api.php -d action=import -d submit=1 -d username=Rhanka -d "batchname=test" --data-raw "token=$QS_TOKEN" --data-urlencode data@wikidata_dead_french_deces_INSEE_1st_request.qs
#second run
# Same idea as the first run, on the "unmatched" export:
#   - sed strips any {...} span and every double-quoted string;
#   - awk keeps rows whose last column matches auto|true|check and prints
#     columns 1, 11 (multiplied by 100), 14 and the last column;
#   - the first sort orders by column 1, then column 2 numerically descending;
#   - the second sort keeps a single row per column-1 value;
#   - the final awk emits: <column 1> TAB P9058 TAB "<column 3>".
sed 's/{.*}//;s/"[^"]*"//g' 20200302_wikidata_dead_french_unmatched_deces_INSEE.csv \
  | awk -F, '($NF ~ /auto|true|check/){print $1 "," $11*100 "," $14 "," $NF }' \
  | sort -t, -k1,1 -k2,2nr \
  | sort -t, -u -k1,1 \
  | awk -F, '{print $1 "\tP9058\t" "\"" $3 "\""}' \
  > 20200302_wikidata_dead_french_unmatched_deces_INSEE.qs
curl https://quickstatements.toolforge.org/api.php -d action=impor
@rhanka
rhanka / wikidata dead french people
Last active February 21, 2021 18:27
wikidata dead french people
select ?person ?personLabel ?firstnameLabel ?lastnameLabel ?birthdateLabel ?birthplaceLabel ?citizenshipLabel ?diedLabel where {
?person wdt:P27 wd:Q142.
?person wdt:P734 ?lastname.
?person wdt:P735 ?firstname.
?person wdt:P569 ?birthdate.
?person wdt:P27 ?citizenship.
?person wdt:P19 ?birthplace.
?person wdt:P570 ?died;
FILTER((?died >= "1970-01-01T00:00:00Z"^^xsd:dateTime))
service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
@rhanka
rhanka / Projection rapprochée départements français
Last active January 23, 2021 16:22
To use with mapshaper.org; thanks to icem7 and Grégoire David (france-geojson).
-proj webmercator \
-affine where="code.indexOf('971')==0" shift=6355000,3130000 scale=3 \
-affine where="code.indexOf('972')==0" shift=6680000,3305000 scale=3 \
-affine where="code.indexOf('973')==0" shift=6160000,4520000 scale=0.7 \
-affine where="code.indexOf('974')==0" shift=-5570000,7360000 scale=3 \
-affine where="code.indexOf('976')==0" shift=-4085000,6390000 scale=3
#!/bin/bash
# Mirror the DILA "CASS" open-data directory, unpack every archive and convert
# each XML file to a JSON file with the same base name, showing a live counter.
# NOTE(review): the package names below are kept as in the original; confirm
# that they provide the `xq` command-line tool used for the conversion.
sudo apt-get install python pip wget
pip install jq xq
# Fetch the directory listing and everything it links to, one level deep.
wget -rl1 https://echanges.dila.gouv.fr/OPENDATA/CASS/
# tar reads exactly one archive per invocation (extra arguments are member
# names), so hand the archives over one at a time; NUL separators keep odd
# file names intact.
find . -iname '*.tar.gz' -print0 | xargs -0 -n 1 tar xvzf
nxml=$(find . -iname '*.xml' | wc -l)
# Convert up to 6 files in parallel in the background. The file name is passed
# as a positional argument instead of being spliced into the command text, and
# only the trailing extension is replaced by ".json".
find . -iname '*.xml' -print0 \
  | xargs -0 -n 1 -P 6 bash -c 'xq "." < "$1" > "${1%.*}.json"' _ &
converter_pid=$!
# Refresh the progress counter once per second until the converter exits.
while kill -0 "$converter_pid" 2>/dev/null; do
  printf '\rconversion xml>json en cours : %s/%s' \
    "$(find . -iname '*.json' | wc -l)" "$nxml"
  sleep 1
done
wait "$converter_pid"
printf '\rconversion xml>json en cours : %s/%s\n' \
  "$(find . -iname '*.json' | wc -l)" "$nxml"
#!/bin/bash
# Download every deces-*.txt.gz object listed by the public bucket into the
# current directory, printing one line per successful download.
bucket_url=https://fichier-des-personnes-decedees.s3.fr-par.scw.cloud
# The listing is XML: put each <Key> value at the start of its own line, drop
# everything from the next tag onwards, then keep only the deces-*.txt.gz keys.
curl -s "${bucket_url}/" \
  | sed 's/<Key>/\n/g' \
  | sed 's/<.*//' \
  | grep -Ei 'deces-.*\.txt\.gz' \
  | while IFS= read -r file; do
      # The object URL is the bucket URL followed by the key. -f makes curl
      # fail on an HTTP error instead of saving the error page.
      if curl -sf -o "$file" "${bucket_url}/${file}"; then
        echo "$file downloaded"
      fi
    done
#!/bin/bash
# Download all the "deces-*" resources of the data.gouv.fr dataset
# "fichier-des-personnes-decedees" and store each one gzip-compressed.
# Requires jq to parse the JSON returned by the data.gouv.fr API.
curl -s https://www.data.gouv.fr/api/1/datasets/fichier-des-personnes-decedees/ \
  | jq -r '.resources[].url' \
  | while IFS= read -r url; do
      # Name the output after the last path component of the resource URL.
      name=${url##*/}
      case "$name" in
        deces-*)
          # URLs are passed to curl as quoted arguments rather than being
          # turned into shell text and piped to sh, so nothing returned by
          # the API can be executed as a command.
          curl -s "$url" | gzip > "${name}.gz"
          ;;
        *)
          # Resources that are not deces-* files are skipped.
          ;;
      esac
    done
@rhanka
rhanka / download_brpp.sh
Last active June 13, 2019 21:52
download BRPP INSEE data
#!/bin/bash
# Login and password must be URL-encoded (URI-component encoding, not just plain URI encoding).
# Abort on the first command that fails outside of a condition or &&/|| list.
set -e
# NOTE(review): in bash this does not turn command echoing off; `set` with
# non-option words assigns the positional parameters ($1=echo, $2=off).
# Presumably a leftover from a batch-style "@echo off" — confirm before removing.
set echo off
# Settings read by the rest of the script (not visible in this excerpt).
first_year=1970
login=MYBRPPLOGIN
password=MYURLENCODEDPASSWORD
# Step 1: create a session token (cookies are written to the file cookie1,
# the response body to session.html). A message is printed for success or
# failure; because of the trailing `|| echo`, a curl failure does not stop
# the script despite `set -e`.
curl -s -c cookie1 https://echanges.insee.fr/ihm/download/brpp-deces -o session.html && echo initialisation de session || echo initialisation de session en échec
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import hashlib
import re
import json
import requests
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
# from birdy.twitter import UserClient, StreamClient
import json
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
TWITTER_CONSUMER_KEY = "xxx"