This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
#get all schemas of the dump | |
schemas=`cat $1 | docker exec -i postgres pg_restore -l | grep SCHEMA | sed 's/^.*\s//' | sort | uniq` | |
for schema in $schemas | |
do echo $schema | |
# For every table of the current schema, write a tab-separated <table>.csv.
# `pg_restore -l` prints the archive's table of contents; lines containing
# "TABLE <schema>" are kept and reduced to the text following the schema name
# (the second sed expression then drops a trailing " <schema>" token).
# Fix: the sed program is now closed with the same double quote that opens it
# (it previously ended with a single quote, leaving the substitution unterminated).
for table in `cat "$1" | docker exec -i postgres pg_restore -l | egrep "TABLE $schema" | sed "s/.*TABLE $schema //;s/ $schema//" | sort | uniq`
do echo $table
# The perl filter works on the SQL emitted by pg_restore:
#   - a line matching COPY starts the dump: its parenthesised column list is
#     printed as a tab-separated header line;
#   - while dumping, every line is printed with the \N markers removed;
#   - a line holding only "\." stops the dump.
cat "$1" | docker exec -i postgres pg_restore -t "$table" | perl -e 'while(<>){if (/^\\.\s*$/) {$dump=0} if ($dump==1) {s/\\N//g;print} if (/COPY/){$dump=1;s/.*\((.*)\).*/$1/;s/,\s*/\t/g;print}}' > "$table.csv"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
# -*- coding: utf-8 -*- | |
from multiprocessing import Process, Queue | |
import dataiku | |
from dataiku.customrecipe import get_input_names_for_role | |
from dataiku.customrecipe import get_output_names_for_role | |
from dataiku.customrecipe import get_recipe_config | |
import itertools | |
import logging | |
import pandas as pd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* resource.filetype: migrate OLD_DOMAINS to NEW_DOMAIN | |
* WARNING : it uses a basic 'replace' without a strict regex, be careful about the OLD_DOMAINS | |
*/ | |
var count = 0; | |
var urlRegex = /^https?\:\/\/alpha.datalab.mi.*$/i; | |
db.dataset.find({'resources.filetype': 'file'}).forEach(function(dataset) { | |
if (dataset.resources) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
# -*- coding: utf-8 -*- | |
import dataiku | |
import os | |
import json | |
import pandas as pd, numpy as np | |
from dataiku import pandasutils as pdu | |
os.environ['http_proxy'] = '' | |
import dataikuapi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
# -*- coding: utf-8 -*- | |
import dataiku | |
import pandas as pd, numpy as np | |
from dataiku import pandasutils as pdu | |
# from birdy.twitter import UserClient, StreamClient | |
import json | |
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
TWITTER_CONSUMER_KEY = "xxx" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
# -*- coding: utf-8 -*- | |
import dataiku | |
import pandas as pd, numpy as np | |
from dataiku import pandasutils as pdu | |
import hashlib | |
import re | |
import json | |
import requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# login and password must be URL-encoded (encodeURIComponent-style, not just encodeURI)
set -e
# NOTE: the former `set echo off` line was removed. In bash it does not disable
# command echoing; it only overwrites the positional parameters ($1=echo, $2=off).
first_year=1970
login=MYBRPPLOGIN
password=MYURLENCODEDPASSWORD
# step 1: create a session token (stored in the cookie jar "cookie1")
# An explicit if/else is used because `cmd && echo ok || echo ko` always ends
# with a successful echo, which hides the curl failure from `set -e`.
if curl -s -c cookie1 https://echanges.insee.fr/ihm/download/brpp-deces -o session.html
then
    echo initialisation de session
else
    echo initialisation de session en échec >&2
    exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download every "deces-*" resource of the data.gouv.fr dataset
# "fichier-des-personnes-decedees" and store each one gzip-compressed
# in the current directory as <resource name>.gz.
# Requires jq to parse the JSON returned by the data.gouv.fr API.
#
# The resource URLs are read one per line and passed to curl as quoted
# arguments. They are no longer rewritten into command text and piped to `sh`,
# so nothing coming from the API response is ever interpreted as shell code.
curl -s https://www.data.gouv.fr/api/1/datasets/fichier-des-personnes-decedees/ | \
jq -r '.resources[].url' | \
while IFS= read -r url
do
    # Last path component of the URL, e.g. deces-2019.txt
    name=${url##*/}
    case $name in
        deces-*)
            curl -s "$url" | gzip > "$name.gz"
            ;;
        *)
            # Resources that are not "deces-*" files are skipped
            # (they used to be fetched and dumped to stdout).
            ;;
    esac
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download every deces-*.txt.gz object listed by the public bucket into the
# current directory, printing "<file> downloaded" after each success.
base_url=https://fichier-des-personnes-decedees.s3.fr-par.scw.cloud

# The bucket root returns an XML listing: put each <Key> on its own line,
# drop everything from the next tag onwards, and keep only the names of
# interest (dots are escaped so they match literally).
for file in $(curl -s "$base_url/" | \
    sed 's/<Key>/\n/g' | sed 's/<.*//' | grep -Ei 'deces-.*\.txt\.gz')
do
    # Fix: the object URL is now "$base_url/$file". The previous command had a
    # literal "echo $file" in the URL, so curl requested ".../echo" and then
    # treated the file name as a second URL.
    # -f makes curl return a non-zero status on HTTP errors, so an error page
    # is not reported as a successful download.
    curl -sf "$base_url/$file" -o "$file" && \
        echo "$file" downloaded
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Fetch the CASS open-data archives, unpack them, then convert every XML file
# to a JSON file next to it while displaying a progress counter.
sudo apt-get install python pip wget
# NOTE(review): the `xq` command used below must be the XML-to-JSON jq wrapper
# (shipped with the `yq` Python package) - confirm that these two packages
# actually provide it in the target environment.
pip install jq xq
wget -rl1 https://echanges.dila.gouv.fr/OPENDATA/CASS/

# One tar invocation per archive: with several archives on a single command
# line, tar opens the first one and treats the others as member names.
find . -iname '*.tar.gz' -print0 | xargs -0 -n 1 tar xvzf

nxml=$(find . -iname '*.xml' | wc -l)

# Convert up to 6 files in parallel, in the background.
# - the file name is passed as $1 rather than spliced into the bash -c text,
#   so spaces or quotes in a path cannot alter the command;
# - only the extension is replaced (${1%.*}.json); the earlier ${file/xml/json}
#   rewrote the first "xml" found anywhere in the path and left an upper-case
#   ".XML" name unchanged, so that the input file became its own output.
find . -iname '*.xml' -print0 | \
    xargs -0 -n 1 -P 6 bash -c 'xq "." < "$1" > "${1%.*}.json"' _ &
converter=$!

# Refresh the counter once per second until the converter has exited
# (the previous `while(true)` loop never ended).
show_progress() {
    find . -iname '*.json' | wc -l | awk -v nxml="$nxml" '{printf "\rconversion xml>json en cours : " $1 "/" nxml}'
}
while kill -0 "$converter" 2>/dev/null
do
    show_progress
    sleep 1
done
wait "$converter"
show_progress
echo
OlderNewer