Skip to content

Instantly share code, notes, and snippets.

@pachevalier
Created July 3, 2021 04:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pachevalier/7f8103d2ff5390b3372048eb3e7b313d to your computer and use it in GitHub Desktop.
Save pachevalier/7f8103d2ff5390b3372048eb3e7b313d to your computer and use it in GitHub Desktop.
Script to match egapro data with wikidata. See https://quickstatements.toolforge.org/#/batch/58280
library("tidyverse")
library("tricky")
library("httr")
library("jsonlite")
# Exploratory check: load the Egapro index export with read_csv2(), pass it
# through tricky's set_standard_names() (later steps rely on the resulting
# column names such as siren, annee and note), and print a column overview.
glimpse(
  set_standard_names(
    read_csv2(file = "index-egalite-fh.csv")
  )
)
# Spot check: list the yearly scores recorded for one company, selected by
# its SIREN and ordered by year.
#
# siren is read as text, matching the join step further down. With a guessed
# (numeric) column the comparison against the string below would depend on
# implicit number-to-text coercion, which can render large round values in
# scientific notation and drops leading zeros.
read_csv2(
  "index-egalite-fh.csv",
  col_types = cols(siren = col_character())
) %>%
  set_standard_names() %>%
  filter(siren == "382357721") %>%
  select(raison_sociale, siren, annee, note) %>%
  arrange(annee)
# Wikidata query (its result is presumably what was saved as query.csv):
# https://query.wikidata.org/#SELECT%20%3Fqid%20%3Fsiren%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP1616%20%3Fsiren%0A%20%20BIND%28SUBSTR%28STR%28%3Fitem%29%2C32%29%20as%20%3Fqid%29%0A%7D%0ALIMIT%201000
#
# Decoded SPARQL:
#   SELECT ?qid ?siren WHERE {
#     ?item wdt:P1616 ?siren
#     BIND(SUBSTR(STR(?item),32) as ?qid)
#   }
#   LIMIT 1000
#
# Preview the downloaded result file.
read_csv(file = "query.csv")
# Build the CSV batch written to towikidata.csv: Egapro rows are matched to
# Wikidata items through their SIREN, then reshaped into the columns
# qid, P9279, qal585, qal577, S248, s854 (QuickStatements-style headers).

# Egapro index rows. siren is forced to character so that it joins as text.
egapro_scores <- read_csv2(
  file = "index-egalite-fh.csv",
  col_types = cols(siren = col_character())
) %>%
  set_standard_names() %>%
  select(siren, note, annee, raison_sociale)

# Wikidata lookup table; it must provide the qid and siren columns used below.
wikidata_items <- read_csv(
  file = "query.csv",
  col_types = cols(siren = col_character())
)

egapro_scores %>%
  # Keep only the Egapro rows whose SIREN is present in the lookup table.
  inner_join(y = wikidata_items, by = "siren") %>%
  mutate(
    # Search URL for the company, wrapped in literal double quotes.
    s854 = paste0(
      "\"https://index-egapro.travail.gouv.fr/consulter-index/#!/search?q=",
      siren,
      "\""
    ),
    # 1 January of the index year; the "/9" suffix is presumably the
    # QuickStatements year-precision marker (TODO confirm).
    qal585 = paste0("+", annee, "-01-01T00:00:00Z/9"),
    # 1 January of the year after the index year, same format.
    qal577 = paste0("+", annee + 1, "-01-01T00:00:00Z/9"),
    # Company name; computed here but not kept by the final select().
    qal1810 = raison_sociale,
    # Constant item id attached to every row.
    S248 = "Q107394334"
  ) %>%
  # The score is exported under the P9279 header.
  select(qid, P9279 = note, qal585, qal577, S248, s854) %>%
  write_csv("towikidata.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment