Skip to content

Instantly share code, notes, and snippets.

@pachevalier
Last active February 4, 2021 22:59
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pachevalier/6194243119df584f80273185f274264c to your computer and use it in GitHub Desktop.
Save pachevalier/6194243119df584f80273185f274264c to your computer and use it in GitHub Desktop.
# Quick-and-dirty, very preliminary script (it may be improved one day ;)):
# for a French Wikipedia article, look up on Wikidata the gender (P21) of
# the articles it links to.
library("jsonlite")
library("tidyverse")
library("httr")
library("WikidataR")
# Sanity check against the Wikidata API: request the French labels of the
# items Q6581097 and Q6581072 and show the "entities" part of the response.
# NOTE(review): these are presumably the item ids that P21 values point to.
pluck(
  content(
    GET(
      paste0(
        "https://www.wikidata.org/w/api.php?action=wbgetentities&",
        "ids=Q6581097|Q6581072",
        "&languages=fr&props=labels&format=json"
      )
    )
  ),
  "entities"
)
#' Fetch the Wikidata claims attached to a French Wikipedia article
#'
#' Calls the Wikidata `wbgetentities` API, resolving the entity from its
#' `frwiki` sitelink title.
#'
#' @param x Title of the French Wikipedia article (a single string).
#' @return The `claims` element of the first entity in the response, or
#'   `NULL` when the response contains no such element.
get_claims <- function(x) {
  # Parameters go through `query` so that httr percent-encodes every value.
  # Pasting `URLencode(x)` into the URL left reserved characters such as
  # `&`, `+`, `=` or `#` unescaped, which corrupted the query string for
  # titles containing them.
  GET(
    "https://www.wikidata.org/w/api.php",
    query = list(
      action = "wbgetentities",
      titles = x,
      sites = "frwiki",
      format = "json",
      props = "claims"
    )
  ) %>%
    content() %>%
    pluck("entities", 1, "claims")
}
# Example: fetch the Wikidata claims of the "Ada Lovelace" article.
get_claims(x = "Ada Lovelace")
#' Look up the Wikidata gender (property P21) of a French Wikipedia article
#'
#' @param x Title of the French Wikipedia article (a single string).
#' @return A one-row tibble with columns `x` (the title as given) and
#'   `gender` (the item id of the first P21 value, or `""` when the claims
#'   contain no such value).
get_gender <- function(x) {
  # A single pluck() walks the whole path; `.default` supplies the empty
  # string when any step is missing, which replaces the former scalar
  # `ifelse(is.null(gender), "", gender)`.
  gender <- get_claims(x) %>%
    pluck("P21", 1, "mainsnak", "datavalue", "value", "id", .default = "")
  tibble(x, gender)
}
# Example: look up the gender recorded on Wikidata for "Ada Lovelace".
get_gender(x = "Ada Lovelace")
#' Look up the Wikidata gender of every article linked from a frwiki page
#'
#' Lists the namespace-0 links of the French Wikipedia article `title`, then
#' calls `get_gender()` once per linked title. The number of links looked up
#' is printed before the rows without a gender are dropped.
#'
#' @param title Title of the French Wikipedia article (a single string).
#' @return A tibble with columns `x` (linked title) and `gender`, restricted
#'   to the links for which a gender was found. It has zero rows when the
#'   page has no links.
get_genderlinks <- function(title) {
  link_batches <- list()
  continuation <- list()
  repeat {
    # `query` lets httr percent-encode the title and the continuation token.
    # `pllimit = "max"` asks for the largest batch the API allows; the
    # former hard-coded 1000 exceeded that cap for regular clients.
    response <- GET(
      "https://fr.wikipedia.org/w/api.php",
      query = c(
        list(
          format = "json",
          origin = "*",
          action = "query",
          prop = "links",
          titles = title,
          pllimit = "max",
          plnamespace = 0
        ),
        continuation
      )
    ) %>%
      content()

    # A missing page or a page without links has no "links" element.
    link_batches[[length(link_batches) + 1L]] <- response %>%
      pluck("query", "pages", 1, "links", .default = list()) %>%
      map_chr("title")

    # The API returns a "continue" element while more links remain; its
    # fields must be sent back unchanged to obtain the next batch.
    continuation <- pluck(response, "continue")
    if (is.null(continuation)) {
      break
    }
  }
  link_titles <- as.character(unlist(link_batches))

  if (length(link_titles) == 0) {
    # Keep the output schema stable so the filter below and downstream
    # calls such as `count(gender)` still work on an empty result.
    df <- tibble(x = character(), gender = character())
  } else {
    df <- link_titles %>%
      map(get_gender) %>%
      bind_rows()
  }

  print(nrow(df))
  df %>%
    filter(gender != "")
}
# Gender counts among the links of three French Wikipedia articles.
count(get_genderlinks("Sociologie"), gender)
count(get_genderlinks("Science politique"), gender)
count(get_genderlinks("Microéconomie"), gender)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment