Last active
September 13, 2019 23:18
-
-
Save avallecam/506f6c8f903ede5d689a0699d37fb101 to your computer and use it in GitHub Desktop.
De STATA a R: uso de las funciones del paquete labelled
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
``` r
library(tidyverse) | |
library(haven) | |
library(labelled) | |
# create data frame ------------------------------
# Fixed seed so the simulated ages are reproducible
set.seed(22)
dt_nolab <- tibble(
  age = floor(rnorm(n = 6, mean = 25, sd = 10)),
  sex = gl(n = 2, k = 1, length = 6, labels = c("f", "m")),
  edu = rep(c("primaria", "secundaria"), each = 3)
) %>%
  # convert character columns to factors
  mutate_if(is.character, as.factor)
# labels? -----------------------------------------------------------------
# Overview of the data frame before any variable label is assigned
Hmisc::contents(dt_nolab)
#> | |
#> Data frame:dt_nolab 6 observations and 3 variables Maximum # NAs:0 | |
#> | |
#> | |
#> Levels Storage | |
#> age double | |
#> sex 2 integer | |
#> edu 2 integer | |
#> | |
#> +--------+-------------------+ | |
#> |Variable|Levels | | |
#> +--------+-------------------+ | |
#> | sex |f,m | | |
#> +--------+-------------------+ | |
#> | edu |primaria,secundaria| | |
#> +--------+-------------------+ | |
# No variable labels have been assigned yet, so every entry is NULL
labelled::var_label(dt_nolab)
#> $age | |
#> NULL | |
#> | |
#> $sex | |
#> NULL | |
#> | |
#> $edu | |
#> NULL | |
# Variable listing; only the names show up because there are no labels
labelled::look_for(dt_nolab)
#> variable | |
#> 1 age | |
#> 2 sex | |
#> 3 edu | |
# assign labels to variables ------------------
# One descriptive label per column, passed as name = "label" pairs
dt <- labelled::set_variable_labels(
  dt_nolab,
  age = "Edad (años)",
  sex = "Sexo de participantes",
  edu = "Nivel educativo"
)
# check labels --------------------------------------------------------
Hmisc::contents(dt)
#> | |
#> Data frame:dt 6 observations and 3 variables Maximum # NAs:0 | |
#> | |
#> | |
#> Labels Levels Storage | |
#> age Edad (años) double | |
#> sex Sexo de participantes 2 integer | |
#> edu Nivel educativo 2 integer | |
#> | |
#> +--------+-------------------+ | |
#> |Variable|Levels | | |
#> +--------+-------------------+ | |
#> | sex |f,m | | |
#> +--------+-------------------+ | |
#> | edu |primaria,secundaria| | |
#> +--------+-------------------+ | |
# The labels assigned above come back as a named list
labelled::var_label(dt)
#> $age | |
#> [1] "Edad (años)" | |
#> | |
#> $sex | |
#> [1] "Sexo de participantes" | |
#> | |
#> $edu | |
#> [1] "Nivel educativo" | |
# Inspect the labelled data frame (dt); the unlabelled dt_nolab has no
# labels to verify in this section.
look_for(dt)
#> variable label
#> 1 age Edad (años)
#> 2 sex Sexo de participantes
#> 3 edu Nivel educativo
# details = TRUE adds class, type, levels and missing-value columns
look_for(dt, details = TRUE)
#> variable label class type levels
#> 1 age Edad (años) numeric double
#> 2 sex Sexo de participantes factor integer f; m
#> 3 edu Nivel educativo factor integer primaria; secundaria
#> value_labels unique_values n_na na_values na_range
#> 1 6 0
#> 2 2 0
#> 3 2 0
# Structure view: each column carries its label as a "label" attribute
utils::str(dt)
#> Classes 'tbl_df', 'tbl' and 'data.frame': 6 obs. of 3 variables: | |
#> $ age: num 19 49 35 27 22 43 | |
#> ..- attr(*, "label")= chr "Edad (años)" | |
#> $ sex: Factor w/ 2 levels "f","m": 1 2 1 2 1 2 | |
#> ..- attr(*, "label")= chr "Sexo de participantes" | |
#> $ edu: Factor w/ 2 levels "primaria","secundaria": 1 1 1 2 2 2 | |
#> ..- attr(*, "label")= chr "Nivel educativo" | |
# All attributes stored on the age column
attributes(dt[["age"]])
#> $label | |
#> [1] "Edad (años)" | |
# Just the variable label of the age column
attr(dt[["age"]], "label")
#> [1] "Edad (años)" | |
# save as dta ------------------------------
# (disabled) write the labelled data to a Stata file:
# dt %>% haven::write_dta("data/new_db.dta")
# read dta with labels -----------------------------------------------------
# (disabled) read the Stata file back, converting labelled values to factors:
# dt_new <- read_dta("data/new_db.dta") %>% as_factor()
# The file round trip is disabled, so dt is reused directly
dt_new <- dt
Hmisc::contents(dt_new)
#> | |
#> Data frame:dt_new 6 observations and 3 variables Maximum # NAs:0 | |
#> | |
#> | |
#> Labels Levels Storage | |
#> age Edad (años) double | |
#> sex Sexo de participantes 2 integer | |
#> edu Nivel educativo 2 integer | |
#> | |
#> +--------+-------------------+ | |
#> |Variable|Levels | | |
#> +--------+-------------------+ | |
#> | sex |f,m | | |
#> +--------+-------------------+ | |
#> | edu |primaria,secundaria| | |
#> +--------+-------------------+ | |
# Variable labels carried by dt_new, as a named list
labelled::var_label(dt_new)
#> $age | |
#> [1] "Edad (años)" | |
#> | |
#> $sex | |
#> [1] "Sexo de participantes" | |
#> | |
#> $edu | |
#> [1] "Nivel educativo" | |
# Variable names next to their labels
labelled::look_for(dt_new)
#> variable label | |
#> 1 age Edad (años) | |
#> 2 sex Sexo de participantes | |
#> 3 edu Nivel educativo | |
# dictionary --------------------------------------------------------
# Case 1: the data frame already carries variable labels;
# extract them as a named list with var_label()
dt_dict_list <- var_label(dt_new)
# Common data-dictionary layout (TRUE spelled out: T can be reassigned)
look_for(dt_new, details = TRUE)
#> variable label class type levels | |
#> 1 age Edad (años) numeric double | |
#> 2 sex Sexo de participantes factor integer f; m | |
#> 3 edu Nivel educativo factor integer primaria; secundaria | |
#> value_labels unique_values n_na na_values na_range | |
#> 1 6 0 | |
#> 2 2 0 | |
#> 3 2 0 | |
# Case 2: the dictionary is a data frame of variable names and labels;
# turn it into a named list (one element per variable)
dt_dict_wide_list <-
  look_for(dt_new) %>%
  as_tibble() %>%
  # Keep only the name/label pair: any other column would be treated as an
  # id column by pivot_wider() and split the result across several rows.
  select(variable, label) %>%
  pivot_wider(names_from = variable, values_from = label) %>%
  as.list() %>%
  print()
#> $age | |
#> [1] "Edad (años)" | |
#> | |
#> $edu | |
#> [1] "Nivel educativo" | |
#> | |
#> $sex | |
#> [1] "Sexo de participantes" | |
# from data to list --------------------------------------------------
# Dictionary obtained in case 1
print(dt_dict_list)
#> $age | |
#> [1] "Edad (años)" | |
#> | |
#> $sex | |
#> [1] "Sexo de participantes" | |
#> | |
#> $edu | |
#> [1] "Nivel educativo" | |
# Dictionary obtained in case 2
print(dt_dict_wide_list)
#> $age | |
#> [1] "Edad (años)" | |
#> | |
#> $edu | |
#> [1] "Nivel educativo" | |
#> | |
#> $sex | |
#> [1] "Sexo de participantes" | |
# assign data labels ------------------------------------------------------
# Add a logical column that has no entry in the dictionary and reorder
dt_nolab <- dt_nolab %>%
  mutate(log = c(rep(TRUE, 5), FALSE)) %>%
  select(sex, log, age, edu)
# Limitation:
# every variable named in the label list must exist in the data set;
# enabling select(-sex) further below produces an error.
# NOTE(review): newer labelled releases document a `.strict` argument for
# set_variable_labels() that may relax this; confirm for the installed version.
Hmisc::contents(dt_nolab)
#> | |
#> Data frame:dt_nolab 6 observations and 4 variables Maximum # NAs:0 | |
#> | |
#> | |
#> Levels Storage | |
#> sex 2 integer | |
#> log logical | |
#> age double | |
#> edu 2 integer | |
#> | |
#> +--------+-------------------+ | |
#> |Variable|Levels | | |
#> +--------+-------------------+ | |
#> | sex |f,m | | |
#> +--------+-------------------+ | |
#> | edu |primaria,secundaria| | |
#> +--------+-------------------+ | |
# Attach the dictionary labels to the unlabelled data.
# The mutate()/select() steps repeat the ones already applied to dt_nolab
# earlier in the script, so they leave its columns unchanged.
dt_nolab_newlab <- dt_nolab %>%
  mutate(log = c(rep(TRUE, 5), FALSE)) %>%
  select(sex, log, age, edu) %>%
  # select(-sex) %>% # limitation: dropping a variable named in the list errors
  # set_variable_labels(.labels = dt_dict_list) # for case 1
  set_variable_labels(.labels = dt_dict_wide_list) # for case 2
Hmisc::contents(dt_nolab_newlab)
#> | |
#> Data frame:dt_nolab_newlab 6 observations and 4 variables Maximum # NAs:0 | |
#> | |
#> | |
#> Labels Levels Storage | |
#> sex Sexo de participantes 2 integer | |
#> log logical | |
#> age Edad (años) double | |
#> edu Nivel educativo 2 integer | |
#> | |
#> +--------+-------------------+ | |
#> |Variable|Levels | | |
#> +--------+-------------------+ | |
#> | sex |f,m | | |
#> +--------+-------------------+ | |
#> | edu |primaria,secundaria| | |
#> +--------+-------------------+ | |
# Tibble view of the result: log has no dictionary entry, so its label is NA
as_tibble(look_for(dt_nolab_newlab))
#> # A tibble: 4 x 2 | |
#> variable label | |
#> <chr> <chr> | |
#> 1 sex Sexo de participantes | |
#> 2 log <NA> | |
#> 3 age Edad (años) | |
#> 4 edu Nivel educativo | |
```
<sup>Created on 2019-08-08 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment