Skip to content

Instantly share code, notes, and snippets.

@avallecam
Last active September 13, 2019 23:18
Show Gist options
  • Save avallecam/506f6c8f903ede5d689a0699d37fb101 to your computer and use it in GitHub Desktop.
Save avallecam/506f6c8f903ede5d689a0699d37fb101 to your computer and use it in GitHub Desktop.
De Stata a R: uso de las funciones del paquete labelled
``` r
library(tidyverse)
library(haven)
library(labelled)
# create data frame ------------------------------
# Fixed seed so the simulated ages are reproducible across runs.
set.seed(22)
dt_nolab <- tibble(
  # six ages drawn from a normal distribution (mean 25, sd 10), rounded down
  age = floor(rnorm(6, mean = 25, sd = 10)),
  # two-level factor alternating f, m over the six rows
  sex = gl(2, 1, 6, labels = c("f", "m")),
  # character column: three "primaria" followed by three "secundaria"
  edu = rep(c("primaria", "secundaria"), each = 3)
) %>%
  # convert character columns to factors; across(where()) replaces the
  # superseded mutate_if()
  mutate(across(where(is.character), as.factor))
# labels? -----------------------------------------------------------------
# Inspect the freshly built data frame: no variable labels exist yet.
# contents() is called on the bare object because its header echoes the
# name of the object it was given.
Hmisc::contents(dt_nolab)
#>
#> Data frame:dt_nolab 6 observations and 3 variables Maximum # NAs:0
#>
#>
#> Levels Storage
#> age double
#> sex 2 integer
#> edu 2 integer
#>
#> +--------+-------------------+
#> |Variable|Levels |
#> +--------+-------------------+
#> | sex |f,m |
#> +--------+-------------------+
#> | edu |primaria,secundaria|
#> +--------+-------------------+
# Every column reports NULL, i.e. no label attribute is set.
labelled::var_label(dt_nolab)
#> $age
#> NULL
#>
#> $sex
#> NULL
#>
#> $edu
#> NULL
# Without labels the search result lists the variable names only.
labelled::look_for(dt_nolab)
#> variable
#> 1 age
#> 2 sex
#> 3 edu
# assign labels to variables ------------------
# Attach one descriptive label per column and store the labelled copy in dt.
dt <- labelled::set_variable_labels(
  dt_nolab,
  age = "Edad (años)",
  sex = "Sexo de participantes",
  edu = "Nivel educativo"
)
# verify labels --------------------------------------------------------
# Each column of dt should now report the label assigned to it.
# contents() is called on the bare object because its header echoes the
# name of the object it was given.
Hmisc::contents(dt)
#>
#> Data frame:dt 6 observations and 3 variables Maximum # NAs:0
#>
#>
#> Labels Levels Storage
#> age Edad (años) double
#> sex Sexo de participantes 2 integer
#> edu Nivel educativo 2 integer
#>
#> +--------+-------------------+
#> |Variable|Levels |
#> +--------+-------------------+
#> | sex |f,m |
#> +--------+-------------------+
#> | edu |primaria,secundaria|
#> +--------+-------------------+
# Labels returned as a named list, one element per column.
labelled::var_label(dt)
#> $age
#> [1] "Edad (años)"
#>
#> $sex
#> [1] "Sexo de participantes"
#>
#> $edu
#> [1] "Nivel educativo"
# Search the labelled data frame (dt) so each label is listed next to its
# variable name; the unlabelled dt_nolab was already inspected earlier.
look_for(dt)
#> variable label
#> 1 age Edad (años)
#> 2 sex Sexo de participantes
#> 3 edu Nivel educativo
# details = TRUE adds per-variable columns such as class, type, levels and
# the number of missing values.
look_for(dt, details = TRUE)
#> variable label class type levels
#> 1 age Edad (años) numeric double
#> 2 sex Sexo de participantes factor integer f; m
#> 3 edu Nivel educativo factor integer primaria; secundaria
#> value_labels unique_values n_na na_values na_range
#> 1 6 0
#> 2 2 0
#> 3 2 0
# The structure shows each label stored as a "label" attribute on its column.
str(dt)
#> Classes 'tbl_df', 'tbl' and 'data.frame': 6 obs. of 3 variables:
#> $ age: num 19 49 35 27 22 43
#> ..- attr(*, "label")= chr "Edad (años)"
#> $ sex: Factor w/ 2 levels "f","m": 1 2 1 2 1 2
#> ..- attr(*, "label")= chr "Sexo de participantes"
#> $ edu: Factor w/ 2 levels "primaria","secundaria": 1 1 1 2 2 2
#> ..- attr(*, "label")= chr "Nivel educativo"
# All attributes of a single column ...
attributes(dt[["age"]])
#> $label
#> [1] "Edad (años)"
# ... and the label alone, pulled out of that attribute list.
attributes(dt[["age"]])[["label"]]
#> [1] "Edad (años)"
# write to dta ------------------------------
#dt %>% haven::write_dta("data/new_db.dta")
# read dta with labels -----------------------------------------------------
#dt_new <- read_dta("data/new_db.dta") %>% as_factor()
# The Stata round trip above is disabled in this example, so dt_new is
# simply the in-memory labelled data frame.
dt_new <- dt
# contents() is called on the bare object because its header echoes the
# name of the object it was given.
Hmisc::contents(dt_new)
#>
#> Data frame:dt_new 6 observations and 3 variables Maximum # NAs:0
#>
#>
#> Labels Levels Storage
#> age Edad (años) double
#> sex Sexo de participantes 2 integer
#> edu Nivel educativo 2 integer
#>
#> +--------+-------------------+
#> |Variable|Levels |
#> +--------+-------------------+
#> | sex |f,m |
#> +--------+-------------------+
#> | edu |primaria,secundaria|
#> +--------+-------------------+
# Labels as a named list, one element per column.
labelled::var_label(dt_new)
#> $age
#> [1] "Edad (años)"
#>
#> $sex
#> [1] "Sexo de participantes"
#>
#> $edu
#> [1] "Nivel educativo"
# Variable names side by side with their labels.
labelled::look_for(dt_new)
#> variable label
#> 1 age Edad (años)
#> 2 sex Sexo de participantes
#> 3 edu Nivel educativo
# dictionary --------------------------------------------------------
# case 1: the data frame already carries labels;
# extract them as a named list with var_label()
dt_dict_list <- var_label(dt_new)
# common dictionary layout: one row per variable with its label and details
# (TRUE spelled out: T is an ordinary variable that can be reassigned)
look_for(dt_new, details = TRUE)
#> variable label class type levels
#> 1 age Edad (años) numeric double
#> 2 sex Sexo de participantes factor integer f; m
#> 3 edu Nivel educativo factor integer primaria; secundaria
#> value_labels unique_values n_na na_values na_range
#> 1 6 0
#> 2 2 0
#> 3 2 0
# case 2: a data frame holding variable names and variable labels;
# convert it to a named list (one element per variable)
dt_dict_wide_list <-
  look_for(dt_new) %>%
  as_tibble() %>%
  # keep only the name/label pair so that any additional columns in the
  # look_for() result cannot end up as extra entries in the dictionary
  select(variable, label) %>%
  # one column per variable, holding that variable's label
  pivot_wider(names_from = variable, values_from = label) %>%
  as.list() %>%
  # print() returns its input invisibly, so the list is still assigned
  print()
#> $age
#> [1] "Edad (años)"
#>
#> $edu
#> [1] "Nivel educativo"
#>
#> $sex
#> [1] "Sexo de participantes"
# from data to list --------------------------------------------------
# Both dictionaries hold the same name/label pairs; only the order of the
# elements differs.
print(dt_dict_list)
#> $age
#> [1] "Edad (años)"
#>
#> $sex
#> [1] "Sexo de participantes"
#>
#> $edu
#> [1] "Nivel educativo"
print(dt_dict_wide_list)
#> $age
#> [1] "Edad (años)"
#>
#> $edu
#> [1] "Nivel educativo"
#>
#> $sex
#> [1] "Sexo de participantes"
# assign data labels ------------------------------------------------------
# Add a logical column that has no dictionary entry and reorder the columns,
# so the target data no longer matches the dictionary one-to-one.
dt_nolab <- dt_nolab %>%
  mutate(log = c(rep(TRUE, 5), FALSE)) %>%
  select(sex, log, age, edu)
# limitation:
# every variable named in the label list must exist in the data frame;
# enabling the select(-sex) step below raises an error
Hmisc::contents(dt_nolab)
#>
#> Data frame:dt_nolab 6 observations and 4 variables Maximum # NAs:0
#>
#>
#> Levels Storage
#> sex 2 integer
#> log logical
#> age double
#> edu 2 integer
#>
#> +--------+-------------------+
#> |Variable|Levels |
#> +--------+-------------------+
#> | sex |f,m |
#> +--------+-------------------+
#> | edu |primaria,secundaria|
#> +--------+-------------------+
# dt_nolab already has the log column and this column order (set in the
# step above), so it is piped straight into the labelling call instead of
# repeating the same mutate()/select().
dt_nolab_newlab <- dt_nolab %>%
  # select(-sex) %>% # limitation: dropping a variable named in the list errors
  # set_variable_labels(.labels = dt_dict_list) # case 1
  set_variable_labels(.labels = dt_dict_wide_list) # case 2
# contents() is called on the bare object because its header echoes the
# name of the object it was given.
Hmisc::contents(dt_nolab_newlab)
#>
#> Data frame:dt_nolab_newlab 6 observations and 4 variables Maximum # NAs:0
#>
#>
#> Labels Levels Storage
#> sex Sexo de participantes 2 integer
#> log logical
#> age Edad (años) double
#> edu Nivel educativo 2 integer
#>
#> +--------+-------------------+
#> |Variable|Levels |
#> +--------+-------------------+
#> | sex |f,m |
#> +--------+-------------------+
#> | edu |primaria,secundaria|
#> +--------+-------------------+
# log has no entry in the dictionary, so its label stays missing.
look_for(dt_nolab_newlab) %>% as_tibble()
#> # A tibble: 4 x 2
#> variable label
#> <chr> <chr>
#> 1 sex Sexo de participantes
#> 2 log <NA>
#> 3 age Edad (años)
#> 4 edu Nivel educativo
```
<sup>Created on 2019-08-08 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment