Last active
November 5, 2024 10:51
-
-
Save mbacou/6285959 to your computer and use it in GitHub Desktop.
How to read and write STATA variable and value labels from R.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Using read.dta() to load a sample STATA data file.
# read.dta() lives in the foreign package, so attach it first.
library(foreign)

dta <- read.dta("my_toy_data.dta")

# Let's describe dta. The output of str() is reproduced below as comments;
# note the STATA-specific attributes listed after the columns.
str(dta)
# 'data.frame': 95202 obs. of 13 variables:
#  $ hh     : num 1.01e+09 1.01e+09 1.01e+09 1.01e+09 1.01e+09 ...
#  $ h14aq2 : Factor w/ 61 levels "matooke","matooke",..: 50 51 34 47 57 41 22 1 13 53 ...
#  $ h14aq3 : Factor w/ 93 levels "Kilogram (kg)",..: 51 26 85 1 NA 1 55 70 1 33 ...
#  $ h14aq4 : num 0.5 4 4 1 NA 1.5 5 3 4 10 ...
#  $ h14aq5 : num 125 2000 800 1600 1000 1200 1500 3000 2800 1000 ...
#  $ h14aq6 : num NA NA NA NA NA NA NA NA NA NA ...
#  $ h14aq7 : num NA NA NA NA NA NA NA NA NA NA ...
#  $ h14aq8 : num NA NA NA NA NA NA NA NA NA NA ...
#  $ h14aq9 : num NA NA NA NA NA NA NA NA NA NA ...
#  $ h14aq10: num NA NA NA NA NA NA 4 NA NA NA ...
#  $ h14aq11: num NA NA NA NA NA NA 1200 NA NA NA ...
#  $ h14aq12: num 250 500 200 1600 NA 800 300 1000 700 100 ...
#  $ h14aq13: num NA NA NA NA NA NA NA NA NA NA ...
#  - attr(*, "datalabel")= chr ""
#  - attr(*, "time.stamp")= chr ""
#  - attr(*, "formats")= chr "%11.0f" "%8.0f" "%8.0f" "%8.2f" ...
#  - attr(*, "types")= int 102 102 102 102 102 102 102 102 102 102 ...
#  - attr(*, "val.labels")= chr "" "h14aq2" "h14aq3" "" ...
#  - attr(*, "var.labels")= chr "household code" "code" "unit of quantity" "hh consumption out of purchase.quantity" ...
#  - attr(*, "version")= int 7
#  - attr(*, "label.table")=List of 2
#   ..$ h14aq2: Named num 101 102 103 104 105 106 107 108 109 110 ...
#   .. ..- attr(*, "names")= chr "matooke" "matooke" "matooke" "matooke" ...
#   ..$ h14aq3: Named num 1 2 3 4 5 6 7 8 9 10 ...
#   .. ..- attr(*, "names")= chr "Kilogram (kg)" "Gram" "Litre" "Small cup with handle (Akendo)" ...
# "val.labels", "var.labels", "label.table" are attributes specific to the
# STATA .dta format. In this example they contain:

# One variable label per column, in column order
attr(dta, "var.labels")
#  [1] "household code"
#  [2] "code"
#  [3] "unit of quantity"
#  [4] "hh consumption out of purchase.quantity"
#  [5] "hh consumption out of purchase. value"
#  [6] "consumption out of purchase away from home.quantity"
#  [7] "consumption out of purchase away from home.value"
#  [8] "consumption out of home produce quantity"
#  [9] "consumption out of home produce value"
# [10] "received in kind.quantity"
# [11] "received in kind.value"
# [12] "market price"
# [13] "farm gate price"

# For each column, the name of its value-label set ("" when it has none)
attr(dta, "val.labels")
# [1] "" "h14aq2" "h14aq3" "" "" "" "" ""
# [9] "" "" "" "" ""

# So there are value labels attached to columns 2:3 ("code" and "unit of quantity")

# Each label set is a named numeric vector: names are the labels, values the codes.
# `[[` is used rather than `$` so the set name is matched exactly.
attr(dta, "label.table")[["h14aq2"]]
# matooke matooke
# 101 102
# matooke matooke
# 103 104
# sweet potatoes fresh sweet potatoes dry
# 105 106
# cassava fresh cassava dry/flour
# 107 108
# etc...

attr(dta, "label.table")[["h14aq3"]]
# Kilogram (kg) Gram
# 1 2
# Litre Small cup with handle (Akendo)
# 3 4
# Metre Square metre
# 5 6
# Yard Millilitre
# 7 8
# etc...
# Attach variable and value labels to a new R data.frame.
# A small two-column example data.frame is built here so the snippet runs on
# its own; substitute your own data (one variable label per column is needed).
df <- data.frame(item = c(10, 20), unit = c(1, 2))

# One variable label per column, in column order
attr(df, "var.labels") <- c("my var label 1", "my var label 2")

# To create and attach value labels: each set is a named numeric vector
# (names are the labels, values are the codes)
val1 <- c(apple = 10, beans = 20)
val2 <- c(kg = 1, bag = 2)
# The list elements are named so that the entries of "val.labels" can refer to
# them by name, mirroring the named "label.table" that read.dta() returns.
attr(df, "label.table") <- list(val1 = val1, val2 = val2)
attr(df, "val.labels") <- c("val1", "val2")

# And finally describe the dataset
attr(df, "datalabel") <- "Collected from MWI 2013"

# Author and date are nice to have too.
# The time stamp is stored as a character string, the type read.dta() reports
# for this attribute, rather than as a Date object.
attr(df, "time.stamp") <- format(Sys.time(), "%d %b %Y %H:%M")
attr(df, "author") <- "Melanie Bacou, mel@mbacou.com"

# When using write.dta(df), STATA will read in the attached datalabel, time.stamp, and value labels.
# Depending on STATA version it might also read in the variable labels (I'm having mixed luck here).
# NOTE(review): the write.dta() documentation describes value labels as being
# created from factor levels; confirm whether the "label.table"/"val.labels"
# attributes set above are actually written, or convert labelled columns to
# factors before exporting.
Having the same problem - I use write_dta from haven but cannot seem to make the value labels work (saved in a specific attribute in R).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@florenzion I came across your post as I am having trouble exporting value labels from R->STATA as well. Did you ever find a solution to this?