Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@apoorvalal
Last active April 13, 2022 19:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save apoorvalal/b7d69d451dfa262a43532ac9baaf1681 to your computer and use it in GitHub Desktop.
Save apoorvalal/b7d69d451dfa262a43532ac9baaf1681 to your computer and use it in GitHub Desktop.
walk through dir with stata files and export codebook
*! v0.1 Apoorva Lal
// returns dataset with dataset's <describe> output
// useful for very wide datasets / datasets with useless/uninformative
// variable names
// e.g. USAID's Demographic and Health Surveys
pr define varlabelbook
syntax [anything]
qui compress
qui ds `anything'
loc vl "`r(varlist)'"
loc nrows: word count `vl'
forv i = 1/`nrows' {
loc vname_`i' :word `i' of `vl'
loc var_label_`i':var l `vname_`i''
loc type_`i' :type `vname_`i''
if inlist("`type_`i''", "byte","int") {
count_uniq `vname_`i''
loc distinct_vals_`i' = `r(nv)'
if `r(nv)' <= 20 {
loc la_`vname_`i'': val l `vname_`i''
loc label_exists = cond("`la_`vname_`i'''" != "",1,0)
if `label_exists'{
loc values_and_labels_`i' ""
qui levelsof `vname_`i'', loc(`vname_`i''_l) clean
foreach val in ``vname_`i''_l' {
loc vlab: label `la_`vname_`i''' `val'
loc curr_value_label = subinstr("`vlab'",`"""',"",.) // " for pesky embedded quotes
loc values_and_labels_`i' "`values_and_labels_`i''; `val': `curr_value_label'"
}
}
}
}
else {
if "`=substr("`type_`i''",1,3)'" == "str" { // "
count_uniq `vname_`i''
loc distinct_vals_`i' = `r(nv)'
}
else {
loc distinct_vals_`i' = 99999 // set number of distinct values to 99999 if
}
}
}
qui count
loc nobs_tot `r(N)'
clear
set obs `nrows'
qui g variable = ""
qui g type = ""
qui g var_label = ""
qui g distinct_vals = .
qui g vals_and_labels = ""
qui replace variable = "NObs" if _n == 1
qui replace distinct_vals = `nobs_tot' if _n == 1
forv i = 2/`nrows' {
qui replace variable = "`vname_`i''" if _n == `i'
qui replace var_label = "`var_label_`i''" if _n == `i'
qui replace distinct_vals = `distinct_vals_`i'' if _n == `i'
qui replace type = "`type_`i''" if _n == `i'
qui replace vals_and_labels = "`values_and_labels_`i''" if _n == `i'
}
end
clear all
gl root "C:/Users/alal/Downloads/STAR01_PUBLIC"
cd $root
gl files: dir "$root" files "*.dta"
foreach f in $files {
loc f_cb = subinstr("`f'",".dta","_cb.csv",.)
use "`f'", clear
varlabelbook // relies on https://github.com/apoorvalal/misc_stata_ados/blob/master/varlabelbook.ado
export delimited using "`f_cb'", replace
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment