Last active
January 5, 2019 03:51
-
-
Save statgeek/2de1faf1644dc8160fe721056202f111 to your computer and use it in GitHub Desktop.
SAS - Summarize data into missing and non missing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*This program creates a report with the number and percent of
missing data for each variable in the data set.
The only change should be to the macro variable, INPUT_DSN.
Author: F. Khurshed
Date: 2019-01-04
*/
/* Build a demo data set from SASHELP.CLASS with some values blanked out,
   so the missing-value report below has something to show. */
data class;
    set sashelp.class;
    label age = "Fancy Age Label";

    /* Every 14-year-old loses height, weight and sex. */
    if age eq 14 then do;
        call missing(height, weight, sex);
    end;

    /* Alfred additionally loses age. This test must come after the age
       test above, because it sets age to missing. */
    if name eq 'Alfred' then do;
        call missing(sex, age, height);
    end;
run;
/* Names of the data set to analyse and of the report data set to create.
   INPUT_DSN is the only value a user is expected to change. */
%let INPUT_DSN=class;
%let OUTPUT_DSN=want;
/* Formats that collapse every value into "Missing" / "Not Missing".
   $missfmt  - character variables: a blank value is missing.
   nmissfmt  - numeric variables: the ordinary missing value (.) and the
               special missing values (._ and .A through .Z) are all
               missing. The range ._ - .Z spans them because ._ sorts
               lowest and .Z highest among numeric missing values. */
proc format;
    value $missfmt
        ' '     = "Missing"
        other   = "Not Missing";
    value nmissfmt
        ._ - .Z = "Missing"
        other   = "Not Missing";
run;
/* Count missing vs. non-missing values for every variable of the input
   data set. The formats collapse each variable into the two categories
   and the MISSING option keeps the missing level in the table. */

/* Turn displayed output off so the results do not get too messy. */
ods select none;

/* Capture the one-way frequency tables in the data set TEMP. */
ods table onewayfreqs=temp;

proc freq data=&INPUT_DSN.;
    format _character_ $missfmt. _numeric_ nmissfmt.;
    tables _all_ / missing;
run;

/* Turn displayed output back on. */
ods select all;
/* Reshape the captured PROC FREQ output (TEMP) into one row per
   variable / category, with a display string "N (pct%)", then sort by
   variable so the BY-group transposes that follow can run. */
data long;
    length variable $32. variable_value $50.;
    set temp;

    /* Second word of TABLE is taken as the variable name
       (presumably TABLE reads like "Table Age" - confirm on your release). */
    Variable = scan(table, 2);

    /* Formatted value of the column whose name is stored in VARIABLE. */
    Variable_Value = strip(vvaluex(variable));

    /* PERCENT7.1 right-aligns its result, so STRIP (not TRIM) is needed
       to avoid a blank after the opening parenthesis. */
    presentation = catt(frequency, " (", strip(put(percent/100, percent7.1)), ")");

    keep variable variable_value frequency percent cum: presentation;
    label variable='Variable' variable_value='Variable Value';
run;

proc sort data=long;
    by variable;
run;
/* Wide layout for presentation: one row per variable, one column per
   category, each cell holding the "N (Percent)" display string. */
proc transpose data=long
               out=wide_presentation(drop=_name_);
    var presentation;
    id variable_value;
    by variable;
run;
/* Wide layout of the counts only; new columns are prefixed with N_. */
proc transpose data=long
               prefix=N_
               out=wide_N;
    var frequency;
    id variable_value;
    by variable;
run;
/* Wide layout of the percentages only; new columns are prefixed with PCT_. */
proc transpose data=long
               prefix=PCT_
               out=wide_PCT;
    var percent;
    id variable_value;
    by variable;
run;
/* Final output file: combine the count, percent and presentation
   columns into one row per variable.
   Each of the six report columns gets exactly one label. The percent of
   non-missing values is PCT_Not_Missing; labelling N_Not_Missing a second
   time would overwrite its '# Not Missing' label and leave the percent
   column unlabelled. */
data &Output_DSN.;
    merge wide_N wide_PCT wide_presentation;
    by variable;

    /* _NAME_ is carried in from the transposes that did not drop it. */
    drop _name_;

    label N_Missing       = '# Missing'
          N_Not_Missing   = '# Not Missing'
          PCT_Missing     = '% Missing'
          PCT_Not_Missing = '% Not Missing'
          Missing         = 'Missing'
          Not_missing     = 'Not Missing';
run;
/* Print the finished report, showing column labels and no OBS column. */
title "Missing Report of &INPUT_DSN.";

proc print data=&output_dsn. label noobs;
run;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment