Created
December 22, 2016 21:05
-
-
Save statgeek/b308ac2cfc9b4db0ee3d793567627af0 to your computer and use it in GitHub Desktop.
Calculate Chi Square for a summary table in SAS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Author:  F. Khurshed
Date:    June 17, 2011
Purpose: This macro builds a set of chi-square summary tables, one per
         variable, appended together into a single data set. Each table
         includes the chi-square p-value.

Change log:
  January 17, 2012 - Added the ability for the macro to add the total column.
  February 6, 2012 - Added a row of overall counts by cross category.
*/
%macro summary_table(dsetin, varlist, varcross, rc, missing, dsetout);
/*
  Builds one stacked summary data set: an "Overall" row for VARCROSS followed
  by, for every variable in VARLIST, one row per level holding "count(pct%)"
  cells for each level of VARCROSS, an overall column and the chi-square
  p-value (shown on the first row of each variable).

  Parameters:
    dsetin   = input data set
    varlist  = space-separated list of variables to tabulate
    varcross = variable whose levels run across the top of the table
    rc       = percentage basis for the cells: row or col
               (r, c and column are accepted as abbreviations/synonyms;
               any other value is used as-is to form pct_<rc>)
    missing  = text inserted as an option on the PROC FREQ TABLES statements
               for the per-variable tables (e.g. missing); may be left blank
    dsetout  = name of the output data set (one- or two-level name)

  For testing, macro variable settings:
    %let dsetin=sashelp.class;
    %let varlist=age;
    %let rc=col;
    %let varcross=sex;
    %let missing=missing;
    %let dsetout=test;
*/
    %local i var var_fmt tot_count p_value;

    /* PROC FREQ OUTPCT names its variables PCT_ROW / PCT_COL, so map the */
    /* common abbreviations onto those suffixes before building pct_&rc.  */
    %if %upcase(&rc) = C or %upcase(&rc) = COLUMN %then %let rc=col;
    %else %if %upcase(&rc) = R %then %let rc=row;

    /* Delete any old copy of the output so a failed run cannot leave stale */
    /* results behind. PROC DELETE also handles two-level names.            */
    %if %sysfunc(exist(&dsetout)) %then %do;
        proc delete data=&dsetout;
        run;
    %end;

    /* ---- Overall row: one-way frequencies of the cross variable ---- */
    ods select none;

    proc freq data=&dsetin;
        table &varcross / chisq out=temp1;
        ods output OneWayChiSq=chisq;
    run;

    /* Format each cell as count(pct%) */
    data temp2;
        set temp1;
        length value $50;
        value = cats(count, "(", put(percent, 8.1), "%)");
        keep &varcross value;
    run;

    /* One column per level of the cross variable */
    proc transpose data=temp2 out=temp3 (drop=_name_) prefix=&varcross._;
        var value;
        id &varcross;
    run;

    /* Default to missing in case the one-way test produced no p-value row */
    %let p_value=.;

    proc sql noprint;
        /* Total number of rows in the input */
        select count(*) into :tot_count trimmed
            from &dsetin;

        /* P-value of the one-way chi-square test, at full precision */
        select put(nvalue1, best32.) into :p_value trimmed
            from chisq
            where name1 = 'P_PCHI';
    quit;

    /* Lengths match the rows appended later so that PROC APPEND FORCE */
    /* does not truncate labels, categories or overall cells.          */
    data &dsetout;
        set temp3;
        length vart $256 category $256 overall $50;
        vart     = 'Overall';
        category = 'N(%)';
        overall  = cats("&tot_count", "(100.0%)");
        chisq    = &p_value;
    run;

    /* Remove the overall-row work tables so the loop cannot pick up a */
    /* stale chisq table from the one-way test.                        */
    proc datasets nodetails nolist;
        delete temp1 temp2 temp3 chisq;
    quit;

    /* ---- One block of rows per variable in the list ---- */
    %let i=1;
    %let var=%scan(&varlist, &i, %str( ));

    %do %while (%length(&var) > 0);
        %put &i &var &rc &varcross;

        /* Capture the variable format from the data set header only; */
        /* no observations are read.                                  */
        data _null_;
            if 0 then set &dsetin;
            call symputx("var_fmt", vformat(&var), "L");
            stop;
        run;

        /* Frequency counts, row/column percentages and chi-square test */
        ods select none;

        proc freq data=&dsetin;
            tables &var. * &varcross. / out=temp1 chisq outpct &missing;
            ods output chisq=chisq;
        run;

        /* Format each cell as count(pct%) using the requested basis */
        data temp2;
            set temp1;
            length value $50;
            value = cats(count, "(", put(pct_&rc, 8.1), "%)");
            keep &var &varcross count value;
        run;

        proc sort data=temp2;
            by &var &varcross;
        run;

        /* One row per level of the variable, one column per cross level */
        proc transpose data=temp2 out=temp3 (drop=_name_) prefix=&varcross._;
            by &var.;
            var value;
            id &varcross;
        run;

        /* Overall counts for each level of the variable */
        proc freq data=&dsetin;
            tables &var / out=overall1 &missing;
        run;

        data overall2;
            length overall $50;
            set overall1;
            overall = cats(count, "(", put(percent, 8.1), "%)");
            keep &var overall;
        run;

        /* Merge in the overall column */
        data temp3a;
            merge temp3 overall2;
            by &var.;
        run;

        /* Add the test statistic, the variable label and the formatted    */
        /* category. The chisq row is read once and retained for all rows. */
        /* NOTE(review): if chisq has no "Chi-Square" row, this SET ends   */
        /* the step and no rows are appended for this variable.            */
        data temp4;
            length vart $256 category $256;
            set temp3a;
            if _n_ = 1 then do;
                set chisq (where=(Statistic = "Chi-Square")
                           keep=prob statistic
                           rename=(prob=chisq));
            end;
            vart = vlabel(&var.);
            /* strip() left-aligns values written with numeric formats */
            category = strip(put(&var., &var_fmt));
            drop &var. statistic;
        run;

        /* Append this block of rows to the output */
        proc append data=temp4 base=&dsetout force;
        run;

        /* Drop every work table created in this pass */
        proc datasets nodetails nolist;
            delete temp1 temp2 temp3 temp3a temp4 overall1 overall2 chisq;
        quit;

        /* Advance to the next variable */
        %let i=%eval(&i + 1);
        %let var=%scan(&varlist, &i, %str( ));
    %end;

    /* Show the test name and p-value only on the first row of each variable */
    data &dsetout;
        set &dsetout;
        length p_value $8 test $10;
        by vart notsorted;
        if first.vart then do;
            test    = 'Chi Square';
            p_value = put(chisq, 8.4);
        end;
        drop chisq;
    run;

    ods select all;
%mend summary_table;
/* Example: add a random two-level grouping variable to SASHELP.CARS and */
/* summarise three variables across it using column percentages.         */
data cars;
    set sashelp.cars;
    /* Seed once so the random grouping is reproducible */
    if _n_ = 1 then call streaminit(100);
    group = rand('bernoulli', 0.5);
run;

/* rc=col selects PCT_COL from PROC FREQ OUTPCT; the previous rc=c pointed */
/* at a variable (pct_c) that PROC FREQ does not create.                   */
%summary_table(dsetin=cars, dsetout=test, varlist=cylinders origin Type,
               varcross=Group, rc=col, missing=missing);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment