Skip to content

Instantly share code, notes, and snippets.

@statgeek
Created December 22, 2016 21:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statgeek/b308ac2cfc9b4db0ee3d793567627af0 to your computer and use it in GitHub Desktop.
Save statgeek/b308ac2cfc9b4db0ee3d793567627af0 to your computer and use it in GitHub Desktop.
Calculate Chi Square for a summary table in SAS
/*
Author: F. Khurshed
Date: June 17, 2011
Purpose: This macro builds a summary table by appending one chi-square cross-tabulation per variable into a single data set. Each variable's block includes the chi-square p-value.
January 17, 2012
*Adding in the ability for the macro to add the total column
February 6, 2012
*Add in row of overall by cross category
*/
%macro summary_table(dsetin, varlist, varcross, rc, missing, dsetout);
/*
Builds a single summary data set (&dsetout) holding:
  - one "Overall" row with the N(%) of each &varcross level and the
    one-way chi-square p-value for &varcross, followed by
  - for every variable in &varlist, one row per level with "count(pct%)"
    per &varcross level, an overall "count(pct%)" column, and the
    chi-square p-value of the variable-by-&varcross table (shown on the
    first row of each variable).

Parameters:
dsetin   = data set in
varlist  = space-delimited list of variables to tabulate against varcross
varcross = variable whose levels go across the top row
rc       = which OUTPCT percentage to report: ROW (or R) uses PCT_ROW,
           COL / COLUMN (or C) uses PCT_COL; case-insensitive
missing  = text added as an option to the per-variable PROC FREQ TABLES
           statements (pass MISSING to count missing levels; blank otherwise)
dsetout  = name of output data set

Notes:
  - Work data sets temp1-temp4, temp3a, overall1, overall2 and chisq are
    created and deleted; same-named data sets in WORK are overwritten.
  - ODS output is suppressed while the macro runs and restored at the end.
  - If PROC FREQ produces no "Chi-Square" row for a variable, the SET of
    chisq in the temp4 step ends that step and no rows are appended for
    that variable.
*/
/*For testing, macro variables settings:
%let dsetin=sashelp.class;
%let varlist=age;
%let rc=col;
%let varcross=sex;
%let missing=missing;
%let dsetout=test;
*/

    /* Keep all working macro variables out of the caller's scope. */
    %local i var var_fmt tot_count p_value rc_key;

    /* Normalise rc: OUTPCT only creates PCT_ROW and PCT_COL, so map the
       first letter of whatever was passed onto one of those two suffixes. */
    %if %length(&rc)=0 %then %let rc_key=;
    %else %let rc_key=%upcase(%substr(&rc, 1, 1));
    %if &rc_key=R %then %let rc=row;
    %else %if &rc_key=C %then %let rc=col;
    %else %do;
        %put ERROR: summary_table: rc must be ROW or COL, received: &rc;
        %return;
    %end;

    /* Delete old output data set (works for one- and two-level names). */
    %if %sysfunc(exist(&dsetout)) %then %do;
        proc delete data=&dsetout;
        run;
    %end;

    /* ---- Overall row at the top, with the totals ---- */
    ods select none;

    proc freq data=&dsetin;
        table &varcross/chisq out=temp1;
        ods output OneWayChiSq=chisq;
    run;

    data temp2;
        set temp1;
        length value $50.;
        value=cats(left(count),"(",put(percent, 8.1),"%)");
        keep &varcross value;
    run;

    /* Transpose so each varcross level becomes its own column. */
    proc transpose data=temp2 out=temp3 (drop = _name_ ) prefix=&varcross._;
        var value;
        id &varcross;
    run;

    /* Get overall total count. */
    proc sql noprint;
        select count(*) into :tot_count
        from &dsetin;
    quit;
    /* INTO : right-aligns numbers; re-assigning strips the leading blanks. */
    %let tot_count=&tot_count;

    /* Get p-value from the one-way chi-square table; default to missing so
       the assignment below still compiles when no P_PCHI row exists. */
    %let p_value=.;
    proc sql noprint;
        select nvalue1 into :p_value
        from chisq
        where name1='P_PCHI';
    quit;
    %let p_value=&p_value;

    /* Base data set. The character lengths match what the loop appends
       (vart/category $256, overall $50) so PROC APPEND FORCE does not
       truncate the appended values. */
    data &dsetout;
        set temp3;
        length vart $256 category $256 overall $50;
        vart='Overall';
        category='N(%)';
        overall="&tot_count."||" (100.0%)";
        chisq=&p_value;
    run;

    /* Remove the overall-section work tables so a stale one-way chisq
       table can never be read by the loop below. */
    proc datasets nodetails nolist;
        delete temp1 temp2 temp3 chisq;
    quit;

    /* ---- Loop through variable list ---- */
    %let i=1;
    /* %length(%qscan()) avoids evaluating the variable name itself in the
       condition (names such as OR or NE would be read as operators). */
    %do %while (%length(%qscan(&varlist, &i, %str( ))) > 0);
        %let var=%scan(&varlist, &i, %str( ));
        %put &i &var &rc &varcross;

        /* Capture the variable's format. "if 0 then set" loads the variable
           attributes at compile time without reading any rows, so this also
           works on an empty input data set. */
        data _null_;
            if 0 then set &dsetin;
            call symputx("var_fmt", vformat(&var));
            stop;
        run;

        /* Get frequency counts and stats. */
        ods select none;
        proc freq data=&dsetin;
            tables &var.*&varcross./out=temp1 chisq outpct &missing;
            ods output chisq=chisq;
        run;

        /* Concatenate count and percentage into one display variable. */
        data temp2;
            set temp1;
            length value $50.;
            value=cats(left(count),"(",put(pct_&rc, 8.1),"%)");
            keep &var &varcross count value;
        run;

        proc sort data=temp2;
            by &var &varcross;
        run;

        /* Transpose so each varcross level becomes its own column. */
        proc transpose data=temp2 out=temp3 (drop = _name_ ) prefix=&varcross._;
            by &var.;
            var value;
            id &varcross;
        run;

        /* Get the overall counts for the variable as well. */
        proc freq data=&dsetin;
            tables &var/out=overall1 &missing;
        run;

        data overall2;
            length overall $50.;
            set overall1;
            overall=cats(left(count),"(",put(percent, 8.1),"%)");
            keep &var overall;
        run;

        /* Merge in the overall column. */
        data temp3a;
            merge temp3 overall2;
            by &var.;
        run;

        /* Add in the test stats; the chi-square row is read once and its
           value is retained for every row because it comes from a SET. */
        data temp4;
            length vart $256. category $256.;
            set temp3a;
            if _n_=1 then do;
                set chisq (where=(Statistic="Chi-Square") keep=prob statistic rename=(prob=chisq));
            end;
            vart=vlabel(&var.);
            category=put(&var., &var_fmt);
            drop &var. statistic;
        run;

        /* Append datasets. */
        proc append data=temp4 base=&dsetout force;
        run;

        /* Drop every work table so nothing leaks into the next iteration. */
        proc datasets nodetails nolist;
            delete temp1 temp2 temp3 temp3a temp4 overall1 overall2 chisq;
        quit;

        /* Increment counter. */
        %let i=%eval(&i+1);
    %end;

    /* Show the test name and formatted p-value only on the first row of
       each variable's block. */
    data &dsetout;
        set &dsetout;
        length p_value $8.;
        by vart notsorted;
        if first.vart then do;
            test='Chi Square';
            p_value=put(chisq, 8.4);
        end;
        drop chisq;
    run;

    ods select all;
%mend summary_table;
/* Demo: add a random two-level grouping variable to SASHELP.CARS and
   summarise three variables against it. */
data cars;
    set sashelp.cars;
    /* Fixed seed for the random number stream used by RAND below. */
    call streaminit(100);
    group= rand('bernoulli', 0.5);
run;

/* rc=col requests column percentages: the macro builds the variable name
   pct_&rc, and PROC FREQ OUTPCT only creates PCT_ROW and PCT_COL, so the
   previous value rc=c referenced a variable that does not exist. */
%summary_table(dsetin=cars, dsetout=test, varlist=cylinders origin Type,
varcross=Group, rc=col, missing=missing);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment