Skip to content

Instantly share code, notes, and snippets.

@josephlei
Created April 16, 2015 18:42
Show Gist options
  • Save josephlei/37e60921af9ff4733f27 to your computer and use it in GitHub Desktop.
Save josephlei/37e60921af9ff4733f27 to your computer and use it in GitHub Desktop.
SAS Macro definition for splitting one large file into smaller ones based on a key column containing discrete values
OPTIONS NOMPRINT MCOMPILENOTE=ALL NOSYMBOLGEN NOMLOGIC;
DATA SASUSER.COUNTY_SAWS_REF;
LENGTH COUNTY $13 SAWS $3;
INFILE DATALINES DSD DLM=",";
INPUT COUNTY $ SAWS $;
DATALINES;
ALAMEDA,CW
ALPINE,CIV
AMADOR,CIV
BUTTE,CIV
CALAVERAS,CIV
COLUSA,CIV
CONTRACOSTA,CW
DELNORTE,CIV
ELDORADO,CIV
FRESNO,CW
GLENN,CIV
HUMBOLDT,CIV
IMPERIAL,CIV
INYO,CIV
KERN,CIV
KINGS,CIV
LAKE,CIV
LASSEN,CIV
LOSANGELES,LDR
MADERA,CIV
MARIN,CIV
MARIPOSA,CIV
MENDOCINO,CIV
MERCED,CIV
MODOC,CIV
MONO,CIV
MONTEREY,CIV
NAPA,CIV
NEVADA,CIV
ORANGE,CW
PLACER,CW
PLUMAS,CIV
RIVERSIDE,CIV
SACRAMENTO,CW
SANBENITO,CIV
SANBERNARDINO,CIV
SANDIEGO,CW
SANFRANCISCO,CW
SANJOAQUIN,CIV
SANLUISOBISPO,CW
SANMATEO,CW
SANTABARBARA,CW
SANTACLARA,CW
SANTACRUZ,CW
SHASTA,CIV
SIERRA,CIV
SISKIYOU,CIV
SOLANO,CW
SONOMA,CW
STANISLAUS,CIV
SUTTER,CIV
TEHAMA,CIV
TRINITY,CIV
TULARE,CW
TUOLUMNE,CIV
VENTURA,CW
YOLO,CW
YUBA,CIV
;
RUN;
*CREATE MVARS;
DATA _NULL_;
SET SASUSER.COUNTY_SAWS_REF END=EOF;
CALL SYMPUTX("SAWSREF"||LEFT(_N_), SAWS,"G");
CALL SYMPUTX("COUNTYREF"||LEFT(_N_), LEFT(COUNTY),"G");
*CREATE MACRO VAR "SAWSREF1 TO SAWSREF58" THAT HOLDS CIV, CW, LDR;
*CREATE MACRO VARS "COUNTYREF1 TO COUNTYREF58 THAT HOLDS COMPRESSED COUNTY NAMES;
*QA POINT, IF COUNTREFS (_N_ VAR) > 58 DISCRETE VALUES WILL TRIGGER A WARNING;
IF EOF THEN
DO;
CALL SYMPUTX("COUNTREFS", _N_);
IF _N_ > 58 THEN
PUT "ERROR: TOO MANY MVARS, EXAMINE CAREFULLY!";
END;
RUN;
%MACRO EXPORTCOUNTIES(SETNAME2=,OUTPATH2=G:\);
%DO I=1 %TO 58;
PROC EXPORT DATA=&&COUNTYREF&I
OUTFILE="&OUTPATH2\&SETNAME2._&&SAWSREF&I.._&&COUNTYREF&I...CSV"
DBMS=CSV REPLACE;
RUN;
%END;
%MEND;
%macro split (
data = /*Name of SAS data set to split */ ,
var = /*Name of the variable you wish to split "by" */ ,
expected= 58 /*How many separate files are you expecting? */ ,
setname = output /*What is the name of the output "set" */ ,
outpath = G:\ /*Output dir, for example C:\Users\*/,
countysplit=N /*Are these split files to be exported by county? This option includes consortia specifiers (CW, CIV, LDR)*/,
export=N
);
*validate split key, eliminate zeros, null values, anything shorter than length 2;
data splitme errors;
set &data;
if length(&var) GE 2 then output splitme;
else output errors;
run;
*from "validated data," create discrete data set with unique values of split keys;
proc sort data=splitme (keep=&var) out=getkeysfromhere nodupkey;
by &var;
run;
*modify "getkeysfromhere" data set to include a compressed key;
data getkeysfromhere;
set getkeysfromhere end=eof;
length compressedkey $20;
compressedkey=upcase(compress(&var,,"nk"));
call symputx("splitvar"||left(_n_), compressedkey);
*create macro var "numsplitvar" that holds the number of distinct keys;
*QA point, any splitvar with >58 discrete values will trigger a warning;
if eof then do;
call symputx("numsplitvar", _n_);
if _n_ > &expected then put "ERROR: TOO MANY OUTPUTS, EXAMINE CAREFULLY!";
end;
run;
*QA debug point;
%put _user_;
data
%do i=1 %to &numsplitvar;
&&splitvar&i
%end;
;
set splitme end=eof;
select (upcase(compress(&var,,"nk")));
%do i=1 %to &numsplitvar;
when ("&&splitvar&i") output &&splitvar&i;
%end;
OTHERWISE ;
END;
run;
%if %sysfunc(upcase(&export))=Y %then %do;
%IF %sysfunc(upcase(&COUNTYSPLIT=Y)) %THEN %EXPORTCOUNTIES(SETNAME2=&SETNAME, OUTPATH2=&OUTPATH);
%ELSE %DO I=1 %TO &NUMSPLITVAR;
PROC EXPORT DATA=&&SPLITVAR&I
OUTFILE="&OUTPATH.\&SETNAME._&&SPLITVAR&I...CSV"
DBMS=CSV REPLACE;
RUN;
%END;
%end;
%MEND SPLIT;
%SPLIT(DATA=WORK.SETONE, VAR=COUNTY, EXPECTED=58, SETNAME=SETONERESULTS,COUNTYSPLIT=Y, EXPORT=Y, outpath=G:\);
/**********************************************************************************************
DOCUMENTATION AND CHANGE LOG:
24FEB2015, ADDED ABILITY TO USE CONSORTIA MODIFIERS VIA EXPORTCOUNTIES CODE
24FEB2015, SPECIAL CHAR PROBLEM RESOLVED VIA COMPRESS WITH NK FLAGS
08APR2015, NON COUNTY EXPORT MISSING A BACKSLASH, FIXED IN %ELSE %DO STATEMENT, OK TO GO
**********************************************************************************************/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment