Skip to content

Instantly share code, notes, and snippets.

@beugley
Created November 18, 2019 16:59
Show Gist options
  • Save beugley/c62f4cd496c32e2684f3a98c59b79a27 to your computer and use it in GitHub Desktop.
Save beugley/c62f4cd496c32e2684f3a98c59b79a27 to your computer and use it in GitHub Desktop.
SAS script to convert a data set to text format
*options mprint mlogic symbolgen;
options errorabend fullstimer compress=binary;
options nofmterr validvarname=any;
/*
** SAS script to convert a data set to text-delimited format. The output file
** will be written to either the same directory where the SAS data set resides
** or to a user-specified file.
**
** sysparm must contain 5 :-separated values:
** 1) the name of a SAS data set, including full path and extension.
** 2) the name of the output file; leave this null and the output will
** be written to the same directory where the SAS data set resides using
** the same name, but with an extension that reflects the delimiter,
** instead of .sas7bdat.
** 3) the number of rows in the subset; specify MAX or nothing to convert
** all rows.
** 4) the delimiter for the output file. Valid options are "comma", "tab",
** "ctrl-A", and "vertical-bar". Default is tab.
** 5) Optional unique path and name to use for temp data set. If set, then
** it must be unique for concurrent runs. If not set, then the temp
** data set will be put in the WORK dir.
** Examples: /prod/user/sam/lob/directory/file.sas7bdat:/tmp/output.tsv:10000::
** /prod/user/sam/lob/directory/file.sas7bdat::MAX::
** /prod/user/sam/lob/directory/file.sas7bdat::10000:ctrl-A:
*/
%put sysparm: "&sysparm";
%let SASFILE = %sysfunc(scan(&sysparm,1,:,m));
%let OUTFILE = %sysfunc(scan(&sysparm,2,:,m));
%let ROWCOUNT = %sysfunc(scan(&sysparm,3,:,m));
%let DELIMITER = %sysfunc(scan(&sysparm,4,:,m));
%let TEMP_DS = %sysfunc(scan(&sysparm,5,:,m));
%put SASFILE: "&SASFILE";
%put OUTFILE: "&OUTFILE";
%put ROWCOUNT: "&ROWCOUNT";
%put DELIMITER: "&DELIMITER";
%put TEMP_DS: "&TEMP_DS";
/*
** Parse the path, filename, and data set name from &SASFILE.
*/
%let FILENAME = %sysfunc(scan(&SASFILE,-1,/));
%let DSLEN = %eval(%sysfunc(find(&FILENAME,.sas7bdat))-1);
%let DSNAME = %sysfunc(substr(&FILENAME,1,&DSLEN));
%let PATHLEN = %eval(%sysfunc(find(&SASFILE,&FILENAME))-1);
%let PATHNAME = %sysfunc(substr(&SASFILE,1,&PATHLEN));
%put FILENAME: &FILENAME;
%put DSNAME: &DSNAME;
%put PATHNAME: &PATHNAME;
%macro set_delimiter;
%global EXTENSION;
%if ("&DELIMITER" = "" or "&DELIMITER" = "tab") %then
%do;
%let DELIMITER = '09'x;
%let EXTENSION = tsv;
%end;
%else %if ("&DELIMITER" = "ctrl-A") %then
%do;
%let DELIMITER = '01'x;
%let EXTENSION = txt;
%end;
%else %if ("&DELIMITER" = "vertical-bar") %then
%do;
%let DELIMITER = '|';
%let EXTENSION = txt;
%end;
%else %if ("&DELIMITER" = "comma") %then
%do;
%let DELIMITER = ',';
%let EXTENSION = csv;
%end;
%else
%do;
%put ERROR: Invalid delimiter '&DELIMITER';
%abort abend 5;
%end;
%mend set_delimiter;
%set_delimiter;
%put DELIMITER: "&DELIMITER";
%put EXTENSION: "&EXTENSION";
%macro set_outfile;
%if ("&OUTFILE" = "") %then
%do;
%let OUTFILE = &PATHNAME./&DSNAME..&EXTENSION;
%end;
%mend set_outfile;
%set_outfile;
%put OUTFILE: "&OUTFILE";
%macro set_rowcount;
%if ("&ROWCOUNT" = "") %then
%do;
%let ROWCOUNT = MAX;
%end;
%mend set_rowcount;
%set_rowcount;
%put ROWCOUNT: "&ROWCOUNT";
%macro set_temp_ds;
%global TEMP_DSNAME;
%if ("&TEMP_DS" = "") %then
%do;
%let TEMP_DSNAME = WORK.&DSNAME;
%end;
%else
%do;
%let TEMP_DSNAME = %sysfunc(scan(&TEMP_DS,-1,/));
%let TEMP_PATHLEN = %eval(%sysfunc(find(&TEMP_DS,&TEMP_DSNAME))-1);
%let TEMP_PATHNAME = %sysfunc(substr(&TEMP_DS,1,&TEMP_PATHLEN));
%let TEMP_DSNAME=WORKDIR.&TEMP_DSNAME;
libname WORKDIR "&TEMP_PATHNAME";
%end;
%mend set_temp_ds;
%set_temp_ds;
%put TEMP_DSNAME: "&TEMP_DSNAME";
libname SAM "&PATHNAME";
/*
** If an index is missing or the data set is otherwise damaged, fix it before
** continuing.
** COMMENTED-OUT BECAUSE THIS FAILS IF THE SAS DATA SET WAS CREATED ON A
** DIFFERENT ARCHITECTURE.
proc datasets library=SAM;
repair &DSNAME;
quit;
*/
/*
** Get number of variables, numeric-variables, and character-variables in the
** data set.
*/
proc sql noprint;
select strip(putn(count(*),'6.')) into :NUM_COLUMNS
from sashelp.vcolumn
where libname = 'SAM'
and upper(memname) = upper("&DSNAME");
select strip(putn(count(*),'6.')) into :NUM_NUMERIC_COLUMNS
from sashelp.vcolumn
where libname = 'SAM'
and upper(memname) = upper("&DSNAME")
and type = 'num';
select strip(putn(count(*),'6.')) into :NUM_CHAR_COLUMNS
from sashelp.vcolumn
where libname = 'SAM'
and upper(memname) = upper("&DSNAME")
and type = 'char';
quit;
%put NUM_COLUMNS: &NUM_COLUMNS;
%put NUM_NUMERIC_COLUMNS: &NUM_NUMERIC_COLUMNS;
%put NUM_CHAR_COLUMNS: &NUM_CHAR_COLUMNS;
/*
** Get lists of variables, numeric-variables, and character-variables.
*/
proc sql noprint;
select name into :COLS1 - :COLS&NUM_COLUMNS
from sashelp.vcolumn
where libname = 'SAM'
and upper(memname) = upper("&DSNAME")
order by varnum;
select name into :NUMERIC_COLS1 - :NUMERIC_COLS&NUM_NUMERIC_COLUMNS
from sashelp.vcolumn
where libname = 'SAM'
and upper(memname) = upper("&DSNAME")
and type = 'num'
order by varnum;
select name into :CHAR_COLS1 - :CHAR_COLS&NUM_CHAR_COLUMNS
from sashelp.vcolumn
where libname = 'SAM'
and upper(memname) = upper("&DSNAME")
and type = 'char'
order by varnum;
quit;
/*
** Create a temporary version of the data set with all delimiters,
** carriage-returns, and line-feeds converted to spaces.
** Use the BEST32 format for all numeric variables to ensure no loss of
** precision when exporting to text-delimited format.
** Remove formats from all character variables to ensure that the actual value
** is exported.
*/
%macro ConvertSpecialChars;
data &TEMP_DSNAME;
set SAM.&DSNAME(obs=&ROWCOUNT);
%do I = 1 %to &NUM_NUMERIC_COLUMNS;
format "&&NUMERIC_COLS&I"n BEST32.;
%end;
%do I = 1 %to &NUM_CHAR_COLUMNS;
format "&&CHAR_COLS&I"n;
"&&CHAR_COLS&I"n = translate("&&CHAR_COLS&I"n,' ',&DELIMITER.||'0a'x||'0d'x);
%end;
run;
%mend ConvertSpecialChars;
%ConvertSpecialChars;
/*
** Export the converted data set to TSV format.
** NOTE: This has been replaced by the %export macro because of proc-export
** limitations.
proc export data=&TEMP_DSNAME
outfile="&OUTFILE"
dbms=tab
replace;
run;
*/
/*
** Export the converted data set to text-delimited format.
** This steps exists because proc-export has a limit of LRECL=32767 which fails
** on data sets with very large numbers/lengths of columns.
*/
%macro export;
data _null_;
set &TEMP_DSNAME;
file "&OUTFILE" delimiter=&DELIMITER DSD DROPOVER LRECL=5000000;
* Print column names on the first iteration;
if _n_ = 1 then
do;
put
%do I = 1 %to &NUM_COLUMNS;
"&&COLS&I"
%if (&I < &NUM_COLUMNS) %then
%do;
&DELIMITER
%end;
%end;
;
end;
* Print all variables;
put (_all_) (+0);
run;
%mend export;
%export;
proc sql;
drop table &TEMP_DSNAME;
quit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment