Skip to content

Instantly share code, notes, and snippets.

@8bit-pixies
Created December 14, 2012 08:50
Show Gist options
  • Save 8bit-pixies/4283737 to your computer and use it in GitHub Desktop.
Save 8bit-pixies/4283737 to your computer and use it in GitHub Desktop.
Regex example showing one possible way of extracting bracketed "[FIELD: value]" tags from unstructured free text within SAS.
/* Build the sample input: one observation per line of free text, held in  */
/* the single character variable A (columns 1-200). Fields of interest are */
/* embedded in the text as bracketed tags of the form [FIELD: value].      */
data test;
  /* TRUNCOVER: the in-stream records are shorter than the 200 columns     */
  /* requested below. Without it the default FLOWOVER behaviour can make   */
  /* INPUT jump to the next data line when it runs past the end of a       */
  /* record, losing lines. With it, A simply takes what is on the line.    */
  infile cards truncover;
  input a $1-200;
cards;
Organisation trades in a high risk country China, [JURIS: CHN] and is
also involed in charity [INDUSTRY: CHARITY]. Customers acquisition
date is [ACQUISITION_DATE: 10JUL1999]
Customer A is a [JOB: SOLE TRADER]
;
run;
/* For each line of text in TEST, pull out every bracketed tag             */
/* ("[...]" containing word characters, whitespace, ':' or '-') and store  */
/* the text between the brackets in TESTING1-TESTING10, in order of        */
/* appearance. Slots that are not used stay blank.                         */
data test1;
  set test;
  /* Declaring the format first makes TESTING1-TESTING10 character         */
  /* variables before the ARRAY statement references them.                 */
  format testing1-testing10 $100.;
  drop re start stop position length i;
  array testing{10} testing1-testing10;

  re = prxparse('/\[([\w\s:-]+)\]/');
  start = 1;
  stop = length(a);

  /* CALL PRXNEXT returns the position and length of the next match and    */
  /* advances START past it; POSITION is 0 when nothing more is found.     */
  call prxnext(re, start, stop, a, position, length);
  i = 0;

  /* Stop at the array capacity: an unbounded index would raise an         */
  /* "array subscript out of range" error on a line with more tags than    */
  /* there are slots.                                                      */
  do while (position > 0 and i < dim(testing));
    i = i + 1;
    /* POSITION/LENGTH cover the whole "[...]" match: +1 skips the opening */
    /* bracket, -2 removes both brackets from the length.                  */
    testing{i} = substr(a, position + 1, length - 2);
    call prxnext(re, start, stop, a, position, length);
  end;

  /* A match is still pending only when the array filled up first.         */
  if position > 0 then
    put 'WARNING: more bracketed tags than capture slots; extra tags ignored. ' a=;

  output;
run;
/* Sort TEST1 in place by the source text A, so that the BY-group          */
/* processing in the following PROC TRANSPOSE sees the rows in BY order.   */
proc sort data=test1 out=test1;
  by a;
run;
/* Turn the wide capture slots into rows: for every BY group (source text  */
/* A) TEST2 gets one row per TESTINGn variable, with the variable name in  */
/* _NAME_ and the captured tag text in COL1.                               */
/* All ten slots filled by the extraction step are listed; listing only    */
/* the first three would silently discard a fourth or later tag on a line. */
/* Input must already be sorted by A.                                      */
proc transpose data = test1 out = test2;
  by a;
  var testing1-testing10;
run;
/* Split each captured tag in COL1 ("FIELD: value") at the first colon:    */
/* FIELD_NAME receives the text before it, NAMES the stripped text after   */
/* it. The helper variables and COL1 itself are not written to TEST3.      */
data test3 (drop = tag_rx match_pos col1);
  set test2;

  tag_rx = prxparse("/(.*?):(.*)/");

  /* PRXMATCH has to run before PRXPOSN: it fills the capture buffers that */
  /* PRXPOSN reads. Its return value is not otherwise used.                */
  match_pos = prxmatch(tag_rx, col1);

  field_name = prxposn(tag_rx, 1, col1);
  names      = strip(prxposn(tag_rx, 2, col1));
run;
/* Re-transpose: pivot back to one row per source text A, creating one     */
/* column per distinct FIELD_NAME value and filling it from NAMES. The     */
/* automatic _NAME_ column is dropped from the result.                     */
proc transpose data=test3
               out=test4 (drop=_NAME_);
  by a;
  var names;
  id field_name;
run;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment