Skip to content

Instantly share code, notes, and snippets.

@statgeek
Created December 20, 2017 16:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save statgeek/eb8228cdf07cb3f3ee5f1494e4c3b10f to your computer and use it in GitHub Desktop.
Save statgeek/eb8228cdf07cb3f3ee5f1494e4c3b10f to your computer and use it in GitHub Desktop.
SAS - take a sentence, split it into individual words, and find all two-word combinations
*Create sample data;
data random_sentences;
infile cards truncover;
informat sentence $256.;
input sentence $256.;
cards;
This is a random sentence
This is another random sentence
Happy Birthday
My job sucks.
This is a good idea, not.
This is an awesome idea!
How are you today?
Does this make sense?
Have a great day!
;
;
;
;
*Partition into words;
data f1;
set random_sentences;
id=_n_;
nwords=countw(sentence);
nchar=length(compress(sentence));
do word_order=1 to nwords;
word=scan(sentence, word_order);
output;
end;
run;
proc sql;
create table words2 as
select t1.sentence, lowcase(t1.word) as word1, lowcase(t2.word) as word2
from f1 as t1
cross join f1 as t2
where t1.sentence=t2.sentence
and t1.word_order > t2.word_order
order by t1.sentence, t1.word_order;
quit;
proc freq data=words2 noprint order=freq;
table word1*word2 /list out=want;
run;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment