statgeek/Two Word Combinations.sas

## Two Word Combinations.sas
/* Build the demo input: one free-text sentence per observation. */
data random_sentences;
	/* TRUNCOVER lets a line shorter than 256 characters be read as-is
	   instead of pulling text from the following record. */
	infile datalines truncover;
	informat sentence $256.;
	input sentence $256.;
	datalines;
This is a random sentence
This is another random sentence
Happy Birthday
My job sucks.
This is a good idea, not.
This is an awesome idea!
How are you today?
Does this make sense?
Have a great day!
;
/* Partition each sentence into one output row per word.
   Output columns: sentence, id (row number of the source sentence),
   nwords (word count), nchar (length after removing blanks),
   word_order (1-based position of the word) and word. */
data f1;
	set random_sentences;

	/* Explicit delimiter list shared by COUNTW and SCAN. It is the default
	   ASCII delimiter set plus the question mark, colon and double quote.
	   The default set omits the question mark, so a sentence-ending word
	   such as the last word of "How are you today?" would otherwise keep
	   its punctuation and be counted as a different word. */
	retain _delims ' !$%&()*+,-./;<^|?:"';
	drop _delims;

	id=_n_;
	nwords=countw(sentence, _delims);
	nchar=length(compress(sentence));

	/* Emit one row per word; nothing is output when nwords is 0. */
	do word_order=1 to nwords;
		word=scan(sentence, word_order, _delims);
		output;
	end;
run;

/* Build every two-word combination within each sentence.
   Each unordered pair appears once: word1 is the word that occurs LATER
   in the sentence and word2 the one that occurs EARLIER. Both are
   lower-cased so that counting is case-insensitive. */
proc sql;
	create table words2 as
		select t1.sentence,
		       lowcase(t1.word) as word1,
		       lowcase(t2.word) as word2
			from f1 as t1
				/* Pair words through the per-sentence id rather than the
				   sentence text, so two identical sentences are not
				   cross-matched with each other and double counted. */
				inner join f1 as t2
					on t1.id = t2.id
			where t1.word_order > t2.word_order
			order by t1.sentence, t1.word_order;
quit;

/* Tabulate every (word1, word2) pair from WORDS2 and store the result
   in WANT. NOPRINT suppresses the printed report. */
proc freq data=words2 order=freq noprint;
	tables word1 * word2 / out=want list;
run;
	/* Build the demo input: one free-text sentence per observation. */
	data random_sentences;
		/* EXPANDTABS turns the tab that indents each in-stream line into
		   blanks, which the $256. informat then trims. Without it every
		   sentence would start with a tab character.
		   TRUNCOVER lets a short line be read as-is instead of pulling
		   text from the following record. */
		infile cards expandtabs truncover;
		informat sentence $256.;
		input sentence $256.;
		cards;
	This is a random sentence
	This is another random sentence
	Happy Birthday
	My job sucks.
	This is a good idea, not.
	This is an awesome idea!
	How are you today?
	Does this make sense?
	Have a great day!
	;
	/* Partition each sentence into one output row per word.
	   Output columns: sentence, id (row number of the source sentence),
	   nwords (word count), nchar (length after removing blanks),
	   word_order (1-based position of the word) and word. */
	data f1;
		set random_sentences;

		/* Explicit delimiter list shared by COUNTW and SCAN. It is the
		   default ASCII delimiter set plus the question mark, colon and
		   double quote. The default set omits the question mark, so a
		   sentence-ending word such as the last word of
		   "How are you today?" would otherwise keep its punctuation and
		   be counted as a different word. */
		retain _delims ' !$%&()*+,-./;<^|?:"';
		drop _delims;

		id=_n_;
		nwords=countw(sentence, _delims);
		nchar=length(compress(sentence));

		/* Emit one row per word; nothing is output when nwords is 0. */
		do word_order=1 to nwords;
			word=scan(sentence, word_order, _delims);
			output;
		end;
	run;

	/* Build every two-word combination within each sentence.
	   Each unordered pair appears once: word1 is the word that occurs
	   LATER in the sentence and word2 the one that occurs EARLIER. Both
	   are lower-cased so that counting is case-insensitive. */
	proc sql;
		create table words2 as
			select t1.sentence,
			       lowcase(t1.word) as word1,
			       lowcase(t2.word) as word2
				from f1 as t1
					/* Pair words through the per-sentence id rather than
					   the sentence text, so two identical sentences are
					   not cross-matched with each other and double
					   counted. */
					inner join f1 as t2
						on t1.id = t2.id
				where t1.word_order > t2.word_order
				order by t1.sentence, t1.word_order;
	quit;

	/* Tabulate every (word1, word2) pair from WORDS2 and store the result
	   in WANT. NOPRINT suppresses the printed report. */
	proc freq data=words2 order=freq noprint;
		tables word1 * word2 / out=want list;
	run;