Last active
September 17, 2021 13:45
-
-
Save einarwh/726bf397255a105f064bab01fb66c9cb to your computer and use it in GitHub Desktop.
Term frequency in PostScript (Exercises in Programming Style by Cristina Videira Lopes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Read file input using a buffer of the specified size. | |
% Stack: Z fn -> A | |
% Z: Buffer size | |
% fn: File name | |
% A: Array of lines of input | |
/read-input-bufsize { % Z fn | |
[ % Z fn [ | |
3 1 roll % [ Z fn | |
(r) file % [ Z F | |
{ | |
dup % [ ... Z F F | |
2 index string % [ ... Z F F buf | |
readline % [ ... Z F (s) | |
{ | |
3 1 roll % [ ... (s) Z F | |
} | |
{ | |
3 1 roll % [ ... (s) Z F | |
closefile % [ ... (s) Z | |
pop % [ ... (s) | |
] % [ ... (s) ] | |
exit | |
} ifelse | |
} loop % Rs | |
} def | |
% Read file input with 80 char buffer. | |
% Stack: fn -> A | |
% fn: File name | |
% A: Array of lines of input | |
/read-text { % fn | |
80 exch read-input-bufsize % A | |
} def | |
% Read file input with 600 char buffer. | |
% Stack: fn -> S | |
% fn: File name | |
% S: String of stop words | |
/read-stop-words { % fn | |
600 exch read-input-bufsize % A | |
0 get % S | |
} def | |
% Map the provided operation over an array of elements. | |
% Stack: A {op} -> A' | |
% A: Array of elements | |
% {op} -> Operation to be called for each element e in A | |
% A': Array of mapped elements | |
/map-array { % A {op} | |
[ 3 1 roll % [ A {op} | |
exch % [ {op} A | |
{ % [ {op} e | |
1 index % [ {op} e {op} | |
exec % [ {op} r %%% r is result of executing {op} on e | |
exch % [ r {op} | |
} forall % [ ... {op} | |
pop % [ ... | |
] % [ r1 r2 ... ] => [ r1 r2 ... ] | |
} def | |
% Filter an array of elements using the provided predicate. | |
% Stack: A {p} -> A' | |
% A: Array of elements | |
% {p} -> Predicate to be called for each element e in A | |
% A': Array of elements that satisfy predicate {p} | |
/filter-array { % A {p} | |
[ 3 1 roll % [ A {p} | |
exch % [ {p} A | |
{ % [ {p} e | |
2 copy % [ {p} e {p} e | |
exch % [ {p} e e {p} | |
exec % [ {p} e include? | |
{ % [ {p} e %%% yes, keep e | |
exch % [ e {p} | |
} | |
{ % [ {p} e %%% no, reject e | |
pop % [ {p} | |
} ifelse % [ e? {p} | |
} forall % [ ... {p} | |
pop % [ ... | |
] % [ e... ] | |
} def | |
% Convert any uppercase letter to lowercase. | |
% The idea is to add the offset between uppercase and lowercase letters | |
% to any character that is "too small" to be a lowercase letter. | |
% Stack: S -> S' | |
% S: Input string | |
% S': Result string, with uppercase letters transformed to lowercase | |
/upper-to-lower { % S | |
dup length % S L | |
0 1 % S L 0 1 | |
3 -1 roll % S 0 1 L | |
1 sub % S 0 1 L-1 | |
{ % S i | |
2 copy get % S i ch | |
dup % S i ch ch | |
(a) 0 get % S i ch ch 'a' | |
lt % S i ch ch<'a'? | |
{ % S i ch %%% ch<'a' | |
32 add % S i ch+32 | |
2 index % S i ch+32 S | |
3 1 roll % S S i ch+32 | |
put % S | |
} | |
{ % S i ch %%% ch>='a' | |
pop pop % S | |
} ifelse | |
} for | |
} def | |
% Write a blank at the provided index in a string. | |
% Stack: S i -> S' | |
% S: Input string | |
% i: Index for the blank | |
% S': Result string, with S[i] overwritten with a blank | |
/set-whitespace { % S i | |
1 index exch % S S i | |
( ) 0 get % S S i w | |
put % S | |
} def | |
% Overwrite anything that's not a lowercase letter with a blank. | |
% Stack: S i -> S' | |
% S: Input string | |
% i: Index for the blank | |
% S': Result string, with S[i] overwritten with a blank | |
/non-letters-to-whitespace { % S | |
dup length % S L | |
0 1 % S L 0 1 | |
3 -1 roll % S 0 1 L | |
1 sub % S 0 1 L-1 | |
{ % S i | |
2 copy get % S i ch | |
dup % S i ch ch | |
(a) 0 get % S i ch ch 'a' | |
lt % S i ch ch<'a'? | |
{ % S i ch %%% ch<'a' | |
pop % S i | |
set-whitespace % S | |
} | |
{ % S i ch %%%% ch>='a' | |
(z) 0 get % S i ch 'z' | |
gt % S i ch>'z'? | |
{ % S i %%% ch>'z' | |
set-whitespace % S | |
} | |
{ % S i %%% ch<='z' | |
pop % S | |
} ifelse | |
} ifelse | |
} for | |
} def | |
% Clean input strings (convert to lowercase and remove noise). | |
% Stack: A -> A' | |
% A: Input array of "dirty" strings | |
% A': Result array of "clean" strings | |
/clean-input { % A | |
[ exch % [ A | |
{ % [ L | |
upper-to-lower % [ L' | |
non-letters-to-whitespace % [ L'' | |
} forall | |
] | |
} def | |
% Split a string into an array of substrings based on separator. | |
% Stack: s z -> A | |
% s: Input string | |
% z: Separator string | |
% A: Array of substrings resulting from splitting input string. | |
/split-string | |
{ % s z %%% z: separator s: string | |
[ % s z [ | |
3 1 roll % [ s z | |
{ % [ s z | |
search % [ s' h found? %%% s': rest, h: head z | |
{ % [ s' z h %%% yes, found | |
3 1 roll % [ h s' z | |
} | |
{ % [ s %%% not found | |
exit % [ s | |
} ifelse | |
} loop % [ s ... | |
] % [ s ... ] | |
} def | |
% Split a line of text into words. | |
% Stack: S -> A | |
% S: String of text | |
% A: Array of individual words | |
/to-words { % L | |
( ) split-string % A | |
{ length 0 ne } filter-array % A' %%% No empty strings | |
} def | |
% Concatenate the arrays contained in an array of arrays. | |
% Stack: A -> A' | |
% A: Input array of arrays [A1, A2, ..., An ] | |
% A': Result array A1@A2@...@An | |
/flatten { % A = [A1, A2, ... AN] | |
[ exch % [ A | |
{ % [ Ai | |
{} forall % [ a1, a2, a3 ... | |
} forall | |
] % [ a1, ..., an ] | |
} def | |
% Create a procedure to identify stop words. | |
% This involves a bit of meta-programming. | |
% The idea is to close over the array of stop words | |
% and capture it inside the procedure. | |
% Stack: -> {op} | |
% {op}: The stop word predicate. | |
/stop-word-procedure % | |
{ | |
[ | |
(stop_words.txt) | |
read-stop-words | |
(,) split-string | |
{ % w W | |
false % w W F | |
3 1 roll % F w W | |
{ % F w v | |
1 index % F w v w | |
eq % F w v=w? | |
{ % F w %%% v=w | |
exch pop % w | |
true exch % T w | |
exit | |
} if | |
} forall % F w | |
pop % F | |
} | |
/exec cvx | |
] cvx | |
} def | |
% Evaluate the stop word procedure to create the predicate | |
% and bind it to the is-stop-word name. | |
/is-stop-word stop-word-procedure def | |
% Create an iterator for an array, to keep track of where we are. | |
% Assumption: the array has at least one element. | |
% Stack: A -> [ A ix more? ] | |
% A: Input array | |
% ix: To hold the current index for the iterator (originally set to 0) | |
% more?: To indicate if there are more elements left | |
/iter-state { % A | |
dup length % A L | |
0 gt % A L>0? | |
0 exch % A 0 L>0? | |
3 array % A 0 L>0? A' | |
astore % A' | |
} def | |
% Advance an iterator for an array. | |
% Stack: I -> I' | |
% I: Input iterator | |
% I': Result iterator (if more? is false in I, I' is the same as I) | |
/next-state { % I | |
dup % I I | |
aload pop % I A i ? | |
{ % I A i %%% true | |
1 add % I A i+1 | |
1 index length % I A i+1 L | |
1 index % I A i+1 L i+1 | |
gt % I A i+1 L>i+1? | |
3 array astore % I I' | |
exch pop % I' | |
} | |
{ % I A i %%% false | |
pop pop % I | |
} ifelse | |
} def | |
% Use the provided sorter procedure to choose which iterator to advance. | |
% If either iterator is exhausted, the other will be chosen without needing to compare. | |
% If both iterators are exhausted, we have a problem. | |
% Stack: I1 I2 {S} => I1' I2' a | |
% I1: An iterator for an array | |
% I2: An iterator for another array | |
% {S}: Sorter procedure, should return true if the first element should sort first | |
% I1': Potentially advanced iterator for the first array | |
% I2': Potentially advanced iterator for the second array | |
/choose-next % I1 I2 {S} | |
{ | |
2 index % I1 I2 {S} I1 | |
aload pop % I1 I2 {S} A1 i1 more1? | |
{ % I1 I2 {S} A1 i1 %%% i1 in range | |
get % I1 I2 {S} a1 | |
2 index % I1 I2 {S} a1 I2 | |
aload pop % I1 I2 {S} a1 A2 i2 more2? | |
{ % I1 I2 {S} a1 A2 i2 %%% i2 in range | |
get % I1 I2 {S} a1 a2 | |
2 copy % I1 I2 {S} a1 a2 a1 a2 | |
5 -1 roll % I1 I2 a1 a2 a1 a2 {S} | |
exec % I1 I2 a1 a2 choose-a1? | |
{ % I1 I2 a1 a2 %%% yes, choose a1 | |
pop % I1 I2 a1 | |
3 -1 roll % I2 a1 I1 | |
next-state % I2 a1 I1' | |
3 1 roll % I1' I2 a1 | |
} | |
{ % I1 I2 a1 a2 %%% no, choose a2 | |
exch pop % I1 I2 a2 | |
exch % I1 a2 I2 | |
next-state % I1 a2 I2' | |
exch % I1 I2' a2 | |
} ifelse % I1 I2 a | |
} | |
{ % I1 I2 {S} a1 A2 i2 %%% i1 in range, i2 out of range, choose i1 | |
pop pop % I1 I2 {S} a1 | |
exch pop % I1 I2 a1 | |
3 -1 roll % I2 a1 I1 | |
next-state % I2 a1 I1' | |
3 1 roll % I1' I2 a1 | |
} ifelse % I1 I2 a | |
} | |
{ % I1 I2 {S} A1 i1 %%% i1 out of range, hope i2 is in range | |
pop pop pop % I1 I2 | |
dup % I1 I2 I2 | |
aload pop % I1 I2 A2 i2 more2? | |
{ % I1 I2 A2 i2 %%% i2 is in range | |
get % I1 I2 a2 | |
exch % I1 a2 I2 | |
next-state % I1 a2 I2' | |
exch % I1 I2' a2 | |
} | |
{ % I1 I2 A2 i2 %%% i2 is out of range too | |
(out of range!\n) print | |
pop pop % I1 I2 | |
(err) % I1 I2 (err) | |
} ifelse % I1 I2 a | |
} ifelse % I1 I2 a | |
} def | |
% The merge step in merge sort. | |
% Merges two sorted arrays into a single sorted array using the provided sorter procedure. | |
% Stack: A1 A2 {S} -> A | |
% A1: First sorted array | |
% A2: Second sorted array | |
% {S}: Sorter procedure, should return true if the first element should sort first | |
% A: Resulting merged and sorted array | |
/merge { % A1 A2 {S} | |
3 1 roll % {S} A1 A2 | |
2 copy % {S} A1 A2 A1 A2 | |
length % {S} A1 A2 A1 L2 | |
exch % {S} A1 A2 L2 A1 | |
length % {S} A1 A2 L2 L1 | |
add % {S} A1 A2 L %%% L=L1+L2 | |
array % {S} A1 A2 A | |
3 1 roll % {S} A A1 A2 | |
iter-state % {S} A A1 I2 | |
exch % {S} A I2 A1 | |
iter-state % {S} A I2 I1 | |
2 index % {S} A I2 I1 A | |
length % {S} A I2 I1 L | |
1 sub % {S} A I2 I1 L-1 | |
0 exch % {S} A I2 I1 j=0 L-1 | |
1 exch % {S} A I2 I1 j=0 k=1 L-1 | |
{ % {S} A I2 I1 i | |
3 1 roll % {S} A i I2 I1 | |
4 index % {S} A i I2 I1 {S} | |
choose-next % {S} A i I2 I1 a | |
4 index % {S} A i I2 I1 a A | |
5 -1 roll % {S} A I2 I1 a A i | |
3 -1 roll % {S} A I2 I1 A i a | |
put % {S} A I2 I1 | |
} for % {S} A I2 I1 | |
pop pop % {S} A | |
exch pop % A | |
} def | |
% Implements merge-sort, using the provided sorter procedure to sort elements | |
% The idea is to split an unsorted array in half, sort each half recursively, | |
% and merge the resulting sorted arrays. The base case of an array with a single | |
% element is trivially sorted, terminating the recursion. | |
% Stack: A {S} => A' | |
% A: Unsorted input array | |
% {S}: Sorter procedure, should return true if the first element should sort first | |
% A': Sorted result array | |
/merge-sort { % A {S} | |
exch % {S} A | |
dup length % {S} A L | |
1 % {S} A L 1 | |
gt % {S} A L>1 | |
{ % {S} A %%% L>1 | |
dup length % {S} A L | |
2 idiv % {S} A L/2 | |
split-at % {S} A1 A2 | |
2 index % {S} A1 A2 {S} | |
merge-sort % {S} A1 A2' | |
exch % {S} A2' A1 | |
2 index % {S} A2' A1 {S} | |
merge-sort % {S} A2' A1' | |
3 -1 roll % A2' A1' {S} | |
merge % A | |
} | |
{ % {S} A %%% L<=0 | |
exch pop % A | |
} ifelse % A | |
} def | |
% Split an array in two at the provided index. | |
% Stack: A i -> A' A'' | |
% A: Input array to be split | |
% i: Index where the array should be split | |
% A': Array with elements [0:i-1] | |
% A'': Array with elements [i:L-1] | |
/split-at { % A i | |
2 copy % A i A i | |
0 exch % A i A 0 1 | |
getinterval % A i A'=A[0,i-1] | |
3 1 roll % A' A i | |
1 index length % A' A i L | |
1 index % A' A i L i | |
sub % A' A i c | |
getinterval % A' A''=A[i,L-1] | |
} def | |
% Find the first index in the array where the next element is not the same as the given one. | |
% This is useful if you have already sorted the array and want to group equal elements. | |
% Stack: A e -> i | |
% A: Input array | |
% e: Element to compare with | |
% i: Index of first element in A not equal to e | |
/find-change-index { % A e | |
exch % e A | |
-1 % e A -1 | |
3 1 roll % -1 e A | |
dup length 1 sub % -1 e A L-1 | |
0 exch % -1 e A j=0 L-1 | |
1 exch % -1 e A j=0 k=1 L-1 | |
{ % -1 e A i | |
2 copy get % -1 e A i x | |
3 index % -1 e A i x e | |
eq % -1 e A i x=e? | |
{ % -1 e A i %%% x=e, keep looking | |
pop % -1 e A | |
} | |
{ % -1 e A i %%% x!=e, found change | |
4 -1 roll % e A i -1 | |
pop % e A i | |
3 1 roll % i e A | |
exit | |
} ifelse | |
} for % i e A | |
pop pop % i | |
} def | |
% Split an array when encountering an element not equal to the given one. | |
% Stack: A e -> A1 A2 | |
% A: Input array | |
% e: Element to compare with | |
% A1: Array with clones of e encountered at the start of A | |
% A2: Remainder of elements, at least the first of which is not equal to e | |
/split-on-change { % A e | |
1 index % A e A | |
exch % A A e | |
find-change-index % A i | |
dup 0 ge % A i i>=0? | |
{ % A i %%% i>=0, change | |
split-at % A1 A2 | |
} | |
{ % A i %%% i<0, no change | |
pop [] % A [] | |
} ifelse | |
} def | |
% Take a sorted list of words and single out a subgroup containing instances of the first word. | |
% Stack: A -> A1 A2 | |
% A: Input array, assumed to be a sorted list of words | |
% A1: Array with all the instances found for the first word in A | |
% A2: Remainder of words | |
/create-word-group { % A | |
dup 0 get % A e | |
split-on-change % A1 A2 | |
} def | |
% Take a sorted list of words and create subgroups containing the instances for each word. | |
% Stack: A -> A' | |
% A: Input array, assumed to be a sorted list of words | |
% A': Nested array containing subarrays for the instances of each word in A | |
/group-words { % A | |
[ exch % [ A | |
{ | |
create-word-group % [ ... A1 A2 | |
dup length % [ ... A L | |
0 eq % [ ... A L=0? | |
{ % [ ... [] | |
pop % [ ... | |
exit % [ ... | |
} if | |
} loop | |
] % [ A1 ... AN ] | |
} def | |
% Count the instances of words in an array. | |
% The array is assumed to contain instances of a single word. | |
% Stack: A -> A' | |
% A: Input array of instances of the same word (e.g. [(mr) (mr) (mr)]) | |
% A': Output array with word count and word (e.g. [3 (mr)]) | |
/word-count { % A | |
dup length % A L | |
exch % L A | |
0 get % L w | |
2 array % L w A' | |
astore % A'=[L w] | |
} def | |
% Compare two words for ordering. | |
% Stack: w1 w2 -> n | |
% n < 0 => w1 < w2 | |
% n = 0 => w1 = w2 | |
% n > 0 => w1 > w2 | |
/compare-words { % w1 w2 | |
0 % w1 w2 0 | |
3 1 roll % 0 w1 w2 | |
2 copy % 0 w1 w2 w1 w2 | |
length % 0 w1 w2 w1 L2 | |
exch length % 0 w1 w2 L2 L1 | |
min % 0 w1 w2 L | |
1 sub % 0 w1 w2 L-1 | |
0 exch % 0 w1 w2 j=0 L-1 | |
1 exch % 0 w1 w2 j=0 k=1 L-1 | |
{ % 0 w1 w2 i | |
3 copy % 0 w1 w2 i w1 w2 i | |
get % 0 w1 w2 i w1 c2 | |
3 1 roll % 0 w1 w2 c2 i w1 | |
exch % 0 w1 w2 c2 w1 i | |
get % 0 w1 w2 c2 c1 | |
exch % 0 w1 w2 c1 c2 | |
sub % 0 w1 w2 c1-c2 | |
dup 0 eq % 0 w1 w2 d same? | |
{ % 0 w1 w2 d %%% yes, same | |
pop % 0 w1 w2 | |
} | |
{ % 0 w1 w2 d %%% no, different | |
4 -1 roll % w1 w2 d 0 | |
pop % w1 w2 d | |
3 1 roll % d w1 w2 | |
exit | |
} ifelse % d w1 w2 | |
} for | |
2 index % d w1 w2 d | |
0 eq % d w1 w2 d=0? | |
{ % 0 w1 w2 %%% equal so far, shortest is smallest | |
3 -1 roll % w1 w2 0 | |
pop % w1 w2 | |
length % w1 L2 | |
exch length % L2 L1 | |
exch % L1 L2 | |
sub % L1-L2 | |
} | |
{ % d w1 w2 %%% d!=0 | |
pop pop % d | |
} ifelse | |
} def | |
% Check if a word is smaller or equal to another. | |
% Stack: w1 w2 -> b | |
% w1: First word | |
% w2: Second word | |
% b: boolean result (true => w1 <= w2, false => w1 > w2) | |
/le-word { % w1 w2 | |
compare-words % d | |
0 le % d<=0? | |
} def | |
% Check if a word count is greater than another. | |
% Stack: [n1 w1] [n2 w2] -> b | |
% [n1 w1]: Word count (n1) for w1 | |
% [n2 w2]: Word count (n2) for w2 | |
% b: boolean result (true => n1 > n2, false => n2 >= n1) | |
/gt-count { % [n1 w1] [n2 w2] | |
0 get % [n1 w1] n2 | |
exch % n2 [n1 w1] | |
0 get % n2 n1 | |
exch % n1 n2 | |
gt % n1>n2? | |
} def | |
% Take the n first elements of an array. | |
% If n is greater than the length of the array, take all elements. | |
% Stack: A n -> A' | |
% A: Input array | |
% n: Number of elements to take | |
% A': Up to the n first elements of A | |
/take { % A n | |
[ % A n [ | |
3 1 roll % [ A n | |
1 index length % [ A n L | |
min 1 sub % [ A m | |
0 exch % [ A j=0 m | |
1 exch % [ A j=0 k=1 m | |
{ % [ A i | |
1 index exch % [ A A i | |
get % [ A ai | |
exch % [ ai A | |
} for | |
pop % [ a0 ... an | |
] % A' | |
} def | |
% Main program. | |
/main { | |
(pride-and-prejudice.txt) | |
read-text | |
clean-input | |
{ to-words } map-array flatten | |
{ length 1 gt } filter-array | |
{ is-stop-word not } filter-array | |
{ le-word } merge-sort | |
group-words | |
{ word-count } map-array | |
{ gt-count } merge-sort | |
25 take | |
{ == } forall | |
} def | |
% To run: | |
% gs -DNOSAFER tf.ps |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment