Skip to content

Instantly share code, notes, and snippets.

@einarwh
Last active September 17, 2021 13:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save einarwh/726bf397255a105f064bab01fb66c9cb to your computer and use it in GitHub Desktop.
Save einarwh/726bf397255a105f064bab01fb66c9cb to your computer and use it in GitHub Desktop.
Term frequency in PostScript (Exercises in Programming Style by Cristina Videira Lopes)
% Read file input using a buffer of the specified size.
% Stack: Z fn -> A
% Z: Buffer size
% fn: File name
% A: Array of lines of input
/read-input-bufsize { % Z fn
[ % Z fn [
3 1 roll % [ Z fn
(r) file % [ Z F
{
dup % [ ... Z F F
2 index string % [ ... Z F F buf
readline % [ ... Z F (s)
{
3 1 roll % [ ... (s) Z F
}
{
3 1 roll % [ ... (s) Z F
closefile % [ ... (s) Z
pop % [ ... (s)
] % [ ... (s) ]
exit
} ifelse
} loop % Rs
} def
% Read file input with 80 char buffer.
% Stack: fn -> A
% fn: File name
% A: Array of lines of input
/read-text { % fn
80 exch read-input-bufsize % A
} def
% Read file input with 600 char buffer.
% Stack: fn -> S
% fn: File name
% S: String of stop words
/read-stop-words { % fn
600 exch read-input-bufsize % A
0 get % S
} def
% Map the provided operation over an array of elements.
% Stack: A {op} -> A'
% A: Array of elements
% {op} -> Operation to be called for each element e in A
% A': Array of mapped elements
/map-array { % A {op}
[ 3 1 roll % [ A {op}
exch % [ {op} A
{ % [ {op} e
1 index % [ {op} e {op}
exec % [ {op} r %%% r is result of executing {op} on e
exch % [ r {op}
} forall % [ ... {op}
pop % [ ...
] % [ r1 r2 ... ] => [ r1 r2 ... ]
} def
% Filter an array of elements using the provided predicate.
% Stack: A {p} -> A'
% A: Array of elements
% {p} -> Predicate to be called for each element e in A
% A': Array of elements that satisfy predicate {p}
/filter-array { % A {p}
[ 3 1 roll % [ A {p}
exch % [ {p} A
{ % [ {p} e
2 copy % [ {p} e {p} e
exch % [ {p} e e {p}
exec % [ {p} e include?
{ % [ {p} e %%% yes, keep e
exch % [ e {p}
}
{ % [ {p} e %%% no, reject e
pop % [ {p}
} ifelse % [ e? {p}
} forall % [ ... {p}
pop % [ ...
] % [ e... ]
} def
% Convert any uppercase letter to lowercase.
% The idea is to add the offset between uppercase and lowercase letters
% to any character that is "too small" to be a lowercase letter.
% Stack: S -> S'
% S: Input string
% S': Result string, with uppercase letters transformed to lowercase
/upper-to-lower { % S
dup length % S L
0 1 % S L 0 1
3 -1 roll % S 0 1 L
1 sub % S 0 1 L-1
{ % S i
2 copy get % S i ch
dup % S i ch ch
(a) 0 get % S i ch ch 'a'
lt % S i ch ch<'a'?
{ % S i ch %%% ch<'a'
32 add % S i ch+32
2 index % S i ch+32 S
3 1 roll % S S i ch+32
put % S
}
{ % S i ch %%% ch>='a'
pop pop % S
} ifelse
} for
} def
% Write a blank at the provided index in a string.
% Stack: S i -> S'
% S: Input string
% i: Index for the blank
% S': Result string, with S[i] overwritten with a blank
/set-whitespace { % S i
1 index exch % S S i
( ) 0 get % S S i w
put % S
} def
% Overwrite anything that's not a lowercase letter with a blank.
% Stack: S i -> S'
% S: Input string
% i: Index for the blank
% S': Result string, with S[i] overwritten with a blank
/non-letters-to-whitespace { % S
dup length % S L
0 1 % S L 0 1
3 -1 roll % S 0 1 L
1 sub % S 0 1 L-1
{ % S i
2 copy get % S i ch
dup % S i ch ch
(a) 0 get % S i ch ch 'a'
lt % S i ch ch<'a'?
{ % S i ch %%% ch<'a'
pop % S i
set-whitespace % S
}
{ % S i ch %%%% ch>='a'
(z) 0 get % S i ch 'z'
gt % S i ch>'z'?
{ % S i %%% ch>'z'
set-whitespace % S
}
{ % S i %%% ch<='z'
pop % S
} ifelse
} ifelse
} for
} def
% Clean input strings (convert to lowercase and remove noise).
% Stack: A -> A'
% A: Input array of "dirty" strings
% A': Result array of "clean" strings
/clean-input { % A
[ exch % [ A
{ % [ L
upper-to-lower % [ L'
non-letters-to-whitespace % [ L''
} forall
]
} def
% Split a string into an array of substrings based on separator.
% Stack: s z -> A
% s: Input string
% z: Separator string
% A: Array of substrings resulting from splitting input string.
/split-string
{ % s z %%% z: separator s: string
[ % s z [
3 1 roll % [ s z
{ % [ s z
search % [ s' h found? %%% s': rest, h: head z
{ % [ s' z h %%% yes, found
3 1 roll % [ h s' z
}
{ % [ s %%% not found
exit % [ s
} ifelse
} loop % [ s ...
] % [ s ... ]
} def
% Split a line of text into words.
% Stack: S -> A
% S: String of text
% A: Array of individual words
/to-words { % L
( ) split-string % A
{ length 0 ne } filter-array % A' %%% No empty strings
} def
% Concatenate the arrays contained in an array of arrays.
% Stack: A -> A'
% A: Input array of arrays [A1, A2, ..., An ]
% A': Result array A1@A2@...@An
/flatten { % A = [A1, A2, ... AN]
[ exch % [ A
{ % [ Ai
{} forall % [ a1, a2, a3 ...
} forall
] % [ a1, ..., an ]
} def
% Create a procedure to identify stop words.
% This involves a bit of meta-programming.
% The idea is to close over the array of stop words
% and capture it inside the procedure.
% Stack: -> {op}
% {op}: The stop word predicate.
/stop-word-procedure %
{
[
(stop_words.txt)
read-stop-words
(,) split-string
{ % w W
false % w W F
3 1 roll % F w W
{ % F w v
1 index % F w v w
eq % F w v=w?
{ % F w %%% v=w
exch pop % w
true exch % T w
exit
} if
} forall % F w
pop % F
}
/exec cvx
] cvx
} def
% Evaluate the stop word procedure to create the predicate
% and bind it to the is-stop-word name.
/is-stop-word stop-word-procedure def
% Create an iterator for an array, to keep track of where we are.
% Assumption: the array has at least one element.
% Stack: A -> [ A ix more? ]
% A: Input array
% ix: To hold the current index for the iterator (originally set to 0)
% more?: To indicate if there are more elements left
/iter-state { % A
dup length % A L
0 gt % A L>0?
0 exch % A 0 L>0?
3 array % A 0 L>0? A'
astore % A'
} def
% Advance an iterator for an array.
% Stack: I -> I'
% I: Input iterator
% I': Result iterator (if more? is false in I, I' is the same as I)
/next-state { % I
dup % I I
aload pop % I A i ?
{ % I A i %%% true
1 add % I A i+1
1 index length % I A i+1 L
1 index % I A i+1 L i+1
gt % I A i+1 L>i+1?
3 array astore % I I'
exch pop % I'
}
{ % I A i %%% false
pop pop % I
} ifelse
} def
% Use the provided sorter procedure to choose which iterator to advance.
% If either iterator is exhausted, the other will be chosen without needing to compare.
% If both iterators are exhausted, we have a problem.
% Stack: I1 I2 {S} => I1' I2' a
% I1: An iterator for an array
% I2: An iterator for another array
% {S}: Sorter procedure, should return true if the first element should sort first
% I1': Potentially advanced iterator for the first array
% I2': Potentially advanced iterator for the second array
/choose-next % I1 I2 {S}
{
2 index % I1 I2 {S} I1
aload pop % I1 I2 {S} A1 i1 more1?
{ % I1 I2 {S} A1 i1 %%% i1 in range
get % I1 I2 {S} a1
2 index % I1 I2 {S} a1 I2
aload pop % I1 I2 {S} a1 A2 i2 more2?
{ % I1 I2 {S} a1 A2 i2 %%% i2 in range
get % I1 I2 {S} a1 a2
2 copy % I1 I2 {S} a1 a2 a1 a2
5 -1 roll % I1 I2 a1 a2 a1 a2 {S}
exec % I1 I2 a1 a2 choose-a1?
{ % I1 I2 a1 a2 %%% yes, choose a1
pop % I1 I2 a1
3 -1 roll % I2 a1 I1
next-state % I2 a1 I1'
3 1 roll % I1' I2 a1
}
{ % I1 I2 a1 a2 %%% no, choose a2
exch pop % I1 I2 a2
exch % I1 a2 I2
next-state % I1 a2 I2'
exch % I1 I2' a2
} ifelse % I1 I2 a
}
{ % I1 I2 {S} a1 A2 i2 %%% i1 in range, i2 out of range, choose i1
pop pop % I1 I2 {S} a1
exch pop % I1 I2 a1
3 -1 roll % I2 a1 I1
next-state % I2 a1 I1'
3 1 roll % I1' I2 a1
} ifelse % I1 I2 a
}
{ % I1 I2 {S} A1 i1 %%% i1 out of range, hope i2 is in range
pop pop pop % I1 I2
dup % I1 I2 I2
aload pop % I1 I2 A2 i2 more2?
{ % I1 I2 A2 i2 %%% i2 is in range
get % I1 I2 a2
exch % I1 a2 I2
next-state % I1 a2 I2'
exch % I1 I2' a2
}
{ % I1 I2 A2 i2 %%% i2 is out of range too
(out of range!\n) print
pop pop % I1 I2
(err) % I1 I2 (err)
} ifelse % I1 I2 a
} ifelse % I1 I2 a
} def
% The merge step in merge sort.
% Merges two sorted arrays into a single sorted array using the provided sorter procedure.
% Stack: A1 A2 {S} -> A
% A1: First sorted array
% A2: Second sorted array
% {S}: Sorter procedure, should return true if the first element should sort first
% A: Resulting merged and sorted array
/merge { % A1 A2 {S}
3 1 roll % {S} A1 A2
2 copy % {S} A1 A2 A1 A2
length % {S} A1 A2 A1 L2
exch % {S} A1 A2 L2 A1
length % {S} A1 A2 L2 L1
add % {S} A1 A2 L %%% L=L1+L2
array % {S} A1 A2 A
3 1 roll % {S} A A1 A2
iter-state % {S} A A1 I2
exch % {S} A I2 A1
iter-state % {S} A I2 I1
2 index % {S} A I2 I1 A
length % {S} A I2 I1 L
1 sub % {S} A I2 I1 L-1
0 exch % {S} A I2 I1 j=0 L-1
1 exch % {S} A I2 I1 j=0 k=1 L-1
{ % {S} A I2 I1 i
3 1 roll % {S} A i I2 I1
4 index % {S} A i I2 I1 {S}
choose-next % {S} A i I2 I1 a
4 index % {S} A i I2 I1 a A
5 -1 roll % {S} A I2 I1 a A i
3 -1 roll % {S} A I2 I1 A i a
put % {S} A I2 I1
} for % {S} A I2 I1
pop pop % {S} A
exch pop % A
} def
% Implements merge-sort, using the provided sorter procedure to sort elements
% The idea is to split an unsorted array in half, sort each half recursively,
% and merge the resulting sorted arrays. The base case of an array with a single
% element is trivially sorted, terminating the recursion.
% Stack: A {S} => A'
% A: Unsorted input array
% {S}: Sorter procedure, should return true if the first element should sort first
% A': Sorted result array
/merge-sort { % A {S}
exch % {S} A
dup length % {S} A L
1 % {S} A L 1
gt % {S} A L>1
{ % {S} A %%% L>1
dup length % {S} A L
2 idiv % {S} A L/2
split-at % {S} A1 A2
2 index % {S} A1 A2 {S}
merge-sort % {S} A1 A2'
exch % {S} A2' A1
2 index % {S} A2' A1 {S}
merge-sort % {S} A2' A1'
3 -1 roll % A2' A1' {S}
merge % A
}
{ % {S} A %%% L<=0
exch pop % A
} ifelse % A
} def
% Split an array in two at the provided index.
% Stack: A i -> A' A''
% A: Input array to be split
% i: Index where the array should be split
% A': Array with elements [0:i-1]
% A'': Array with elements [i:L-1]
/split-at { % A i
2 copy % A i A i
0 exch % A i A 0 1
getinterval % A i A'=A[0,i-1]
3 1 roll % A' A i
1 index length % A' A i L
1 index % A' A i L i
sub % A' A i c
getinterval % A' A''=A[i,L-1]
} def
% Find the first index in the array where the next element is not the same as the given one.
% This is useful if you have already sorted the array and want to group equal elements.
% Stack: A e -> i
% A: Input array
% e: Element to compare with
% i: Index of first element in A not equal to e
/find-change-index { % A e
exch % e A
-1 % e A -1
3 1 roll % -1 e A
dup length 1 sub % -1 e A L-1
0 exch % -1 e A j=0 L-1
1 exch % -1 e A j=0 k=1 L-1
{ % -1 e A i
2 copy get % -1 e A i x
3 index % -1 e A i x e
eq % -1 e A i x=e?
{ % -1 e A i %%% x=e, keep looking
pop % -1 e A
}
{ % -1 e A i %%% x!=e, found change
4 -1 roll % e A i -1
pop % e A i
3 1 roll % i e A
exit
} ifelse
} for % i e A
pop pop % i
} def
% Split an array when encountering an element not equal to the given one.
% Stack: A e -> A1 A2
% A: Input array
% e: Element to compare with
% A1: Array with clones of e encountered at the start of A
% A2: Remainder of elements, at least the first of which is not equal to e
/split-on-change { % A e
1 index % A e A
exch % A A e
find-change-index % A i
dup 0 ge % A i i>=0?
{ % A i %%% i>=0, change
split-at % A1 A2
}
{ % A i %%% i<0, no change
pop [] % A []
} ifelse
} def
% Take a sorted list of words and single out a subgroup containing instances of the first word.
% Stack: A -> A1 A2
% A: Input array, assumed to be a sorted list of words
% A1: Array with all the instances found for the first word in A
% A2: Remainder of words
/create-word-group { % A
dup 0 get % A e
split-on-change % A1 A2
} def
% Take a sorted list of words and create subgroups containing the instances for each word.
% Stack: A -> A'
% A: Input array, assumed to be a sorted list of words
% A': Nested array containing subarrays for the instances of each word in A
/group-words { % A
[ exch % [ A
{
create-word-group % [ ... A1 A2
dup length % [ ... A L
0 eq % [ ... A L=0?
{ % [ ... []
pop % [ ...
exit % [ ...
} if
} loop
] % [ A1 ... AN ]
} def
% Count the instances of words in an array.
% The array is assumed to contain instances of a single word.
% Stack: A -> A'
% A: Input array of instances of the same word (e.g. [(mr) (mr) (mr)])
% A': Output array with word count and word (e.g. [3 (mr)])
/word-count { % A
dup length % A L
exch % L A
0 get % L w
2 array % L w A'
astore % A'=[L w]
} def
% Compare two words for ordering.
% Stack: w1 w2 -> n
% n < 0 => w1 < w2
% n = 0 => w1 = w2
% n > 0 => w1 > w2
/compare-words { % w1 w2
0 % w1 w2 0
3 1 roll % 0 w1 w2
2 copy % 0 w1 w2 w1 w2
length % 0 w1 w2 w1 L2
exch length % 0 w1 w2 L2 L1
min % 0 w1 w2 L
1 sub % 0 w1 w2 L-1
0 exch % 0 w1 w2 j=0 L-1
1 exch % 0 w1 w2 j=0 k=1 L-1
{ % 0 w1 w2 i
3 copy % 0 w1 w2 i w1 w2 i
get % 0 w1 w2 i w1 c2
3 1 roll % 0 w1 w2 c2 i w1
exch % 0 w1 w2 c2 w1 i
get % 0 w1 w2 c2 c1
exch % 0 w1 w2 c1 c2
sub % 0 w1 w2 c1-c2
dup 0 eq % 0 w1 w2 d same?
{ % 0 w1 w2 d %%% yes, same
pop % 0 w1 w2
}
{ % 0 w1 w2 d %%% no, different
4 -1 roll % w1 w2 d 0
pop % w1 w2 d
3 1 roll % d w1 w2
exit
} ifelse % d w1 w2
} for
2 index % d w1 w2 d
0 eq % d w1 w2 d=0?
{ % 0 w1 w2 %%% equal so far, shortest is smallest
3 -1 roll % w1 w2 0
pop % w1 w2
length % w1 L2
exch length % L2 L1
exch % L1 L2
sub % L1-L2
}
{ % d w1 w2 %%% d!=0
pop pop % d
} ifelse
} def
% Check if a word is smaller or equal to another.
% Stack: w1 w2 -> b
% w1: First word
% w2: Second word
% b: boolean result (true => w1 <= w2, false => w1 > w2)
/le-word { % w1 w2
compare-words % d
0 le % d<=0?
} def
% Check if a word count is greater than another.
% Stack: [n1 w1] [n2 w2] -> b
% [n1 w1]: Word count (n1) for w1
% [n2 w2]: Word count (n2) for w2
% b: boolean result (true => n1 > n2, false => n2 >= n1)
/gt-count { % [n1 w1] [n2 w2]
0 get % [n1 w1] n2
exch % n2 [n1 w1]
0 get % n2 n1
exch % n1 n2
gt % n1>n2?
} def
% Take the n first elements of an array.
% If n is greater than the length of the array, take all elements.
% Stack: A n -> A'
% A: Input array
% n: Number of elements to take
% A': Up to the n first elements of A
/take { % A n
[ % A n [
3 1 roll % [ A n
1 index length % [ A n L
min 1 sub % [ A m
0 exch % [ A j=0 m
1 exch % [ A j=0 k=1 m
{ % [ A i
1 index exch % [ A A i
get % [ A ai
exch % [ ai A
} for
pop % [ a0 ... an
] % A'
} def
% Main program.
/main {
(pride-and-prejudice.txt)
read-text
clean-input
{ to-words } map-array flatten
{ length 1 gt } filter-array
{ is-stop-word not } filter-array
{ le-word } merge-sort
group-words
{ word-count } map-array
{ gt-count } merge-sort
25 take
{ == } forall
} def
% To run:
% gs -DNOSAFER tf.ps
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment