Created
March 12, 2021 07:33
-
-
Save benhoyt/770a6ce13e307b8cd2e7f1e7c4731c70 to your computer and use it in GitHub Desktop.
Forth: print frequencies of unique words in stdin, most frequent first
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
200 constant max-line | |
create line max-line allot \ Buffer for read-line | |
wordlist constant counts \ Hash table of words to count | |
variable num-uniques 0 num-uniques ! | |
\ Allocate space for new string and copy bytes, return new string. | |
: copy-string ( addr u -- addr' u ) | |
dup >r dup allocate throw | |
dup >r swap move r> r> ; | |
\ Convert character to lowercase. | |
: to-lower ( C -- c ) | |
dup [char] A [ char Z 1+ ] literal within if | |
32 + | |
then ; | |
\ Convert string to lowercase in place. | |
: lower-in-place ( addr u -- ) | |
over + swap ?do | |
i c@ to-lower i c! | |
loop ; | |
\ Count given word in hash table. | |
: count-word ( addr u -- ) | |
2dup counts search-wordlist if | |
\ Increment existing word | |
execute 1 swap +! | |
2drop | |
else | |
\ Insert new (copied) word with count 1 | |
copy-string | |
2dup lower-in-place | |
['] create execute-parsing 1 , | |
1 num-uniques +! | |
then ; | |
\ Scan till space and return remaining string and word. | |
: scan-word ( addr u -- rem-addr rem addr word-len ) | |
2dup bl scan | |
2tuck rot swap - nip ; | |
\ Process a line by splitting into words. | |
: process-line ( addr u -- ) | |
begin | |
bl skip \ Skip spaces | |
scan-word \ Scan till space (or end) | |
dup if | |
count-word | |
else | |
2drop | |
then | |
dup 0= until | |
2drop ; | |
\ Add word from wordlist to array at given offset. | |
: add-word ( addr offset nt -- addr offset+cell true ) | |
>r 2dup + r> swap ! | |
cell+ true ; | |
\ Less-than for words (true if count is *greater* for reverse sort). | |
: count< ( nt1 nt2 -- ) | |
>r name>string counts search-wordlist drop execute @ | |
r> name>string counts search-wordlist drop execute @ | |
> ; | |
\ In-place merge sort taken from Rosetta Code: | |
\ https://rosettacode.org/wiki/Sorting_algorithms/Merge_sort#Forth | |
: merge-step ( right mid left -- right mid+ left+ ) | |
over @ over @ count< if | |
over @ >r | |
2dup - over dup cell+ rot move | |
r> over ! | |
>r cell+ 2dup = if rdrop dup else r> then | |
then | |
cell+ ; | |
: merge ( right mid left -- right left ) | |
dup >r begin | |
2dup > | |
while | |
merge-step | |
repeat | |
2drop r> ; | |
: mid ( l r -- mid ) | |
over - 2/ cell negate and + ; | |
: mergesort ( right left -- right left ) | |
2dup cell+ <= if | |
exit | |
then | |
swap 2dup mid recurse rot recurse merge ; | |
: sort ( addr len -- ) | |
cells over + swap mergesort 2drop ; | |
\ Show "word count" line for each word (unsorted). | |
: show-words ( -- ) | |
num-uniques @ cells allocate throw | |
0 ['] add-word counts traverse-wordlist drop | |
dup num-uniques @ sort | |
num-uniques @ 0 ?do | |
dup i cells + @ | |
dup name>string type space | |
name>interpret execute @ . cr | |
loop | |
drop ; | |
: main ( -- ) | |
counts set-current | |
begin | |
line max-line stdin read-line throw | |
while | |
line swap process-line | |
repeat | |
drop | |
show-words ; | |
main | |
bye |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment