Created
October 1, 2012 02:45
-
-
Save dyoo/3809206 to your computer and use it in GitHub Desktop.
counting words with tildes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
;; Counts words, treating a tilde-delimited span (~ ... ~) as a single escaped word.
;; Based on the Stack Overflow question:
;;
;; http://stackoverflow.com/questions/12666553/efficient-word-count-method-using-regex-or-something
(require rackunit) | |
;; count: input-port -> exact-nonnegative-integer
;;
;; Consumes characters from ip until end of file and returns the number of
;; words seen.  A word is either:
;;   * a maximal run of characters that are neither whitespace nor #\~, or
;;   * a "tilde-word": everything from an opening #\~ up to the matching
;;     closing #\~ (whitespace included), counted as exactly one word.
;;
;; A #\~ met in the middle of a plain word ends that word and opens a
;; tilde-word, which is counted separately.  An unterminated tilde-word runs
;; to end of file and still counts as one word.
;;
;; Implemented as a three-state machine; each state is a tail-recursive
;; helper that reads one character from ip and dispatches on it.
(define (count ip)
  ;; Between words: skip whitespace, start counting when a word opens.
  (define (OUTSIDE counter)
    (define ch (read-char ip))
    (cond
      [(eof-object? ch) counter]
      [(char=? ch #\~) (TILDEWORD (add1 counter))]
      [(char-whitespace? ch) (OUTSIDE counter)]
      [else (WORD (add1 counter))]))

  ;; Inside a plain word (already counted): consume until whitespace,
  ;; a tilde, or end of file.
  (define (WORD counter)
    (define ch (read-char ip))
    (cond
      [(eof-object? ch) counter]
      ;; A tilde terminates the plain word and begins a new tilde-word.
      [(char=? ch #\~) (TILDEWORD (add1 counter))]
      [(char-whitespace? ch) (OUTSIDE counter)]
      [else (WORD counter)]))

  ;; Inside a tilde-word (already counted): every character, whitespace
  ;; included, belongs to it until the closing tilde or end of file.
  (define (TILDEWORD counter)
    (define ch (read-char ip))
    (cond
      [(eof-object? ch) counter]
      [(char=? ch #\~) (OUTSIDE counter)]
      [else (TILDEWORD counter)]))

  (OUTSIDE 0))
;; Quick and dirty tests.
;; Each check feeds a string to count through a string port and compares the
;; result with the expected number of words.

;; Empty and whitespace-only input.
(check-equal? (count (open-input-string "")) 0)
(check-equal? (count (open-input-string " ")) 0)
(check-equal? (count (open-input-string " \t\n ")) 0)

;; Plain words with leading/trailing whitespace.
(check-equal? (count (open-input-string "hello")) 1)
(check-equal? (count (open-input-string " hello")) 1)
(check-equal? (count (open-input-string "hello ")) 1)
(check-equal? (count (open-input-string " hello ")) 1)
(check-equal? (count (open-input-string " hello world ")) 2)
(check-equal? (count (open-input-string "one two three"))
              3)

;; Well-formed tilde-words: each ~ ... ~ span counts once.
(check-equal? (count (open-input-string "~ hello world ~")) 1)
(check-equal? (count (open-input-string "~ hello ~ ~ world ~ ")) 2)
(check-equal? (count (open-input-string "~ hello world~ ")) 1)
(check-equal? (count (open-input-string "hello~happy fabulous world~testing is good"))
              5)

;; The following has not been defined by the problem.
;; I'm assuming that ill-formed tildes
;; will be treated as a single word up to end of file.
(check-equal? (count (open-input-string "~")) 1)
(check-equal? (count (open-input-string "hello~")) 2)
(check-equal? (count (open-input-string "hello ~world")) 2)
(check-equal? (count (open-input-string "~ hello ~ world ~ ")) 3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment