lojic/byte-string-support.rkt Secret

## byte-string-support.rkt
#lang racket

(define space (char->integer #\space))
(define zero (char->integer #\0))
(define nine (char->integer #\9))
(define max-ascii 127)

;; Indicate wither the byte is in [ 0 .. 9 ]
(define (byte-numeric? b)
  (and (>= b zero)
       (<= b nine)))

;; Return the first byte of a byte string
(define (bytes-car bytes)
  (bytes-ref bytes 0))

;; Return all but the first byte of a byte string
(define (bytes-cdr bytes)
  (subbytes bytes 1))

;; Return a byte string with each occurrence removed
;; val: byte?
(define (bytes-delete bytes val)
  (define len (bytes-length bytes))
  (define (has-val? i)
    (cond [(= i len) #f]
          [(= val (bytes-ref bytes i)) #t]
          [else (has-val? (+ i 1))]))
  (if (> len 0)
      (cond [(has-val? 0)
             (define result (make-bytes len))
             (define dst 0)
             (for ([b (in-bytes bytes)])
               (cond [(= b val) '()]
                     [else (bytes-set! result dst b)
                           (set! dst (+ dst 1))]))
             (subbytes result 0 dst)]
            [else bytes])
      bytes))

;; Return a byte string with each byte for which pred returns true removed
;; pred: procedure?
(define (bytes-delete-pred bytes pred)
  (define len (bytes-length bytes))
  (define (has-val? i)
    (cond [(= i len) #f]
          [(pred (bytes-ref bytes i)) #t]
          [else (has-val? (+ i 1))]))
  (if (> len 0)
      (cond [(has-val? 0)
             (define result (make-bytes len))
             (define dst 0)
             (for ([b (in-bytes bytes)])
               (cond [(pred b) '()]
                     [else (bytes-set! result dst b)
                           (set! dst (+ dst 1))]))
             (subbytes result 0 dst)]
            [else bytes])
      bytes))

;; Return a byte string with each occurrence of "from" replaced by "to"
;; from: byte?
;; to: byte?
(define (bytes-replace bytes from to)
  (define len (bytes-length bytes))
  (define (has-from? i)
    (cond [(= i len) #f]
          [(= from (bytes-ref bytes i)) #t]
          [else (has-from? (+ i 1))]))
  (cond [(has-from? 0)
         (define result (make-bytes len))
         (for ([i (in-range len)]
               [b (in-bytes bytes)])
              (if (= b from)
                  (bytes-set! result i to)
                  (bytes-set! result i b)))
         result]
        [else bytes]))

;; Split a byte string into a list of byte strings based on the specified sep byte.
;; NOTE: Only handles case of a single separator byte
(define (bytes-split bytes sep)
  (define len (bytes-length bytes))
  (define (helper beg end result)
    (cond
     ; Begin is beyond end of byte string
     [(= beg len)
      (if (= sep (bytes-ref bytes (- len 1)))
          (reverse (cons #"" result))
          (reverse result))]
     ; End is beyond end of byte string
     [(= end len) (reverse (cons (subbytes bytes beg end) result))]
     ; End is at sep
     [(= sep (bytes-ref bytes end))
      (let ([i (+ end 1)])
        (helper i i (cons (subbytes bytes beg end) result)))]
     ; Increment end
     [else (helper beg (+ end 1) result)]))
  (if (> len 0)
      (helper 0 0 '())
      (list bytes)))

;; Returns a new string where runs of the same character are replaced
;; by a single character.
;; (string-squeeze "abbaacdccee") -> "abacdce"
(define (bytes-squeeze bytes)
  (define result (make-bytes (bytes-length bytes)))
  (define last null)
  (define i -1)
  (for ([c (in-bytes bytes)])
    (when (not (eqv? c last))
      (set! i (+ i 1))
      (set! last c)
      (bytes-set! result i c)))
  (subbytes result 0 (+ i 1)))

;; Trims leading/trailing bytes that are <= <space> or > <max ascii>
(define (bytes-trim bytes)
  (define len (bytes-length bytes))
  (let left ([i 0])
    (if (< i len)
        (let ([left-byte (bytes-ref bytes i)])
          (if (or (<= left-byte space)
                  (> left-byte max-ascii))
              (left (+ i 1))
              (let right ([j (- len 1)])
                (if (>= j i)
                    (let ([right-byte (bytes-ref bytes j)])
                      (if (or (<= right-byte space)
                              (> right-byte max-ascii))
                          (right (- j 1))
                          (subbytes bytes i (+ j 1))))
                    #""))))
        #"")))

;; Indicate whether the argument is a byte string and is not empty
(define (non-empty-bytes? bytes)
  (and (bytes? bytes)
       (> (bytes-length bytes) 0)))

(provide byte-numeric? bytes-car bytes-cdr bytes-delete bytes-delete-pred bytes-replace bytes-split bytes-squeeze bytes-trim max-ascii non-empty-bytes? space)

## parser.rkt
#lang racket

(require "./byte-string-support.rkt")
(require "./soundex.rkt")
;(require profile)

(define backslash (char->integer #\\))
(define comma     (char->integer #\,))
(define zero      (char->integer #\0))

(define (parse-file ipath epath apath)
  (call-with-input-file ipath
    (λ (i-in)
      (call-with-output-file epath
        (λ (e-out)
          (call-with-output-file apath
            (λ (a-out)
              (parse-records i-in e-out a-out))
            #:exists 'replace))
          #:exists 'replace))))

(define (parse-records i-in e-out a-out)
  (define num-deletes 0)
  (define num-cases 0)
  (define num-aliases 0)

  ;; Extract header
  (fprintf e-out
           "~a\n"
           (string-append
            "copy extracts (case_key,county_name,last_name,last_name_soundex,first_name,"
            "first_name_soundex,middle_name,middle_name_soundex,suffix,addr_street1,addr_street2,"
            "addr_city,addr_state,addr_zip5,addr_zip4,race,sex,birth_year,birth_month,birth_day,"
            "last_4_ssn,driver_license,driver_license_state,citation_number,added_to_acis_date,"
            "extract_date,personal_identifier,case_disposed,court_type) from stdin;"))

  ;; SubjectAlias header
  (fprintf a-out
           "~a\n"
           (string-append
            "copy subject_aliases (case_key,line_number,last_name,last_name_soundex,first_name,"
            "first_name_soundex,middle_name,suffix) from stdin;"))

  ;; Main loop
  (for ([line-no (in-naturals 1)]
        [line (in-bytes-lines i-in)])
    (define record-type (subbytes line 0 2))

    (match record-type
      [#"00" null ]
      [#"01" (parse-case e-out line-no line)
            (set! num-cases (+ num-cases 1)) ]
      [#"02" (parse-alias a-out line-no line)
            (set! num-aliases (+ num-aliases 1)) ]
      [#"99" null ]
      [_ (log-error parse-records line-no (format "invalid record type: ~a" record-type)) ]))

  (fprintf e-out "\\.\n")
  (fprintf a-out "\\.\n"))

(define (parse-alias a-out line-no line)
  ;; Only use 27 because the '@' character is in the last position!
  (define-values (last-name first-name middle-name suffix) (parse-name (subbytes line 20 47)))
  (define last-name-soundex (if last-name (soundex last-name) #f))
  (define first-name-soundex (if first-name (soundex first-name) #f))

  (fprintf a-out "~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\n"
           (opt-field (parse-case-key line))
           (opt-field (parse-alias-line (subbytes line 18 20)))
           (opt-field last-name)
           (opt-field last-name-soundex)
           (opt-field first-name)
           (opt-field first-name-soundex)
           (opt-field middle-name)
           (opt-field suffix)))

(define (parse-case e-out line-no line)
  (define-values (last-name first-name middle-name suffix) (parse-name (subbytes line 30 58)))
  (define last-name-soundex (if last-name (soundex last-name) #f))
  (define first-name-soundex (if first-name (soundex first-name) #f))

  (fprintf e-out "~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\n"
           (opt-field (parse-case-key line))
           (opt-field (parse-bytes line 18 30)) ; county name
           (opt-field last-name)
           (opt-field last-name-soundex)
           (opt-field first-name)
           (opt-field first-name-soundex)
           (opt-field middle-name)
           (opt-field suffix)
           (opt-field (parse-bytes line 58 78))   ; addr_street1
           (opt-field (parse-bytes line 78 93))   ; addr_street2
           (opt-field (parse-bytes line 93 108))  ; city
           (opt-field (parse-bytes line 108 110)) ; state
           (opt-field (parse-bytes line 110 115)) ; zip5
           (opt-field (parse-bytes line 115 119)) ; zip4
           (opt-field (parse-bytes line 119 120)) ; race
           (opt-field (parse-bytes line 120 121)) ; sex
           (opt-field (parse-bytes line 121 125)) ; birth_year
           (opt-field (parse-bytes line 125 127)) ; birth_month
           (opt-field (parse-bytes line 127 129)) ; birth_day
           (opt-field (parse-bytes line 129 133)) ; last_4_ssn
           (opt-field (parse-bytes line 133 153)) ; driver_license
           (opt-field (parse-bytes line 153 155)) ; driver_license_state
           (opt-field (parse-bytes line 155 163)) ; citation_number
           (opt-field (parse-bytes line 163 171)) ; added_to_acis_date
           (opt-field (parse-bytes line 171 179)) ; extract_date
           (opt-field (parse-bytes line 179 189)) ; personal_identifier
           (opt-field (parse-bytes line 189 190)) ; case_disposed
           (opt-field (parse-bytes line 190 193)))) ; court_type

(define (parse-alias-line bytes)
  (if (= zero (bytes-ref bytes 0))
      (subbytes bytes 1 2)
      bytes))

(define (parse-case-key line)
  (bytes-trim (subbytes line 2 18)))

(define (parse-name bytes)
  (define tokens (bytes-split (bytes-delete bytes backslash) comma))
  (define len (length tokens))

  (values
   (and (> len 0) (bytes-trim (car tokens)))
   (and (> len 1) (bytes-trim (cadr tokens)))
   (and (> len 2) (bytes-trim (caddr tokens)))
   (and (> len 3) (bytes-trim (cadddr tokens)))))

(define (opt-field bytes)
  (if (and bytes (> (bytes-length bytes) 0))
      bytes
      #"\\N"))

(define (parse-bytes line beg end)
  (bytes-trim (bytes-delete (subbytes line beg end) backslash)))

(define (log-error proc-sym line-no str)
  (printf "*error* Parser:~a line ~a: ~a" (object-name proc-sym) line-no str))

(define (run)
  (parse-file "cr-4000"
              "e1-file"
              "a1-file"))

;(profile-thunk run #:use-errortrace? #t)
(time (run))

## soundex.rkt
#lang racket

(require "./byte-string-support.rkt")

;; --------------------------------------------------------------------------------------------
;; Public Interface
;; --------------------------------------------------------------------------------------------

;; Convert a byte string to a soundex code. If the input contains no
;; alphabetic bytes, return #f
(define (soundex original)
  (define clean (bytes-delete-pred original (λ (b) (or (< b a-code)
                                                       (> b z-code)))))

  (if (non-empty-bytes? clean)
      (soundex-inner clean)
      #f))

;; --------------------------------------------------------------------------------------------
;; Private
;; --------------------------------------------------------------------------------------------

(define a-code (char->integer #\A))
(define e-code (char->integer #\E))
(define h-code (char->integer #\H))
(define i-code (char->integer #\I))
(define o-code (char->integer #\O))
(define u-code (char->integer #\U))
(define w-code (char->integer #\W))
(define y-code (char->integer #\Y))
(define z-code (char->integer #\Z))

(define num-letters (+ (- z-code a-code) 1))

(define soundex-chars #"ABCDEFGHIJKLMNOPQRSTUVWXYZ")
(define soundex-codes #"A123E12HI22455O12623U1W2Y2")

(define soundex-code-length 4)

(define (soundex-inner original)
  ;; 1. Save the first letter.
  (define first_letter (bytes-car original))

  ;; 2. Remove all occurrences of 'H' and 'W' EXCEPT first letter.
  (define str (bytes-append
                (subbytes original 0 1)
                (bytes-delete-pred
                 (if (> (bytes-length original) 1)
                     (bytes-cdr original)
                     #"")
                 (λ (b) (or (= b h-code) (= b w-code))))))

  ;; 3. Replace all consonants (including the first letter) with digits as follows:
  ;;    b, f, p, v             -> 1
  ;;    c, g, j, k, q, s, x, z -> 2
  ;;    d, t                   -> 3
  ;;    l                      -> 4
  ;;    m, n                   -> 5
  ;;    r                      -> 6
  (set! str (bytes-tr str tr-map))

  ;; 4. Replace all adjacent same digits with one digit.
  (set! str (bytes-squeeze str))

  ;; 5. Remove all occurrences of A, E, I, O, U, Y EXCEPT the first letter.
  (set! str (let ([len (bytes-length str)])
              (bytes-append
               (subbytes str 0 1)
               (bytes-delete-pred
                (if (> len 1)
                    (subbytes str 1 len)
                    #"")
                (λ (b) (or (= b a-code)
                           (= b e-code)
                           (= b i-code)
                           (= b o-code)
                           (= b u-code)
                           (= b y-code)))))))

  ;; 6. If first symbol is a digit replace it with letter saved on step 1.
  (when (byte-numeric? (bytes-ref str 0))
    (bytes-set! str 0 first_letter))


  ;; 7. Append zeros or truncate so that the result is a letter followed by 3 digits.
  (let ([len (bytes-length str)])
    (cond [(< len soundex-code-length) (subbytes (bytes-append str #"000") 0 soundex-code-length)]
          [(> len soundex-code-length) (subbytes str 0 soundex-code-length)]
          [else str])))

;; Create a translation map to convert letters to soundex digits.
(define tr-map
  (let ([base a-code]
        [v (make-vector num-letters)])
    (for ([c1 (in-bytes soundex-chars)]
          [c2 (in-bytes soundex-codes)])
      (vector-set! v (- c1 base) c2))
    v))

;; Translate bytes in a byte string based on a translation map
(define (bytes-tr bytes map)
  (define result (make-bytes (bytes-length bytes)))
  (for ([c (in-bytes bytes)]
        [i (in-naturals)])
    (let ([k (- c a-code)])
      (if (<= 0 k (- num-letters 1))
          (bytes-set! result i (vector-ref map k))
          (bytes-set! result i c))))
  result)

(provide soundex)
	#lang racket

	(define space (char->integer #\space))
	(define zero (char->integer #\0))
	(define nine (char->integer #\9))
	(define max-ascii 127)

	;; Indicate wither the byte is in [ 0 .. 9 ]
	(define (byte-numeric? b)
	(and (>= b zero)
	(<= b nine)))

	;; Return the first byte of a byte string
	(define (bytes-car bytes)
	(bytes-ref bytes 0))

	;; Return all but the first byte of a byte string
	(define (bytes-cdr bytes)
	(subbytes bytes 1))

	;; Return a byte string with each occurrence removed
	;; val: byte?
	(define (bytes-delete bytes val)
	(define len (bytes-length bytes))
	(define (has-val? i)
	(cond [(= i len) #f]
	[(= val (bytes-ref bytes i)) #t]
	[else (has-val? (+ i 1))]))
	(if (> len 0)
	(cond [(has-val? 0)
	(define result (make-bytes len))
	(define dst 0)
	(for ([b (in-bytes bytes)])
	(cond [(= b val) '()]
	[else (bytes-set! result dst b)
	(set! dst (+ dst 1))]))
	(subbytes result 0 dst)]
	[else bytes])
	bytes))

	;; Return a byte string with each byte for which pred returns true removed
	;; pred: procedure?
	(define (bytes-delete-pred bytes pred)
	(define len (bytes-length bytes))
	(define (has-val? i)
	(cond [(= i len) #f]
	[(pred (bytes-ref bytes i)) #t]
	[else (has-val? (+ i 1))]))
	(if (> len 0)
	(cond [(has-val? 0)
	(define result (make-bytes len))
	(define dst 0)
	(for ([b (in-bytes bytes)])
	(cond [(pred b) '()]
	[else (bytes-set! result dst b)
	(set! dst (+ dst 1))]))
	(subbytes result 0 dst)]
	[else bytes])
	bytes))

	;; Return a byte string with each occurrence of "from" replaced by "to"
	;; from: byte?
	;; to: byte?
	(define (bytes-replace bytes from to)
	(define len (bytes-length bytes))
	(define (has-from? i)
	(cond [(= i len) #f]
	[(= from (bytes-ref bytes i)) #t]
	[else (has-from? (+ i 1))]))
	(cond [(has-from? 0)
	(define result (make-bytes len))
	(for ([i (in-range len)]
	[b (in-bytes bytes)])
	(if (= b from)
	(bytes-set! result i to)
	(bytes-set! result i b)))
	result]
	[else bytes]))

	;; Split a byte string into a list of byte strings based on the specified sep byte.
	;; NOTE: Only handles case of a single separator byte
	(define (bytes-split bytes sep)
	(define len (bytes-length bytes))
	(define (helper beg end result)
	(cond
	; Begin is beyond end of byte string
	[(= beg len)
	(if (= sep (bytes-ref bytes (- len 1)))
	(reverse (cons #"" result))
	(reverse result))]
	; End is beyond end of byte string
	[(= end len) (reverse (cons (subbytes bytes beg end) result))]
	; End is at sep
	[(= sep (bytes-ref bytes end))
	(let ([i (+ end 1)])
	(helper i i (cons (subbytes bytes beg end) result)))]
	; Increment end
	[else (helper beg (+ end 1) result)]))
	(if (> len 0)
	(helper 0 0 '())
	(list bytes)))

	;; Returns a new string where runs of the same character are replaced
	;; by a single character.
	;; (string-squeeze "abbaacdccee") -> "abacdce"
	(define (bytes-squeeze bytes)
	(define result (make-bytes (bytes-length bytes)))
	(define last null)
	(define i -1)
	(for ([c (in-bytes bytes)])
	(when (not (eqv? c last))
	(set! i (+ i 1))
	(set! last c)
	(bytes-set! result i c)))
	(subbytes result 0 (+ i 1)))

	;; Trims leading/trailing bytes that are <= <space> or > <max ascii>
	(define (bytes-trim bytes)
	(define len (bytes-length bytes))
	(let left ([i 0])
	(if (< i len)
	(let ([left-byte (bytes-ref bytes i)])
	(if (or (<= left-byte space)
	(> left-byte max-ascii))
	(left (+ i 1))
	(let right ([j (- len 1)])
	(if (>= j i)
	(let ([right-byte (bytes-ref bytes j)])
	(if (or (<= right-byte space)
	(> right-byte max-ascii))
	(right (- j 1))
	(subbytes bytes i (+ j 1))))
	#""))))
	#"")))

	;; Indicate whether the argument is a byte string and is not empty
	(define (non-empty-bytes? bytes)
	(and (bytes? bytes)
	(> (bytes-length bytes) 0)))

	(provide byte-numeric? bytes-car bytes-cdr bytes-delete bytes-delete-pred bytes-replace bytes-split bytes-squeeze bytes-trim max-ascii non-empty-bytes? space)
	#lang racket

	(require "./byte-string-support.rkt")
	(require "./soundex.rkt")
	;(require profile)

	(define backslash (char->integer #\\))
	(define comma (char->integer #\,))
	(define zero (char->integer #\0))

	(define (parse-file ipath epath apath)
	(call-with-input-file ipath
	(λ (i-in)
	(call-with-output-file epath
	(λ (e-out)
	(call-with-output-file apath
	(λ (a-out)
	(parse-records i-in e-out a-out))
	#:exists 'replace))
	#:exists 'replace))))

	(define (parse-records i-in e-out a-out)
	(define num-deletes 0)
	(define num-cases 0)
	(define num-aliases 0)

	;; Extract header
	(fprintf e-out
	"~a\n"
	(string-append
	"copy extracts (case_key,county_name,last_name,last_name_soundex,first_name,"
	"first_name_soundex,middle_name,middle_name_soundex,suffix,addr_street1,addr_street2,"
	"addr_city,addr_state,addr_zip5,addr_zip4,race,sex,birth_year,birth_month,birth_day,"
	"last_4_ssn,driver_license,driver_license_state,citation_number,added_to_acis_date,"
	"extract_date,personal_identifier,case_disposed,court_type) from stdin;"))

	;; SubjectAlias header
	(fprintf a-out
	"~a\n"
	(string-append
	"copy subject_aliases (case_key,line_number,last_name,last_name_soundex,first_name,"
	"first_name_soundex,middle_name,suffix) from stdin;"))

	;; Main loop
	(for ([line-no (in-naturals 1)]
	[line (in-bytes-lines i-in)])
	(define record-type (subbytes line 0 2))

	(match record-type
	[#"00" null ]
	[#"01" (parse-case e-out line-no line)
	(set! num-cases (+ num-cases 1)) ]
	[#"02" (parse-alias a-out line-no line)
	(set! num-aliases (+ num-aliases 1)) ]
	[#"99" null ]
	[_ (log-error parse-records line-no (format "invalid record type: ~a" record-type)) ]))

	(fprintf e-out "\\.\n")
	(fprintf a-out "\\.\n"))

	(define (parse-alias a-out line-no line)
	;; Only use 27 because the '@' character is in the last position!
	(define-values (last-name first-name middle-name suffix) (parse-name (subbytes line 20 47)))
	(define last-name-soundex (if last-name (soundex last-name) #f))
	(define first-name-soundex (if first-name (soundex first-name) #f))

	(fprintf a-out "~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\n"
	(opt-field (parse-case-key line))
	(opt-field (parse-alias-line (subbytes line 18 20)))
	(opt-field last-name)
	(opt-field last-name-soundex)
	(opt-field first-name)
	(opt-field first-name-soundex)
	(opt-field middle-name)
	(opt-field suffix)))

	(define (parse-case e-out line-no line)
	(define-values (last-name first-name middle-name suffix) (parse-name (subbytes line 30 58)))
	(define last-name-soundex (if last-name (soundex last-name) #f))
	(define first-name-soundex (if first-name (soundex first-name) #f))

	(fprintf e-out "~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\t~a\n"
	(opt-field (parse-case-key line))
	(opt-field (parse-bytes line 18 30)) ; county name
	(opt-field last-name)
	(opt-field last-name-soundex)
	(opt-field first-name)
	(opt-field first-name-soundex)
	(opt-field middle-name)
	(opt-field suffix)
	(opt-field (parse-bytes line 58 78)) ; addr_street1
	(opt-field (parse-bytes line 78 93)) ; addr_street2
	(opt-field (parse-bytes line 93 108)) ; city
	(opt-field (parse-bytes line 108 110)) ; state
	(opt-field (parse-bytes line 110 115)) ; zip5
	(opt-field (parse-bytes line 115 119)) ; zip4
	(opt-field (parse-bytes line 119 120)) ; race
	(opt-field (parse-bytes line 120 121)) ; sex
	(opt-field (parse-bytes line 121 125)) ; birth_year
	(opt-field (parse-bytes line 125 127)) ; birth_month
	(opt-field (parse-bytes line 127 129)) ; birth_day
	(opt-field (parse-bytes line 129 133)) ; last_4_ssn
	(opt-field (parse-bytes line 133 153)) ; driver_license
	(opt-field (parse-bytes line 153 155)) ; driver_license_state
	(opt-field (parse-bytes line 155 163)) ; citation_number
	(opt-field (parse-bytes line 163 171)) ; added_to_acis_date
	(opt-field (parse-bytes line 171 179)) ; extract_date
	(opt-field (parse-bytes line 179 189)) ; personal_identifier
	(opt-field (parse-bytes line 189 190)) ; case_disposed
	(opt-field (parse-bytes line 190 193)))) ; court_type

	(define (parse-alias-line bytes)
	(if (= zero (bytes-ref bytes 0))
	(subbytes bytes 1 2)
	bytes))

	(define (parse-case-key line)
	(bytes-trim (subbytes line 2 18)))

	(define (parse-name bytes)
	(define tokens (bytes-split (bytes-delete bytes backslash) comma))
	(define len (length tokens))

	(values
	(and (> len 0) (bytes-trim (car tokens)))
	(and (> len 1) (bytes-trim (cadr tokens)))
	(and (> len 2) (bytes-trim (caddr tokens)))
	(and (> len 3) (bytes-trim (cadddr tokens)))))

	(define (opt-field bytes)
	(if (and bytes (> (bytes-length bytes) 0))
	bytes
	#"\\N"))

	(define (parse-bytes line beg end)
	(bytes-trim (bytes-delete (subbytes line beg end) backslash)))

	(define (log-error proc-sym line-no str)
	(printf "error Parser:~a line ~a: ~a" (object-name proc-sym) line-no str))

	(define (run)
	(parse-file "cr-4000"
	"e1-file"
	"a1-file"))

	;(profile-thunk run #:use-errortrace? #t)
	(time (run))
	#lang racket

	(require "./byte-string-support.rkt")

	;; --------------------------------------------------------------------------------------------
	;; Public Interface
	;; --------------------------------------------------------------------------------------------

	;; Convert a byte string to a soundex code. If the input contains no
	;; alphabetic bytes, return #f
	(define (soundex original)
	(define clean (bytes-delete-pred original (λ (b) (or (< b a-code)
	(> b z-code)))))

	(if (non-empty-bytes? clean)
	(soundex-inner clean)
	#f))

	;; --------------------------------------------------------------------------------------------
	;; Private
	;; --------------------------------------------------------------------------------------------

	(define a-code (char->integer #\A))
	(define e-code (char->integer #\E))
	(define h-code (char->integer #\H))
	(define i-code (char->integer #\I))
	(define o-code (char->integer #\O))
	(define u-code (char->integer #\U))
	(define w-code (char->integer #\W))
	(define y-code (char->integer #\Y))
	(define z-code (char->integer #\Z))

	(define num-letters (+ (- z-code a-code) 1))

	(define soundex-chars #"ABCDEFGHIJKLMNOPQRSTUVWXYZ")
	(define soundex-codes #"A123E12HI22455O12623U1W2Y2")

	(define soundex-code-length 4)

	(define (soundex-inner original)
	;; 1. Save the first letter.
	(define first_letter (bytes-car original))

	;; 2. Remove all occurrences of 'H' and 'W' EXCEPT first letter.
	(define str (bytes-append
	(subbytes original 0 1)
	(bytes-delete-pred
	(if (> (bytes-length original) 1)
	(bytes-cdr original)
	#"")
	(λ (b) (or (= b h-code) (= b w-code))))))

	;; 3. Replace all consonants (including the first letter) with digits as follows:
	;; b, f, p, v -> 1
	;; c, g, j, k, q, s, x, z -> 2
	;; d, t -> 3
	;; l -> 4
	;; m, n -> 5
	;; r -> 6
	(set! str (bytes-tr str tr-map))

	;; 4. Replace all adjacent same digits with one digit.
	(set! str (bytes-squeeze str))

	;; 5. Remove all occurrences of A, E, I, O, U, Y EXCEPT the first letter.
	(set! str (let ([len (bytes-length str)])
	(bytes-append
	(subbytes str 0 1)
	(bytes-delete-pred
	(if (> len 1)
	(subbytes str 1 len)
	#"")
	(λ (b) (or (= b a-code)
	(= b e-code)
	(= b i-code)
	(= b o-code)
	(= b u-code)
	(= b y-code)))))))

	;; 6. If first symbol is a digit replace it with letter saved on step 1.
	(when (byte-numeric? (bytes-ref str 0))
	(bytes-set! str 0 first_letter))


	;; 7. Append zeros or truncate so that the result is a letter followed by 3 digits.
	(let ([len (bytes-length str)])
	(cond [(< len soundex-code-length) (subbytes (bytes-append str #"000") 0 soundex-code-length)]
	[(> len soundex-code-length) (subbytes str 0 soundex-code-length)]
	[else str])))

	;; Create a translation map to convert letters to soundex digits.
	(define tr-map
	(let ([base a-code]
	[v (make-vector num-letters)])
	(for ([c1 (in-bytes soundex-chars)]
	[c2 (in-bytes soundex-codes)])
	(vector-set! v (- c1 base) c2))
	v))

	;; Translate bytes in a byte string based on a translation map
	(define (bytes-tr bytes map)
	(define result (make-bytes (bytes-length bytes)))
	(for ([c (in-bytes bytes)]
	[i (in-naturals)])
	(let ([k (- c a-code)])
	(if (<= 0 k (- num-letters 1))
	(bytes-set! result i (vector-ref map k))
	(bytes-set! result i c))))
	result)

	(provide soundex)