Skip to content

Instantly share code, notes, and snippets.

@ideamonk
Created September 6, 2012 22:50
Show Gist options
  • Save ideamonk/3661015 to your computer and use it in GitHub Desktop.
Save ideamonk/3661015 to your computer and use it in GitHub Desktop.
Parsing English
;;; Author: Abhishek Mishra <ideamonk@gmail.com>
; ; http://www.reddit.com/r/dailyprogrammer/comments/zfeb2/9062012_challenge_96_intermediate_parsing_english/
; ; seven hundred and forty-four million
; (-> 7 (* 100) (+ 40 4) (* 1000000) )
; ; ten-million and ninety-four
; (-> 10 (* 1000000) (+ 90 4) )
; ; One-Thousand and Thirty-Four
; (-> 1 (* 1000) (+ 30 4) )
; ; Two-Billion and One-Hundred-Forty-Five-Thousand
; (-> 2 (* 1000000000) (+ (-> 1 (* 100) (+ 40 5) (* 1000) ) ) )
; (-> 2 (* 1000000000) (+ (-> 1 (* 100) (+ 40) (+ 5) (* 1000))))
; ; eleven-thousand and eight-hundred
; (-> 11 (* 1000) (+ (-> 8 (* 100)) ) )
; ; three-thousand and twenty
; (-> 3 (* 1000) (+ (-> 20)))
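; Assumption: a '-' ties a multiplier to the words it is joined with (locality),
; while a multiplier written as a separate word scales everything before it.
; e.g. "hundred and twenty-thousand" means 100 + 20,000 = 20,100
;      "hundred and twenty thousand" means (100 + 20) * 1,000 = 120,000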
;; Lookup table from a number word (as a keyword) to the source-text fragment
;; of the threading step that word contributes: value words become " (+ n) ",
;; multiplier words become " (* n) ". The padding spaces keep neighbouring
;; fragments apart once they are concatenated into one expression string.
(def mapping
  (let [plus  (fn [n] (str " (+ " n ") "))
        times (fn [n] (str " (* " n ") "))
        ;; zero .. nineteen, in order, so the index is the value
        small [:zero :one :two :three :four :five :six :seven :eight :nine
               :ten :eleven :twelve :thirteen :fourteen :fifteen :sixteen
               :seventeen :eighteen :nineteen]
        ;; twenty .. ninety, in steps of ten
        tens  [:twenty :thirty :forty :fifty :sixty :seventy :eighty :ninety]]
    (merge
      (zipmap small (map plus (range 20)))
      (zipmap tens (map plus (range 20 100 10)))
      ;; multipliers
      {:hundred  (times 100)
       :thousand (times 1000)
       :million  (times 1000000)
       :billion  (times 1000000000)
       :trillion (times 1000000000000)})))
(defn parse
  "Turn a sequence of lower-cased tokens into the string fragments of a `->`
  threading expression; concatenating the fragments gives evaluable source.

  l - sequence of tokens; each is either the word 'and' or one or more number
      words joined by '-' (for example 'one-hundred-forty-five').

  Returns a sequence of strings, or nil when there is nothing to translate.

  Rules:
  * A plain token opens a new `(-> ...` form holding one step per word of the
    token, followed by the translation of all remaining tokens.
  * 'and' wraps only the single token after it in `(+ ...)`; translation then
    continues with whatever follows that token.
  * A dangling 'and' (nothing after it) contributes nothing. Previously this
    case threw a NullPointerException from `re-seq` on nil.
  * Words missing from `mapping` contribute an empty string, i.e. they are
    skipped silently."
  [l]
  (let [[f & more] l]
    (cond
      ;; No tokens left.
      (empty? l) nil

      (= f "and")
      ;; Guard against a trailing or doubled 'and': only recurse on the
      ;; following token when one actually exists.
      (when (seq more)
        (concat ["(+ "] (parse [(first more)]) [") "] (parse (rest more))))

      :else
      (concat ["(-> "]
              ;; (str nil) is "", so unknown words yield an empty fragment.
              (map #(str (mapping (keyword %))) (re-seq #"\w+" f))
              (parse (rest l))
              [")"]))))
(defn gimme-number
  "Evaluate an English number phrase and return its numeric value.

  s - the phrase, e.g. 'Forty-Five-Hundred and ninety-eight'; case is ignored.

  The phrase is lower-cased, split into runs of the characters a-z and '-',
  translated by `parse` into the text of a threading expression, and that text
  is evaluated with `load-string`. Only fragments produced by `parse` (fixed
  strings and `mapping` values) ever reach the evaluator, never raw input.

  Returns the value of the generated expression, or nil when the phrase
  contains no tokens."
  [s]
  ;; Lower-case with Locale/ROOT: the default-locale variant maps 'I' to a
  ;; dotless i under e.g. a Turkish locale, which the [a-z-]+ tokenizer would
  ;; then treat as a word break.
  (let [words (re-seq #"[a-z-]+" (.toLowerCase ^String s java.util.Locale/ROOT))]
    (load-string (apply str (parse words)))))
;; Example calls. The comment under each form is the value it evaluates to.
;; A multiplier written as its own word ("Thousand", "Million") scales the
;; whole total accumulated before it; a hyphenated multiplier only scales the
;; words it is joined with.
(gimme-number "Two-Million and One-Hundred-Forty-Five Thousand")
; 2000145000
(gimme-number "Forty-Five-Hundred")
; 4500
(gimme-number "Forty-Five-Hundred and ninety-eight")
; 4598
(gimme-number "three-thousand and twenty")
; 3020
(gimme-number "One-Thousand and Thirty-Four")
; 1034
(gimme-number "Ten-Million and Ninety-Four")
; 10000094
(gimme-number "Seven-Hundred and Forty-Four Million")
; 744000000
(gimme-number "Five-hundred and fifty-five million and four-hundred and forty thousand and twenty-five")
; 555000440025
; (not 555,440,025: the stand-alone "thousand" multiplies the full running
;  total 555,000,440 before 25 is added)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment