Skip to content

Instantly share code, notes, and snippets.

@okayu3
Last active August 29, 2015 13:58
Show Gist options
  • Save okayu3/10016261 to your computer and use it in GitHub Desktop.
Save okayu3/10016261 to your computer and use it in GitHub Desktop.
#xyzzy で 漢数字を数字に直す。
;;* 漢数字パッケージ
;; 漢数字→算用数字へ変換します。
;; (calc-onthespot-kansuji "千百八十三万百廿一")
;;→ 11830121
;; (calc-onthespot-kansuji-exchange "4/0.8万*三百八十七")
;;→ "4/8000.0*387"
;; (calc-onthespot-kansuji-exchange "国会法第壱百弐拾壱条の二第一項の規定")
;;→ "国会法第121条の2第1項の規定" ←文中の任意の漢数字を算用数字に変換します。
;;
;;
;;** 改修/改造するあなたに
;;アルゴリズムは次のようになっています。
;;
;; 1.「〇一二...九」などの1文字で数を表す漢字については、文字変換しておきます。
;; → calc-onthespot-cnv-kanjinum
;; 2. ここで全角数字、カンマ・ピリオドなどを半角に変換します。
;; → map-to-half-width-string ascii:t
;; 3. トークンに分解します。[calc-onthespot-kansuji-token]
;; 3-1. ここに入ってきている段階で、半角算用数字+カンマ・ピリオド+ケタ数詞(十百千万億兆京垓)のみになっているはずです。
;; 知らない文字が入っていたらここではじきます。
;; 3-2. 「十百千」の前に数字がない場合に "1" を補填します。
;; 3-3. ケタ数詞 と その間の数字に分解します。
;; 4. トークン列の前から処理します。[calc-onthespot-kansuji]
;; 4-1. 算用数字 は ワークAにセットします。
;; 4-2. ケタ数詞が「十,百,千」なら、ワークAに対して ケタ数詞に対応する数を掛け、ワークBに加算。ワークAは0にします。
;; 4-3. ケタ数詞が「万,億,兆,..」なら、ワークBに対して ケタ数詞に対応する数を掛け、答えに加算。ワークA,Bは0にします。
;; 4-4. トークン列処理が済んでから、残っているワークA,Bの値を答えに足します。
;;
;;
;;** 注意点
;; a1. calc-modeと計算や記法を合せるために、計算時、数値文字列作成時には必ず
;; *read-default-float-format* を 'double-float にセットします。
;; a2. 10^24 じょ は、正しくは「杼」の字ではなく、この文字のノギヘンの文字です。
;;
(defvar *calc-onthespot-numeration-class* nil)
(defvar *calc-onthespot-numeration-class-none* nil)
(defvar *calc-onthespot-numeration-regex-any* nil)
(let* ((num-names "十百千万億兆京垓杼穣溝澗正載極")
(num-names2 "\\(?:恒河沙\\|阿僧祇\\|那由他\\|不可思議\\|無量大数\\)")
(cls-nms (concat "\\(?:[" num-names "]\\|" num-names2 "\\)"))
(cls-full (concat "\\(?:[0-9.," num-names "]\\|" num-names2 "\\)"))
(cls-dn (concat "\\(?:[0-9" num-names "]\\|" num-names2 "\\)"))
(cls-digit0 "[0-9]")
(cls-digit1 "[0-9.]")
(cls-digit2 "[0-9.,]")
)
(setq *calc-onthespot-numeration-class* cls-nms)
;; (setq *calc-onthespot-numeration-class* (concat "[" num-names "]"))
;; (setq *calc-onthespot-numeration-class-none* (concat "[^0-9,." num-names "]"))
;; (setq *calc-onthespot-numeration-regex-any* (concat "[0-9,.]*[" num-names "]+[0-9,." num-names "]*"))
;; (setq *calc-onthespot-numeration-regex-any* (concat "[" num-names "][0-9.," num-names "]*[0-9" num-names "]\\|[0-9.][0-9.,]*[" num-names "][0-9.," num-names "]*[0-9]\\|[0-9.][0-9.," num-names "]*[" num-names "]\\|[" num-names "]"))
(setq *calc-onthespot-numeration-regex-any* (concat
(concat cls-nms cls-full "*" cls-dn) "\\|"
(concat cls-digit1 cls-digit2 "*" cls-nms cls-full "*" cls-digit0) "\\|"
(concat cls-digit1 cls-full "*" cls-nms) "\\|"
(concat cls-nms)))
;; [百][0-9.,百]*[0-9百]
;; [0-9.][0-9.,]*[百][0-9.,百]*[0-9]
;; [0-9.][0-9.,百]*[百]
;; [百]
)
(defvar *calc-onthespot-kansuji-alist* nil)
(setq *calc-onthespot-kansuji-alist*
(list (list "十" . (1 10))
(list "百" . (1 100))
(list "千" . (1 1000))
(list "万" . (2 10000))
(list "億" . (2 (expt 10 8)))
(list "兆" . (2 (expt 10 12)))
(list "京" . (2 (expt 10 16)))
(list "垓" . (2 (expt 10 20)))
(list "杼" . (2 (expt 10 24)))
(list "穣" . (2 (expt 10 28)))
(list "溝" . (2 (expt 10 32)))
(list "澗" . (2 (expt 10 36)))
(list "正" . (2 (expt 10 40)))
(list "載" . (2 (expt 10 44)))
(list "極" . (2 (expt 10 48)))
(list "恒河沙" . (2 (expt 10 52)))
(list "阿僧祇" . (2 (expt 10 56)))
(list "那由他" . (2 (expt 10 60)))
(list "不可思議" . (2 (expt 10 64)))
(list "無量大数" . (2 (expt 10 68)))
))
(defun calc-onthespot-cnv-kanjinum (str)
(let ((lst-cnv
(list '("〇" "0")
'("一" "1")
'("二" "2")
'("三" "3")
'("四" "4")
'("五" "5")
'("六" "6")
'("七" "7")
'("八" "8")
'("九" "9")
'("壱" "1")
'("弐" "2")
'("参" "3")
'("伍" "5")
'("拾" "十")
'("廿" "2十")
'("萬" "万")
)))
(dolist (rule lst-cnv)
(setf str (substitute-string str (car rule) (cadr rule))))
str
))
;;(calc-onthespot-kansuji-token "千百八十三万百十一")
;;→("1" "千" "1" "百" "8" "十" "3" "万" "1" "百" "1" "十" "1")
(defun calc-onthespot-kansuji-token (str)
(let* (loc
(n-cls *calc-onthespot-numeration-class*)
(lst-tokens '())
(regex0 (concat "\\(" n-cls "\\)\\([十百千]\\)"))
(regex (concat "[0-9.,]+\\|" n-cls)))
(setf str (calc-onthespot-cnv-kanjinum str))
(setf str (map-to-half-width-string str :ascii t))
;; (if (string-match *calc-onthespot-numeration-class-none* str)
;; (error "Syntax error: ~c" str))
(if (not (calc-onthespot-kansuji-check-syntax str))
(error "Syntax error: ~c" str))
(if (string-match "^[十百千]" str) (setf str (concat "1" str)))
(setf loc -1)
(while (setf loc (string-match regex0 str (+ loc 1)))
(let ((matched (match-string 2)))
(setf str (substitute-string str matched (concat "1" matched) :start (+ loc 1) :end (+ 1 (+ loc (length matched)))))))
(setf loc 0)
(let ((matched nil))
(while (setf loc (string-match regex str (+ loc (length matched))))
(setf matched (match-string 0))
(push matched lst-tokens)))
(reverse lst-tokens)
))
(defun calc-onthespot-kansuji-check-syntax (str)
(string-match (concat "[0-9.,]+\\|" *calc-onthespot-numeration-regex-any*) str)
(equal (match-string 0) str)
)
;;漢数字→数値
(defun calc-onthespot-kansuji (str)
(let ((lst-token (calc-onthespot-kansuji-token str))
(*read-default-float-format* 'double-float)
(ans 0)
(wk-unit 0)
(wk-pre 0)
)
(if lst-token
(progn
(dolist (token lst-token)
(let ((rule (cdr (assoc token *calc-onthespot-kansuji-alist* :test #'equal))))
(cond ((and rule (= 1 (car rule)))
(setf wk-unit (+ wk-unit (* wk-pre (cadr rule))))
(setf wk-pre 0))
((and rule (= 2 (car rule)))
(setf wk-unit (+ wk-unit wk-pre))
(setf ans (+ ans (* wk-unit (cadr rule))))
(setf wk-unit 0)
(setf wk-pre 0))
((and (not rule)
(string-match "^[0-9\\.,]+$" token))
(setf token (substitute-string token "," ""))
(setf wk-pre (read-from-string token)))
(t
(error "Syntax error: ~c" token)))))
(setf ans (+ ans (+ wk-unit wk-pre)))
))
ans))
;;文字列中の漢数字を全て変換
(defun calc-onthespot-kansuji-exchange (calc-input)
(let ((str (map-to-half-width-string (calc-onthespot-cnv-kanjinum calc-input) :ascii t)))
(if (string-match *calc-onthespot-numeration-class* str)
(let (kansuji-num
kansuji-dsc
(*read-default-float-format* 'double-float)
(matched nil)
(regex *calc-onthespot-numeration-regex-any*))
(while (string-match regex str)
(setf matched (match-string 0))
(setf kansuji-num (calc-onthespot-kansuji matched))
(setf kansuji-dsc (format nil "~A" kansuji-num))
(setf str (substitute-string str (regexp-quote matched) kansuji-dsc)))
))
str)
)
;; (defun calc-onthespot-kansuji-0 (str)
;; (let ((lst-token (calc-onthespot-kansuji-token str))
;; (ans 0)
;; (wk-unit 0)
;; (wk-pre 0)
;; )
;; (if (not (calc-onthespot-kansuji-recognizable str))
;; (error "Syntax error: ~c" str))
;; (if lst-token
;; (progn
;; (dolist (token lst-token)
;; (cond ((equal "十" token)
;; (setf wk-unit (+ wk-unit (* wk-pre 10)))
;; (setf wk-pre 0))
;; ((equal "百" token)
;; (setf wk-unit (+ wk-unit (* wk-pre 100)))
;; (setf wk-pre 0))
;; ((equal "千" token)
;; (setf wk-unit (+ wk-unit (* wk-pre 1000)))
;; (setf wk-pre 0))
;; ((equal "万" token)
;; (setf wk-unit (+ wk-unit wk-pre))
;; (setf ans (+ ans (* wk-unit 10000)))
;; (setf wk-unit 0)
;; (setf wk-pre 0))
;; ((equal "億" token)
;; (setf wk-unit (+ wk-unit wk-pre))
;; (setf ans (+ ans (* wk-unit 100000000)))
;; (setf wk-unit 0)
;; (setf wk-pre 0))
;; ((equal "兆" token)
;; (setf wk-unit (+ wk-unit wk-pre))
;; (setf ans (+ ans (* wk-unit 1000000000000)))
;; (setf wk-unit 0)
;; (setf wk-pre 0))
;; ((equal "京" token)
;; (setf wk-unit (+ wk-unit wk-pre))
;; (setf ans (+ ans (* wk-unit 10000000000000000)))
;; (setf wk-unit 0)
;; (setf wk-pre 0))
;; ((string-match "^[0-9\\.,]+$" token)
;; (setf token (substitute-string token "," ""))
;; (setf wk-pre (read-from-string token)))
;; (t
;; (error "Syntax error: ~c" token))))
;; (setf ans (+ ans (+ wk-unit wk-pre)))
;; )
;; )
;; ans))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment