Skip to content

Instantly share code, notes, and snippets.

@genmeblog
Last active June 18, 2020 09:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save genmeblog/3f12c557cfe082d11f4ed25ee5ef3b07 to your computer and use it in GitHub Desktop.
Save genmeblog/3f12c557cfe082d11f4ed25ee5ef3b07 to your computer and use it in GitHub Desktop.
Format sequence of doubles to seq of aligned strings
(ns format-sequence
"Format sequence of doubles"
(:require [clojure.pprint :refer [cl-format]]
[clojure.test :refer [deftest is]]))
(set! *warn-on-reflection* true)
(set! *unchecked-math* :warn-on-boxed)
;; maximum double power for precise calculations
(def ^:private ^:const ^long kp-max 22)
;; powers for scientific notation
(def ^:private tbl [1e-1,
1e00, 1e01, 1e02, 1e03, 1e04, 1e05, 1e06, 1e07, 1e08, 1e09,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22])
(defn- left
"What is the power of number"
^long [^double x]
(-> x Math/log10 Math/floor unchecked-long inc))
(defn- find-nsig
"Shift decimal places until non-zero value is found"
^long [^double alpha ^long digits]
(loop [a alpha
d digits]
(let [a- (/ a 10.0)]
(if (and (= a- (Math/floor a-)))
(recur a- (dec d))
(max 1 d)))))
(defn- right
"Calculate maximum digits on the right side of the dot."
^long [^double x ^long digits]
(let [alpha (Math/round (* x ^double (tbl (inc digits))))]
(if (zero? alpha)
1
(find-nsig alpha digits))))
(defn- fix-left
"Fix number of digits on the left side. For scientific notations and non-positive exponent (lft) it should be leading digits + sign."
[^double x ^long lft e?]
(let [sgn (if (neg? x) 1 0)]
(if (or e? (not (pos? lft)))
(+ sgn 1)
(+ sgn lft))))
(defn- precision
[^double x ^long digits ^long threshold]
(if (zero? x)
[false 0 1 1] ;; zero is reprezented as 0.0
(let [digits (max 1 (min 10 digits)) ;; constrain digits to 1-10 range
r (Math/abs x)
lft (left r) ;; digits on the left side of dot
alft (Math/abs lft)
e? (>= alft threshold)
r-prec (cond
(< alft threshold) r ;; normal number
(< alft kp-max) (if (neg? lft) ;; scientific number (using table to shift values)
(* r ^double (tbl (inc (- lft))))
(/ r ^double (tbl (inc lft))))
:else (/ r (Math/pow 10.0 (dec lft)))) ;; very big or very small case
rght (right r-prec digits) ;; desired precision on the right side
exp (if (> alft 100) 3 2) ;; size of the exponent
lft (fix-left x lft e?)]
[e? exp lft rght])))
(defn- fit-precision
"Find best matching presision for given sequence."
[xs ^long digits ^long threshold]
(reduce (fn [[ce? ^long cexp ^long clft ^long crght ^long non-finite-len] x]
(let [^double x (if (instance? Float x)
(Double/valueOf (str x))
(or x ##NaN))]
(if (Double/isFinite x)
(let [[e? ^long exp ^long lft ^long rght] (precision x digits threshold)]
(if (and e? (pos? threshold))
(reduced (fit-precision xs digits 0)) ;; switch to scientific notation
[(or e? ce?)
(max exp cexp)
(max lft clft)
(max rght crght)
non-finite-len]))
[ce? cexp clft crght (max non-finite-len (if (= x ##-Inf) 4 3))])))
[false Integer/MIN_VALUE Integer/MIN_VALUE Integer/MIN_VALUE 0] xs))
;; public functions
(defn formatter
"Create formatter for given:
* `xs` - sequence of doubles
* `digits` - maximum precision
* `threshold` - what is absolute power to switch to scientific notation
Returns formatter."
([xs] (formatter xs 8))
([xs ^long digits] (formatter xs digits 8))
([xs ^long digits ^long threshold]
(let [[e? ^long exp ^long lft ^long rght ^long non-finite-len] (fit-precision xs digits threshold)
w (max non-finite-len (if e?
(+ lft rght exp 3) ;; 3 = "." + sign of E + "E"
(+ lft rght 1))) ;; 1 for "."
fmt (if e?
(str "~" w "," rght "," exp "E")
(str "~" w "," rght "F"))
non-finite-fmt (str "~" w "@A")]
(fn [x]
(let [^double x (or x ##NaN)]
(if (Double/isFinite x)
(cl-format nil fmt x)
(cl-format nil non-finite-fmt (cond
(== ##Inf x) "Inf"
(== ##-Inf x) "-Inf"
:else "NaN"))))))))
(defn format-sequence
"Format sequence of double for given:
* `xs` - sequence of doubles
* `digits` - maximum precision
* `threshold` - what is absolute power to switch to scientific notation
Returns sequence of strings."
([xs] (format-sequence xs 8))
([xs ^long digits] (format-sequence xs digits 8))
([xs ^long digits ^long threshold]
(let [fmt (formatter xs digits threshold)]
(map fmt xs))))
;; cases
(def a [0.000001 0.00001 0.0001 0.001 0.01 0.1 0.0
1.0 10.0 100.0 1000.0 10000.0 100000.0])
(format-sequence a)
;; => (" 0.000001"
;; " 0.000010"
;; " 0.000100"
;; " 0.001000"
;; " 0.010000"
;; " 0.100000"
;; " 0.000000"
;; " 1.000000"
;; " 10.000000"
;; " 100.000000"
;; " 1000.000000"
;; " 10000.000000"
;; "100000.000000")
(format-sequence a 5 4)
;; => ("1.0E-06"
;; "1.0E-05"
;; "1.0E-04"
;; "1.0E-03"
;; "1.0E-02"
;; "1.0E-01"
;; "0.0E+00"
;; "1.0E+00"
;; "1.0E+01"
;; "1.0E+02"
;; "1.0E+03"
;; "1.0E+04"
;; "1.0E+05")
(def b [10.0 10.1 10.11 10.111 10.1111 10.11111
1.0 1.1 1.11 1.111 1.1111 1.11111
0.0 0.1 0.11 0.111 0.1111 -0.11111])
(format-sequence b)
;; => ("10.00000"
;; "10.10000"
;; "10.11000"
;; "10.11100"
;; "10.11110"
;; "10.11111"
;; " 1.00000"
;; " 1.10000"
;; " 1.11000"
;; " 1.11100"
;; " 1.11110"
;; " 1.11111"
;; " 0.00000"
;; " 0.10000"
;; " 0.11000"
;; " 0.11100"
;; " 0.11110"
;; "-0.11111")
(format-sequence b 5 2)
;; => (" 1.00000E+01"
;; " 1.01000E+01"
;; " 1.01100E+01"
;; " 1.01110E+01"
;; " 1.01111E+01"
;; " 1.01111E+01"
;; " 1.00000E+00"
;; " 1.10000E+00"
;; " 1.11000E+00"
;; " 1.11100E+00"
;; " 1.11110E+00"
;; " 1.11111E+00"
;; " 0.00000E+00"
;; " 1.00000E-01"
;; " 1.10000E-01"
;; " 1.11000E-01"
;; " 1.11100E-01"
;; "-1.11110E-01")
(def c (range -5 4 0.8795833))
(format-sequence c)
;; => ("-5.0000000"
;; "-4.1204167"
;; "-3.2408334"
;; "-2.3612501"
;; "-1.4816668"
;; "-0.6020835"
;; " 0.2774998"
;; " 1.1570831"
;; " 2.0366664"
;; " 2.9162497"
;; " 3.7958330")
(format-sequence c 4)
;; => ("-5.0000"
;; "-4.1204"
;; "-3.2408"
;; "-2.3613"
;; "-1.4817"
;; "-0.6021"
;; " 0.2775"
;; " 1.1571"
;; " 2.0367"
;; " 2.9162"
;; " 3.7958")
(format-sequence c 4 0)
;; => ("-5.0000E+00"
;; "-4.1204E+00"
;; "-3.2408E+00"
;; "-2.3613E+00"
;; "-1.4817E+00"
;; "-6.0208E-01"
;; " 2.7750E-01"
;; " 1.1571E+00"
;; " 2.0367E+00"
;; " 2.9162E+00"
;; " 3.7958E+00")
(def d [-1.0e-20 -1.334e-100 3.43e100 4.556e20
1.0e-20 1.334e-100 -3.43e100 -41.556e20
0.999e-300 -0.999e300])
(format-sequence d)
;; => ("-1.0000E-020"
;; "-1.3340E-100"
;; " 3.4300E+100"
;; " 4.5560E+020"
;; " 1.0000E-020"
;; " 1.3340E-100"
;; "-3.4300E+100"
;; "-4.1556E+021"
;; " 9.9900E-301"
;; "-9.9900E+299")
(def e [-1.0e99 1.0e99])
(format-sequence e)
;; => ("-1.0E+99" " 1.0E+99")
(def f [-1.0e100 1.0e100])
(format-sequence f)
;; => ("-1.0E+100" " 1.0E+100")
(def g [0.002 0.0002 0.000333 0.1 -0.0003 0.0])
(format-sequence g)
;; => (" 0.002000"
;; " 0.000200"
;; " 0.000333"
;; " 0.100000"
;; "-0.000300"
;; " 0.000000")
(def h [0.002 0.0002 0.00333 0.00001 -0.0003 0.022 0.0001])
(format-sequence h)
;; => (" 0.00200"
;; " 0.00020"
;; " 0.00333"
;; " 0.00001"
;; "-0.00030"
;; " 0.02200"
;; " 0.00010")
(def i [10.0 ##NaN ##Inf ##-Inf 100 0.001 nil])
(format-sequence i)
;; => (" 10.000"
;; " NaN"
;; " Inf"
;; " -Inf"
;; "100.000"
;; " 0.001"
;; " NaN")
(format-sequence i 0 0)
;; => ("1.0E+01"
;; " NaN"
;; " Inf"
;; " -Inf"
;; "1.0E+02"
;; "1.0E-03"
;; " NaN")
(def j (map float [39.81 36.35 43.22 28.37 25.45
-39.81 36.351 43.221 28.371 25.451]))
(format-sequence j)
;; => (" 39.810"
;; " 36.350"
;; " 43.220"
;; " 28.370"
;; " 25.450"
;; "-39.810"
;; " 36.351"
;; " 43.221"
;; " 28.371"
;; " 25.451")
(deftest regression-tests
(is (= (format-sequence j)
'(" 39.810" " 36.350" " 43.220" " 28.370" " 25.450" "-39.810" " 36.351" " 43.221" " 28.371" " 25.451")))
(is (= (format-sequence i 0 0)
'("1.0E+01" " NaN" " Inf" " -Inf" "1.0E+02" "1.0E-03" " NaN")))
(is (= (format-sequence a)
'(" 0.000001" " 0.000010" " 0.000100" " 0.001000" " 0.010000" " 0.100000" " 0.000000" " 1.000000" " 10.000000" " 100.000000" " 1000.000000" " 10000.000000" "100000.000000")))
(is (= (format-sequence a 5 4)
'("1.0E-06" "1.0E-05" "1.0E-04" "1.0E-03" "1.0E-02" "1.0E-01" "0.0E+00" "1.0E+00" "1.0E+01" "1.0E+02" "1.0E+03" "1.0E+04" "1.0E+05")))
(is (= (format-sequence b)
'("10.00000" "10.10000" "10.11000" "10.11100" "10.11110" "10.11111" " 1.00000" " 1.10000" " 1.11000" " 1.11100" " 1.11110" " 1.11111" " 0.00000" " 0.10000" " 0.11000" " 0.11100" " 0.11110" "-0.11111")))
(is (= (format-sequence b 5 2)
'(" 1.00000E+01" " 1.01000E+01" " 1.01100E+01" " 1.01110E+01" " 1.01111E+01" " 1.01111E+01" " 1.00000E+00" " 1.10000E+00" " 1.11000E+00" " 1.11100E+00" " 1.11110E+00" " 1.11111E+00" " 0.00000E+00" " 1.00000E-01" " 1.10000E-01" " 1.11000E-01" " 1.11100E-01" "-1.11110E-01")))
(is (= (format-sequence c)
'("-5.0000000" "-4.1204167" "-3.2408334" "-2.3612501" "-1.4816668" "-0.6020835" " 0.2774998" " 1.1570831" " 2.0366664" " 2.9162497" " 3.7958330")))
(is (= (format-sequence c 4)
'("-5.0000" "-4.1204" "-3.2408" "-2.3613" "-1.4817" "-0.6021" " 0.2775" " 1.1571" " 2.0367" " 2.9162" " 3.7958")))
(is (= (format-sequence c 4 0)
'("-5.0000E+00" "-4.1204E+00" "-3.2408E+00" "-2.3613E+00" "-1.4817E+00" "-6.0208E-01" " 2.7750E-01" " 1.1571E+00" " 2.0367E+00" " 2.9162E+00" " 3.7958E+00")))
(is (= (format-sequence d)
'("-1.0000E-020" "-1.3340E-100" " 3.4300E+100" " 4.5560E+020" " 1.0000E-020" " 1.3340E-100" "-3.4300E+100" "-4.1556E+021" " 9.9900E-301" "-9.9900E+299")))
(is (= (format-sequence e)
'("-1.0E+99" " 1.0E+99")))
(is (= (format-sequence f)
'("-1.0E+100" " 1.0E+100")))
(is (= (format-sequence g)
'(" 0.002000" " 0.000200" " 0.000333" " 0.100000" "-0.000300" " 0.000000")))
(is (= (format-sequence h)
'(" 0.00200" " 0.00020" " 0.00333" " 0.00001" "-0.00030" " 0.02200" " 0.00010")))
(is (= (format-sequence i)
'(" 10.000" " NaN" " Inf" " -Inf" "100.000" " 0.001" " NaN")))
(is (= (format-sequence i 0 0)
'("1.0E+01" " NaN" " Inf" " -Inf" "1.0E+02" "1.0E-03" " NaN")))
(is (= (format-sequence j)
'(" 39.810" " 36.350" " 43.220" " 28.370" " 25.450" "-39.810" " 36.351" " 43.221" " 28.371" " 25.451"))))
(regression-tests)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment