Last active
August 13, 2020 19:55
-
-
Save joinr/84cf128785627cfde1212fffb8675e35 to your computer and use it in GitHub Desktop.
revised correlation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;These mods pulled runtime down about 37x without going into primitive arrays, | |
;;and retaining a good bit of the original code. | |
;;Intermediate mods that got us down about 12x, keeping fairly clost to the original. | |
#_(defn split-adjacent [f xs] | |
(let [prev (atom nil) | |
res (reduce (fn [acc x] | |
(if (f x) | |
(reset! prev x) | |
(reduced x))) nil xs)] | |
[@prev res])) | |
#_(defn interpolate-point | |
"Given a vector V1 of [X1 Y1] pairs and a vector of X2 values | |
this returns an [X2 Y2} values, Where the Y2's are interpolated linearly" | |
[x2 v1] | |
(let [[preceeding-point following-point] | |
(split-adjacent #(<= ^double (nth % 0) x2) v1)] | |
(if (or (nil? preceeding-point) | |
(nil? following-point)) | |
nil | |
(/ (+ (nth preceeding-point 1) | |
(nth following-point 1)) | |
2.0)))) | |
#_(defn interpolate-points | |
"Given a vector of X2 values and a vector V1 of [X1 Y1] pairs | |
this returns an [X2 Y2] values, Where the Y2's are interpolated linearly" | |
[x2 v1] | |
(->> x2 | |
(map #(vector % (interpolate-point % v1))) | |
#_(filter #(some? (second %))))) | |
;;New mods that get us down ~5x over the intermediate improvement. | |
;;More efficient algorithm for lerping. We now compute | |
;;a sequence of lines, and later step through that sequence | |
;;with our input x values, projecting onto the lines to | |
;;get a lerp'd result. This is not identical to the simple | |
;;averaging that was happening before, but you can do that as | |
;;well. This is actually more general in that you get a piecewise | |
;;linear approximation of the underlying function, which | |
;;forms the lerp basis. Algorthimically, we have a much more | |
;;efficient implementation, since it's only O(N) in the size of the | |
;;input, where the original implementation was doing intersections for | |
;;every point in order to find the applicable range. This way, | |
;;we just scan through the input in one batch, and lerp lazily as we go. | |
(defn linear-ranges [points] | |
(for [[l r] (partition 2 1 points)] | |
(let [[x1 y1] l | |
[x2 y2] r] | |
{:l x1 | |
:r x2 | |
:x0 x1 | |
:y0 y1 | |
:m (double (/ (- y2 y1) (- x2 x1)))}))) | |
;;project x onto f(x) = m(x - x0) + y0 | |
(defn lerp [stats x] | |
(let [^double m (stats :m) | |
^double x0 (stats :x0) | |
^double y0 (stats :y0)] | |
(+ (* (- ^double x x0) m) y0))) | |
(defn lerp-points [xs vs] | |
(when-let [x (first xs)] | |
(when-let [v (first vs)] | |
(lazy-seq | |
(if (<= x (v :r)) | |
(cons [x (lerp v x)] | |
(lerp-points (rest xs) vs)) | |
(lerp-points xs (rest vs))))))) | |
(defn interpolate-points | |
"Given a vector of X2 values and a vector V1 of [X1 Y1] pairs | |
this returns an [X2 Y2] values, Where the Y2's are interpolated linearly" | |
[x2 v1] | |
(->> v1 | |
sort | |
linear-ranges | |
(lerp-points (sort x2)) | |
vec)) | |
;;unchanged | |
(defn interpolate-overlap | |
"Take a 2 point vector PV1 PV2. Crop PV1 to the overlapping segment. | |
Interpolate PV2 points to PV1 positions | |
Return the cropped PV1 and interpolated PV2 (they will be the same length)" | |
[pv1 | |
pv2] | |
(let [first-pv2-position (-> pv2 | |
first | |
first) | |
last-pv2-position (-> pv2 | |
last | |
first) | |
trimmed-pv1 (->> pv1 | |
;; drop all preceeding points with no overlap | |
(drop-while #(->> % | |
first | |
(>= first-pv2-position))) | |
;; only take remaining points with overlap | |
(take-while #(->> % | |
first | |
(> last-pv2-position)))) | |
interpolation-positions (map first trimmed-pv1) | |
interpolated-pv2 (interpolate-points interpolation-positions | |
pv2)] | |
[trimmed-pv1 interpolated-pv2])) | |
;;minor modification to avoid invoking count in case we have an O(n) count. | |
;;not a big deal, but shaved a few ms. | |
(defn mean [coll] | |
(let [[sum n] | |
(reduce (fn [[sum n] x] | |
[(+ sum x) (unchecked-inc n)]) [0 0] coll)] | |
(/ (double sum) n))) | |
#_(defn mean | |
[coll] | |
(assert (not-empty coll) "calculating the mean of an empty vector") | |
(/ (reduce + coll) (count coll))) | |
;;substantial modification. We now compute most of what we | |
;;need in 3 passes (if you count the means) vs. 5. | |
;;Also double type hints help avoid generic numerics. | |
;;Saves several hundred ms. | |
(defn vector-correlation-coefficient [xs ys] | |
(let [xs (vec xs) | |
ys (vec ys) | |
x-mean (mean xs) | |
y-mean (mean ys) | |
{:keys [sdxy varx vary]} | |
(reduce (fn blee [acc idx] | |
(let [x (xs idx) | |
y (ys idx) | |
^double sdxy (acc :sdxy) | |
^double varx (acc :varx) | |
^double vary (acc :vary) | |
xd (- ^double x x-mean) | |
yd (- ^double y y-mean)] | |
{:sdxy (+ sdxy ^double (* xd yd)) | |
:varx (+ varx ^double (Math/pow xd 2.0)) | |
:vary (+ vary ^double (Math/pow yd 2.0))})) {:sdxy 0.0 :varx 0.0 :vary 0.0} | |
(range (count xs)))] | |
(/ ^double sdxy | |
^double (Math/sqrt ^double (* ^double varx ^double vary))))) | |
#_(defn- vector-correlation-coefficient | |
"Get the correlation-coefficient of two vectors. | |
The vectors obviously should be the same length" | |
[x | |
y] | |
(let [x-mean (mean x) | |
y-mean (mean y)] | |
(/ (reduce + | |
(map (fn [xi yi] | |
(* (- xi | |
x-mean) | |
(- yi | |
y-mean))) | |
x | |
y)) | |
(Math/sqrt (* (reduce + | |
(map (fn [xi] | |
(* (Math/pow (- xi | |
x-mean) | |
2.0))) | |
x)) | |
(reduce + | |
(map (fn [yi] | |
(Math/pow (- yi | |
y-mean) | |
2.0)) | |
y))))))) | |
(defn points-correlation-coefficient | |
"Take TWO element count vectors of the form | |
[[position value] | |
[position value] | |
[position value] | |
...] | |
and calculate their correlation. | |
Takes an options V2-SHIFT values to adjust v2 postions" | |
([v1 | |
v2 | |
v2-shift] | |
(let [shifted-v2 (map #(vector (-> % | |
first | |
(+ v2-shift)) | |
(-> % | |
second)) | |
v2)] | |
#_(print " " | |
v2-shift) | |
(points-correlation-coefficient v1 | |
shifted-v2))) | |
([v1 | |
v2] | |
(let [[trimmed-v1 trimmed-v2] (interpolate-overlap v1 v2)] | |
(vector-correlation-coefficient (map second trimmed-v1) | |
(map second trimmed-v2))))) | |
(defn points-cross-correlation | |
"Take TWO element count vectors of the form | |
[[position value] | |
[position value] | |
[position value] | |
...]" | |
[v1 | |
v2 | |
step-size] | |
(let [shifts (-> (range (+ (- (first (last v2))) | |
(first (first v1))) | |
(- (first (last v1)) | |
(first (first v2))) | |
step-size) | |
rest | |
butlast) | |
correlations (map #(points-correlation-coefficient v1 | |
v2 | |
%) | |
shifts)] | |
(zipmap shifts | |
correlations))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment