In this example we calculate "distance covariance" matrices (look it up on Wikipedia)
Python code
import numpy as np
from math import sqrt
def distance_matrix(X):
D = np.empty(shape=(X.shape[0],X.shape[0]))
for j in range(X.shape[0]):
for k in range(j+1):
uv = (X[j,:]-X[k,:])
D[j,k] = np.sqrt(np.sum([x*x for x in uv]))
D[k,j] = D[j,k]
return D
def doubly_center(X):
grand_mean = np.mean(X)
Z = np.empty(shape=X.shape)
for j, k in product(range(X.shape[0]),repeat=2):
Z[j,k] = X[j,k] - np.mean(X[j,:]) - np.mean(X[:,k]) + grand_mean
return Z
def dcov(X,Y):
A = doubly_center(distance_matrix(X))
B = doubly_center(distance_matrix(Y))
return np.sqrt(np.mean(A*B))
def dcor(X,Y):
xvar = dcov(X,X)
yvar = dcov(Y,Y)
xycov = dcov(X,Y)
denom = sqrt((xvar*yvar))
return xycov / denom
Rust code (functionality)
extern crate ndarray;
use ndarray::prelude::*;
fn euclidean_distance(u: ArrayView<f64,Ix1> , v: ArrayView<f64,Ix1>)-> f64 {
let uv = u.to_owned() - v.to_owned();
uv.mapv(|x| x*x).sum().sqrt()
}
fn distance_matrix(x: &Array<f64,Ix2>) -> Array<f64,Ix2>{
let mut d = Array::<f64,Ix2>::zeros((x.nrows(),x.nrows()));
for j in 0..x.nrows() {
for k in 0..j+1 {
let dist = euclidean_distance(x.slice(s![j,..]),x.slice(s![k,..]));
d[[j,k]] = dist;
d[[k,j]] = dist;
}
}
d
}
fn doubly_center(x: &Array<f64,Ix2>) -> Array<f64,Ix2>{
let grand_mean = x.mean().unwrap();
let mut xbar = Array::<f64,Ix2>::zeros(x.raw_dim());
for j in 0..x.dim().0 {
for k in 0..x.dim().1 {
xbar[[j,k]] = x[[j,k]]
- x.slice(s![j,..]).mean().unwrap()
- x.slice(s![..,k]).mean().unwrap()
+ grand_mean;
}
}
xbar
}
pub fn dcov(x : &Array<f64,Ix2>, y: &Array<f64,Ix2>) -> f64 {
let a = doubly_center(&distance_matrix(x));
let b = doubly_center(&distance_matrix(y));
(a*b).mean().unwrap().sqrt()
}
pub fn dvariance(x: &Array<f64, Ix2>) -> f64 {
dcov(&x,&x)
}
pub fn dcor(x : &Array<f64,Ix2>, y: &Array<f64,Ix2>) -> f64 {
let xvar = dvariance(x);
let yvar = dvariance(y);
let xycov = dcov(x,y);
let denom = (xvar * yvar).sqrt();
xycov/denom
}
pub fn vec_dcor(x: Vec<f64>, y: Vec<f64>) -> f64 {
let xvec = Array::from_shape_vec((x.len(),1),x).unwrap();
let yvec = Array::from_shape_vec((y.len(),1),y).unwrap();
dcor(&xvec,&yvec)
}
pub fn vec_dcov(x: Vec<f64>, y: Vec<f64>) -> f64 {
let xvec = Array::from_shape_vec((x.len(),1),x).unwrap();
let yvec = Array::from_shape_vec((y.len(),1),y).unwrap();
dcov(&xvec,&yvec)
}
Rust bridging code
extern crate ndarray;
use ndarray::prelude::*;
use pyo3::prelude::*;
mod dcov;
#[pyfunction]
fn dcov(xs: Vec<f64>, ys: Vec<f64>) -> f64 {
dcov::vec_dcov(xs,ys)
}
#[pyfunction]
fn dcor(xs: Vec<f64>, ys: Vec<f64>) -> f64 {
dcov::vec_dcor(xs,ys)
}
#[pymodule]
fn hsmode(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(dcov, m)?)?;
m.add_function(wrap_pyfunction!(dcor, m)?)?;
Ok(())
}
Python testing code (note that I'm building the matrices in Python!)
def perf_test(M,N):
xs = np.random.uniform(size=(N,M))
ys = np.random.uniform(size=(N,M)) + 0.005 * xs
data = np.concatenate([xs,ys], axis=1)
t0 = time()
mat = np.empty((data.shape[1],data.shape[1]))
for i in range(data.shape[1]):
for j in range(i+1):
mat[i,j] = hsmode.dcor(data[:,i],data[:,j])
mat[j,i] = mat[i,j]
rs_dt = time()-t0;
print(rs_dt, end=" ", flush=True)
t0 = time();
pymat = np.empty((data.shape[1],data.shape[1]))
for i in range(data.shape[1]):
for j in range(i+1):
pymat[i,j] = py_dcov.dcor(data[:,i].reshape(-1,1),data[:,j].reshape(-1,1))
pymat[j,i] = mat[i,j]
py_dt = time() - t0
print(py_dt, end=" ", flush=True)
diff = np.linalg.norm(mat-pymat)
print(diff)
return rs_dt, py_dt, diff
Something like this might go a little faster if you wanted to stick with numpy