Created
October 24, 2023 22:49
-
-
Save VMois/41f6fd371fc10d247f016d8587b0ce86 to your computer and use it in GitHub Desktop.
Example of Euclidian distance calculation in Mojo (naive, fn, SIMD, broken SIMD + parallel) and Python (naive, numpy)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from utils.vector import UnsafeFixedVector | |
from tensor import Tensor | |
from algorithm import vectorize, parallelize | |
from math import sqrt | |
from time import now | |
from python import Python | |
def main(): | |
Python.add_to_path(".") | |
let ed = Python.import_module("euclidian_distance") | |
let anp = ed.anp | |
let bnp = ed.bnp | |
let n: Int = ed.n.to_float64().to_int() | |
var a = Tensor[DType.float64](n) | |
var b = Tensor[DType.float64](n) | |
# This will take the biggest amount of time | |
for i in range(n): | |
a[i] = anp[i].to_float64() | |
b[i] = bnp[i].to_float64() | |
#test_simd_parral(a, b) | |
#test_simd(a, b) | |
#test_fn(a, b) | |
test_naive(a, b) | |
ed.benchmark() | |
fn test_fn(a: Tensor[DType.float64], b: Tensor[DType.float64]): | |
let eval_begin = now() | |
let dist = mojo_fn_dist(a, b) | |
let eval_end = now() | |
print("mojo_fn_dist value", dist) | |
print("mojo_fn_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6) | |
fn test_naive(a: Tensor[DType.float64], b: Tensor[DType.float64]) raises: | |
let eval_begin = now() | |
let dist = mojo_naive_dist(a, b) | |
let eval_end = now() | |
print("mojo_naive_dist value", dist) | |
print("mojo_naive_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6) | |
fn test_simd(a: Tensor[DType.float64], b: Tensor[DType.float64]): | |
let eval_begin = now() | |
let dist = mojo_simd_dist(a, b) | |
let eval_end = now() | |
print("mojo_simd_dist value", dist) | |
print("mojo_simd_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6) | |
fn test_simd_parral(a: Tensor[DType.float64], b: Tensor[DType.float64]): | |
let eval_begin = now() | |
let dist = mojo_simd_parral_dist(a, b) | |
let eval_end = now() | |
print("mojo_simd_parral_dist value", dist) | |
print("mojo_simd_parral_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6) | |
def mojo_naive_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64: | |
var s: Float64 = 0.0 | |
n = a.num_elements() | |
for i in range(n): | |
dist = a[i] - b[i] | |
s += dist*dist | |
return sqrt(s) | |
fn mojo_fn_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64: | |
var s: Float64 = 0.0 | |
let n = a.num_elements() | |
for i in range(n): | |
let dist = a[i] - b[i] | |
s += dist*dist | |
return sqrt(s) | |
alias nelts = simdwidthof[DType.float64]() | |
fn mojo_simd_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64: | |
var s: Float64 = 0.0 | |
let n = a.num_elements() | |
@parameter | |
fn sum[nelts: Int](x: Int): | |
let dist = a.simd_load[nelts](x) - b.simd_load[nelts](x) | |
s += (dist * dist).reduce_add() | |
vectorize[nelts, sum](n) | |
return sqrt(s) | |
fn mojo_simd_parral_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64: | |
var s: Float64 = 0.0 | |
let tasks = 4 | |
# Not sure how Mojo handles access to shared resources | |
# Let's create a sum for each thread | |
var tmp_sum: UnsafeFixedVector[Float64] = UnsafeFixedVector[Float64](tasks) | |
let n = a.num_elements() | |
# TODO: will not work nicely with uneven numbers | |
let batch_size = n // tasks | |
@parameter | |
fn per_core(task_id: Int): | |
@parameter | |
fn sum[nelts: Int](x: Int): | |
let dist = a.simd_load[nelts](x + task_id * batch_size) - b.simd_load[nelts](x + task_id * batch_size) | |
tmp_sum[task_id] += (dist * dist).reduce_add() | |
vectorize[nelts, sum](batch_size) | |
parallelize[per_core](tasks) | |
# add calculated sums | |
for i in range(tasks): | |
s += tmp_sum[i] | |
return sqrt(s) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from functools import wraps | |
from time import perf_counter | |
from math import sqrt | |
n = 10_000_000 | |
anp = np.random.rand(n) | |
bnp = np.random.rand(n) | |
def timing(f): | |
@wraps(f) | |
def wrap(*args, **kwargs): | |
start = perf_counter() | |
result = f(*args, **kwargs) | |
end = perf_counter() | |
print(f'{f.__name__} time (ms) {1000 * (end - start)}') | |
print(f'{f.__name__} value {result}') | |
return result | |
return wrap | |
@timing | |
def python_naive_dist(a, b): | |
s = 0.0 | |
n = len(a) | |
for i in range(n): | |
dist = a[i] - b[i] | |
s += dist*dist | |
return sqrt(s) | |
@timing | |
def python_numpy_dist(a, b): | |
return np.linalg.norm(a - b) | |
def benchmark(): | |
print(n) | |
print("=== Python Performance ===") | |
python_numpy_dist(anp, bnp) | |
python_naive_dist(anp, bnp) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment