@VMois
Created October 24, 2023 22:49
Example of Euclidean distance calculation in Mojo (naive, fn, SIMD, broken SIMD + parallel) and Python (naive, numpy)
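All implementations compute the same quantity, the Euclidean distance d(a, b) = sqrt(sum_i (a_i - b_i)^2), over two vectors of n = 10,000,000 random float64 values. Two files follow: the Mojo driver, then the Python module euclidian_distance.py that it imports.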
from utils.vector import UnsafeFixedVector
from tensor import Tensor
from algorithm import vectorize, parallelize
from math import sqrt
from sys.info import simdwidthof  # needed for the `nelts` alias below
from time import now
from python import Python
def main():
    Python.add_to_path(".")
    let ed = Python.import_module("euclidian_distance")
    let anp = ed.anp
    let bnp = ed.bnp
    let n: Int = ed.n.to_float64().to_int()
    var a = Tensor[DType.float64](n)
    var b = Tensor[DType.float64](n)
    # Copying the numpy arrays element by element takes most of the runtime
    for i in range(n):
        a[i] = anp[i].to_float64()
        b[i] = bnp[i].to_float64()
    # Uncomment one test at a time to benchmark it
    #test_simd_parral(a, b)
    #test_simd(a, b)
    #test_fn(a, b)
    test_naive(a, b)
    ed.benchmark()
fn test_fn(a: Tensor[DType.float64], b: Tensor[DType.float64]):
    let eval_begin = now()
    let dist = mojo_fn_dist(a, b)
    let eval_end = now()
    print("mojo_fn_dist value", dist)
    print("mojo_fn_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6)

fn test_naive(a: Tensor[DType.float64], b: Tensor[DType.float64]) raises:
    let eval_begin = now()
    let dist = mojo_naive_dist(a, b)
    let eval_end = now()
    print("mojo_naive_dist value", dist)
    print("mojo_naive_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6)

fn test_simd(a: Tensor[DType.float64], b: Tensor[DType.float64]):
    let eval_begin = now()
    let dist = mojo_simd_dist(a, b)
    let eval_end = now()
    print("mojo_simd_dist value", dist)
    print("mojo_simd_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6)

fn test_simd_parral(a: Tensor[DType.float64], b: Tensor[DType.float64]):
    let eval_begin = now()
    let dist = mojo_simd_parral_dist(a, b)
    let eval_end = now()
    print("mojo_simd_parral_dist value", dist)
    print("mojo_simd_parral_dist time (ms)", Float64((eval_end - eval_begin)) / 1e6)
def mojo_naive_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64:
    var s: Float64 = 0.0
    n = a.num_elements()
    for i in range(n):
        dist = a[i] - b[i]
        s += dist*dist
    return sqrt(s)
# Same algorithm as mojo_naive_dist, but declared with `fn` and explicit
# `let` bindings, so the compiler enforces strict typing instead of the
# dynamic behavior `def` allows
fn mojo_fn_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64:
    var s: Float64 = 0.0
    let n = a.num_elements()
    for i in range(n):
        let dist = a[i] - b[i]
        s += dist*dist
    return sqrt(s)
# Number of float64 lanes in one SIMD register on this machine
alias nelts = simdwidthof[DType.float64]()

fn mojo_simd_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64:
    var s: Float64 = 0.0
    let n = a.num_elements()

    @parameter
    fn sum[nelts: Int](x: Int):
        let dist = a.simd_load[nelts](x) - b.simd_load[nelts](x)
        s += (dist * dist).reduce_add()

    vectorize[nelts, sum](n)
    return sqrt(s)
fn mojo_simd_parral_dist(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64:
    var s: Float64 = 0.0
    let tasks = 4
    # Not sure how Mojo handles access to shared resources,
    # so give each task its own partial sum
    var tmp_sum = UnsafeFixedVector[Float64](tasks)
    # The constructor only allocates capacity; zero each slot so the
    # accumulators do not start from garbage values
    for i in range(tasks):
        tmp_sum[i] = 0.0
    let n = a.num_elements()
    # TODO: n // tasks silently drops the last n % tasks elements when n is
    # not evenly divisible; see the sketch after this file
    let batch_size = n // tasks

    @parameter
    fn per_core(task_id: Int):
        @parameter
        fn sum[nelts: Int](x: Int):
            let dist = a.simd_load[nelts](x + task_id * batch_size) - b.simd_load[nelts](x + task_id * batch_size)
            tmp_sum[task_id] += (dist * dist).reduce_add()
        vectorize[nelts, sum](batch_size)

    parallelize[per_core](tasks)
    # combine the per-task partial sums
    for i in range(tasks):
        s += tmp_sum[i]
    return sqrt(s)
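The TODO in mojo_simd_parral_dist notes that batch_size = n // tasks drops the trailing n % tasks elements when n is not evenly divisible. A minimal sketch of one way to close that gap, using the same old-Mojo API as above (mojo_simd_parral_dist_v2 is a hypothetical name and this variant is untested, not part of the original gist):

# Sketch only: same structure as mojo_simd_parral_dist, but the final task
# also covers the n % tasks leftover elements
fn mojo_simd_parral_dist_v2(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64:
    var s: Float64 = 0.0
    let tasks = 4
    var tmp_sum = UnsafeFixedVector[Float64](tasks)
    for i in range(tasks):
        tmp_sum[i] = 0.0
    let n = a.num_elements()
    let batch_size = n // tasks

    @parameter
    fn per_core(task_id: Int):
        let start = task_id * batch_size
        # the last task extends its batch to the end of the tensors
        var size = batch_size
        if task_id == tasks - 1:
            size = n - start

        @parameter
        fn sum[nelts: Int](x: Int):
            let dist = a.simd_load[nelts](start + x) - b.simd_load[nelts](start + x)
            tmp_sum[task_id] += (dist * dist).reduce_add()

        vectorize[nelts, sum](size)

    parallelize[per_core](tasks)
    for i in range(tasks):
        s += tmp_sum[i]
    return sqrt(s)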
# euclidian_distance.py: the Python module the Mojo code imports above
import numpy as np
from functools import wraps
from time import perf_counter
from math import sqrt

n = 10_000_000
anp = np.random.rand(n)
bnp = np.random.rand(n)
def timing(f):
    @wraps(f)
    def wrap(*args, **kwargs):
        start = perf_counter()
        result = f(*args, **kwargs)
        end = perf_counter()
        print(f'{f.__name__} time (ms) {1000 * (end - start)}')
        print(f'{f.__name__} value {result}')
        return result
    return wrap
@timing
def python_naive_dist(a, b):
    s = 0.0
    n = len(a)
    for i in range(n):
        dist = a[i] - b[i]
        s += dist*dist
    return sqrt(s)

@timing
def python_numpy_dist(a, b):
    return np.linalg.norm(a - b)

def benchmark():
    print(n)
    print("=== Python Performance ===")
    python_numpy_dist(anp, bnp)
    python_naive_dist(anp, bnp)
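The Mojo driver calls ed.benchmark() itself, so this module has no entry point of its own. To run the pure-Python benchmark standalone, a standard guard could be appended (my addition, not part of the original gist):

if __name__ == "__main__":
    # run directly with: python euclidian_distance.py
    benchmark()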