Last active
March 17, 2023 19:31
-
-
Save ncke/1013521e5d4e13fec1002c7b521da1ca to your computer and use it in GitHub Desktop.
Async Heat Equation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2023 AUTHORS | |
# | |
# SPDX-License-Identifier: BSL-1.0 | |
# Distributed under the Boost Software License, Version 1.0. (See accompanying | |
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
from typing import Optional, Tuple | |
from threading import Thread | |
from queue import Queue | |
import numpy as np | |
import sys | |
import time | |
import os | |
# Command line: <threads> <nt> <nx> [<use_hw_counters>]
# The fourth argument is optional; hardware counters are enabled only when it
# is present and exactly "1".
use_hw_counters: bool = len(sys.argv) > 4 and sys.argv[4] == "1"
if use_hw_counters:
    # Imported conditionally so pypapi is only required when counters are on.
    from pypapi import events, papi_high as high

nx = int(sys.argv[3])       # number of nodes
k = 0.5                     # heat transfer coefficient
dt = 1.                     # time step
dx = 1.                     # grid spacing
nt = int(sys.argv[2])       # number of time steps
threads = int(sys.argv[1])  # number of threads
tx = nx // threads          # nodes per worker (integer division)
class Worker(Thread):
    """Thread that advances one contiguous slice of the 1-D grid.

    Worker ``num`` covers the index range ``[tx*num, tx*(num+1))``; the last
    worker extends its range up to ``nx``.  The first and last entries of the
    local array are used as ghost cells: before each step they are overwritten
    with values received from the neighbouring workers through the ``left``
    and ``right`` queues.  A side without a neighbour has no queue (``None``).

    ``leftThread`` / ``rightThread`` must be wired up by the caller before the
    thread is started; they are the workers whose queues this worker feeds.
    """

    def __init__(self, num: int) -> None:
        """Create worker number ``num`` (0-based) out of ``threads`` workers."""
        Thread.__init__(self)
        self.num: int = num
        self.lo: int = tx * num
        # The last worker absorbs the remainder of nx // threads.
        self.hi: int = nx if num + 1 == threads else tx * (num + 1)

        # Inbound ghost-value queues; only sides with a neighbour get one.
        self.left: Optional[Queue[float]] = Queue() if num > 0 else None
        self.right: Optional[Queue[float]] = (
            Queue() if num + 1 < threads else None
        )

        self.sz = self.hi - self.lo
        self.data = np.random.randn(self.sz)   # current time level
        self.data2 = np.zeros((self.sz,))      # scratch for the next level
        self.leftThread: Optional['Worker'] = None
        self.rightThread: Optional['Worker'] = None

    def recv_ghosts(self) -> None:
        """Block until each neighbour's boundary value arrives; store them."""
        if self.left is not None:
            self.data[0] = self.left.get()
        if self.right is not None:
            self.data[-1] = self.right.get()

    def update(self) -> None:
        """Receive ghosts, advance the interior one time step, send ghosts.

        Uses the explicit finite-difference update
        ``u_i + k*dt/dx**2 * (u_{i+1} - 2*u_i + u_{i-1})``.  Only the interior
        ``[1:-1]`` is written; the two end entries are left as they were in
        the scratch buffer.
        """
        self.recv_ghosts()
        r = k * dt / (dx * dx)
        # Second difference: the centre term is subtracted twice.
        self.data2[1:-1] = self.data[1:-1] + r * (
            self.data[2:] - 2 * self.data[1:-1] + self.data[:-2]
        )
        # Swap buffers so self.data always holds the newest time level.
        self.data, self.data2 = self.data2, self.data
        self.send_ghosts()

    def send_ghosts(self) -> None:
        """Push this worker's outermost interior values to its neighbours."""
        if self.leftThread is not None:
            assert self.leftThread.right is not None
            # Our first interior value becomes the left neighbour's right ghost.
            self.leftThread.right.put_nowait(self.data[1])
        if self.rightThread is not None:
            assert self.rightThread.left is not None
            # Our last interior value becomes the right neighbour's left ghost.
            self.rightThread.left.put_nowait(self.data[-2])

    def run(self) -> None:
        """Thread body: perform ``nt`` updates with ghost exchange."""
        # Prime the neighbours so the first update() has ghosts to receive.
        self.send_ghosts()
        for _ in range(nt):
            self.update()
        # Consume the ghosts sent by the neighbours' final update.
        self.recv_ghosts()
def main()->Tuple[float,float]:
    """Run the threaded benchmark.

    Builds one Worker per thread, links each worker to its left and right
    neighbour, runs all workers to completion and prints the elapsed
    wall-clock time.

    Returns:
        A pair ``(elapsed_seconds, hw)`` where ``hw`` is whatever
        ``high.stop_counters()`` returned when hardware counters are enabled,
        and ``0`` otherwise.
    """
    # The clock starts before the workers are built, so setup is included.
    t_start = time.time()

    workers = [Worker(idx) for idx in range(threads)]
    # Link every adjacent pair in both directions; the chain is not closed
    # into a ring.
    for left_w, right_w in zip(workers, workers[1:]):
        left_w.rightThread = right_w
        right_w.leftThread = left_w

    if use_hw_counters:
        high.start_counters([events.PAPI_FP_OPS,])

    for w in workers:
        w.start()
    for w in workers:
        w.join()

    t_end = time.time()

    hw : int = high.stop_counters() if use_hw_counters else 0

    elapsed = t_end - t_start
    print(elapsed)
    return elapsed, hw
# Run the benchmark and append one result row to the CSV log.
tdiff, hw = main()

fn = 'perfdata.csv'
# Write the header only when the log file does not exist yet.
if not os.path.exists(fn):
    with open(fn, "w") as fd:
        print('lang,nx,nt,threads,dt,dx,total time,flops', file=fd)

with open(fn, "a") as fd:
    # Values are written in header order: dt comes before dx.
    row = ['heat2', nx, nt, threads, dt, dx, tdiff, hw]
    print(",".join(str(x) for x in row), file=fd)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Originally from: @diehlpk | |
// https://github.com/diehlpk/async_heat_equation/blob/main/swift/heat/heat_2.swift | |
// Copyright (c) 2022 AUTHORS | |
// | |
// SPDX-License-Identifier: BSL-1.0 | |
// Distributed under the Boost Software License, Version 1.0. (See accompanying | |
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
import Foundation | |
// Wall-clock timer; it starts before argument parsing and allocation.
let start = Date()

// Positional arguments: <threads> <nt> <nx>
let C_ARGV = CommandLine.arguments

let nx = Int(C_ARGV[3])!       // number of nodes
let k = 0.5                    // heat transfer coefficient
let dt = 1.0                   // time step
let dx = 1.0                   // grid spacing
let nt = Int(C_ARGV[2])!       // number of time steps
let threads = Int(C_ARGV[1])!  // number of threads

// Two buffers of nx values each; every time step reads one and writes the other.
let space = [
    UnsafeMutableBufferPointer<Double>.allocate(capacity: nx),
    UnsafeMutableBufferPointer<Double>.allocate(capacity: nx),
]

// Initial condition: node i starts at the value i.
for i in 0..<nx {
    space[0][i] = Double(i)
}
space[1].initialize(repeating: 0)

for t in 0..<nt {
    // Source and destination buffers alternate with the parity of the step.
    let t_src = t % 2
    let t_dst = (t + 1) % 2
    let coeff = k * dt / (dx * dx)

    // One child task per partition; the group is awaited before the next step.
    await withTaskGroup(of: Void.self, returning: Void.self) { group in
        for p in 0..<threads {
            let chunk = Int(nx / threads)
            let begin = p * chunk
            // The last partition also takes the remainder of nx / threads.
            let end = p < threads - 1 ? (p + 1) * chunk : nx

            group.addTask {
                for i in begin..<end {
                    // Neighbour indices wrap around at both ends of the grid.
                    let prevIdx = i > 0 ? i - 1 : nx - 1
                    let nextIdx = i < nx - 1 ? i + 1 : 0

                    let s_prev = space[t_src][prevIdx]
                    let s_curr = space[t_src][i]
                    let s_next = space[t_src][nextIdx]

                    space[t_dst][i] = s_curr + coeff * (s_prev - 2 * s_curr + s_next)
                }
            }
        }
    }
}

let duration = -start.timeIntervalSinceNow
print("swift,\(nx),\(nt),\(threads),\(dt),\(dx),\(duration)")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2023 AUTHORS | |
# | |
# SPDX-License-Identifier: BSL-1.0 | |
# Distributed under the Boost Software License, Version 1.0. (See accompanying | |
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
from typing import Optional, Tuple | |
from threading import Thread | |
from queue import Queue | |
import numpy as np | |
import sys | |
import time | |
import os | |
# Command line: <threads> <nt> <nx> [<use_hw_counters>]
# The fourth argument is optional; hardware counters are enabled only when it
# is present and exactly "1".
use_hw_counters: bool = len(sys.argv) > 4 and sys.argv[4] == "1"
if use_hw_counters:
    # Imported conditionally so pypapi is only required when counters are on.
    from pypapi import events, papi_high as high

nx = int(sys.argv[3])       # number of nodes
k = 0.5                     # heat transfer coefficient
dt = 1.                     # time step
dx = 1.                     # grid spacing
nt = int(sys.argv[2])       # number of time steps
threads = int(sys.argv[1])  # number of threads
def main() -> float:
    """Run the single-threaded benchmark and return the elapsed seconds.

    Allocates ``nx + 2`` values (randomly initialised), performs ``nt``
    explicit finite-difference steps
    ``u_i + k*dt/dx**2 * (u_{i+1} - 2*u_i + u_{i-1})`` on the interior
    ``[1:-1]``, prints the elapsed wall-clock time and returns it.
    """
    t1 = time.time()
    sz = nx + 2  # the stencil only ever writes [1:-1], i.e. nx entries
    data = np.random.randn(sz)
    data2 = np.zeros(sz)
    r = k * dt / (dx * dx)
    for _ in range(nt):
        # Second difference: the centre term is subtracted twice.
        data2[1:-1] = data[1:-1] + r * (data[2:] - 2 * data[1:-1] + data[:-2])
        # Swap buffers so `data` always holds the newest time level.
        data, data2 = data2, data
    elapsed = time.time() - t1
    print(elapsed)
    return elapsed
# Run the benchmark and append one result row to the CSV log.
tdiff = main()

fn = 'perfdata.csv'
# Write the header only when the log file does not exist yet.
if not os.path.exists(fn):
    with open(fn, "w") as fd:
        print('lang,nx,nt,threads,dt,dx,total time,flops', file=fd)

with open(fn, "a") as fd:
    # Values are written in header order (dt before dx).  This script does
    # not measure flops, so 0 is written to keep the row at eight columns.
    row = ['heat2', nx, nt, threads, dt, dx, tdiff, 0]
    print(",".join(str(x) for x in row), file=fd)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Based on idea from: Steve Canon | |
// https://forums.swift.org/t/really-bad-performance-with-concurrecny-and-how-to-optimize-the-code/63732/13 | |
import Foundation | |
// Positional arguments: <threads> <nt> <nx>
let C_ARGV = CommandLine.arguments

let nx = Int(C_ARGV[3])!       // number of nodes
let k = 0.5                    // heat transfer coefficient
let dt = 1.0                   // time step
let dx = 1.0                   // grid spacing
let nt = Int(C_ARGV[2])!       // number of time steps
let threads = Int(C_ARGV[1])!  // number of threads (only echoed in the output)

// Two buffers of nx values each; every time step reads one and writes the other.
let space = [
    UnsafeMutableBufferPointer<Double>.allocate(capacity: nx),
    UnsafeMutableBufferPointer<Double>.allocate(capacity: nx)
]

// Initial condition: node i starts at the value i.
for i in space[0].indices {
    space[0][i] = Double(i)
}

let r = k * dt / (dx * dx)

// Timing covers only the time-stepping loop below.
let start = Date()

for t in 0 ..< nt {
    let src = space[t % 2]
    let dst = space[(t+1) % 2]

    // The two end nodes wrap around to the opposite end of the grid.
    dst[0] = src[0] + r*(src[nx-1] - 2*src[0] + src[1])
    dst[nx-1] = src[nx-1] + r*(src[nx-2] - 2*src[nx-1] + src[0])

    // Interior nodes 1 ... nx-2; the sequence is empty when nx <= 2.
    for i in stride(from: 1, to: nx-1, by: 1) {
        let left = src[i-1]
        let centre = src[i]
        let right = src[i+1]
        dst[i] = centre + r*(left - 2*centre + right)
    }
}

let duration = -start.timeIntervalSinceNow
print("swift,\(nx),\(nt),\(threads),\(dt),\(dx),\(duration)")
print(space[0][0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment