-
-
Save fjarri/6639d8d83b6f1e06070046de12b47e5e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# vectorized version of lap() | |
# The macros @fastmath @simd and @inbounds did not make much difference for me | |
# The important step is to devectorize the array assignments and make it a single | |
# for loop that is cache friendly. | |
# Note that this function requires passing two arrays, uo is not changed and | |
# the result of the calculation is deposited into u | |
function lapdevectorized!(u,uo,dxd,dyd) | |
nx, ny = size(u) | |
@inbounds @simd for j = 2: (nx-1) | |
for i = 2: (ny-1) | |
# it is important to have the inner loop over the first (row) index of u | |
u[i,j] = (uo[i-1, j] + uo[i+1, j])*dyd + (uo[i,j-1] + uo[i, j+1])*dxd | |
end | |
end | |
nothing | |
end | |
# Computing the error is here also done by a devectorized loop as a separate | |
# function. In my benchmarking this was about 8 times faster but it may not be | |
# so important here. | |
function errfunction{T}(u::Array{T},uold::Array{T}) | |
s = zero(T) | |
@inbounds @simd for i in 1 : length(u) | |
s += (u[i] - uold[i])^2 | |
end | |
return sqrt(s) / length(u) | |
end | |
function iter(u, dxd, dyd, eps, n_iter) | |
u_old = copy(u) | |
err = 1.0e100 | |
count = 0 | |
while err > eps && count < n_iter | |
u, u_old = u_old, u | |
count = count + 1 | |
lapdevectorized!(u, u_old, dxd, dyd) | |
err = errfunction(u, u_old) | |
end | |
return u, err, count | |
end | |
xmin = 0. | |
xmax = 1. | |
ymin = 0. | |
ymax = 1. | |
nx = 2000 | |
ny = nx | |
dx = (xmax - xmin) / (nx - 1) | |
dy = (ymax - ymin) / (ny - 1) | |
x = xmin:dx:xmax | |
y = ymin:dy:ymax | |
dx2 = dx^2 | |
dy2 = dy^2 | |
dnr_inv = 0.5 / (dx2 + dy2) | |
dxd = dx2 * dnr_inv | |
dyd = dy2 * dnr_inv | |
n_iter = 100 | |
eps = 1.0e-16 | |
u = zeros(nx, ny) | |
u[1,:] = xmin^2 - y.^2 | |
u[nx,:] = xmax^2 - y.^2 | |
u[:,1] = x.'.^2 - ymin^2 | |
u[:,ny] = x.'.^2 - ymax^2 | |
println("Laplace function call, CPU time taken = ") | |
@time (u, err, count) = iter(u, dxd, dyd, eps, n_iter) | |
println( | |
"error = ", err, " count = ", count, " n_iter = ", n_iter, | |
" nx = ", nx, " ny = ", ny) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import numpy | |
def lap(u, dxd, dyd): | |
ret = numpy.empty_like(u) | |
ret[1:-1, 1:-1] = ( | |
(u[:-2, 1:-1] + u[2:, 1:-1]) * dyd | |
+ (u[1:-1, :-2] + u[1:-1, 2:]) * dxd) | |
ret[0,:] = u[0,:] | |
ret[-1,:] = u[-1,:] | |
ret[:,0] = u[:,0] | |
ret[:,-1] = u[:,-1] | |
return ret | |
def iter(u, dxd, dyd, eps, n_iter): | |
err = 1e100 | |
count = 0 | |
while err > eps and count < n_iter: | |
count = count + 1 | |
u_old = u | |
u = lap(u, dxd, dyd) | |
v = u - u_old | |
err = numpy.linalg.norm(v) / u.size | |
return u, err, count | |
if __name__ == '__main__': | |
xmin = 0 | |
xmax = 1 | |
ymin = 0 | |
ymax = 1 | |
nx = 2000 | |
ny = nx | |
x = numpy.linspace(xmin, xmax, nx) | |
y = numpy.linspace(ymin, ymax, ny) | |
dx = x[1] - x[0] | |
dy = y[1] - y[0] | |
dx2 = dx**2 | |
dy2 = dy**2 | |
dnr_inv = 0.5 / (dx2 + dy2) | |
dxd = dx2*dnr_inv | |
dyd = dy2*dnr_inv | |
n_iter = 100 | |
eps = 1e-16 | |
u = numpy.zeros((nx, ny)) | |
u[0,:] = xmin**2 - y**2 | |
u[nx-1,:] = xmax**2 - y**2 | |
u[:,0] = x.T**2 - ymin**2 | |
u[:,ny-1] = x.T**2 - ymax**2 | |
t1 = time.time() | |
u, err, count = iter(u, dxd, dyd, eps, n_iter) | |
t2 = time.time() | |
print("Laplace function call, CPU time taken =", t2 - t1) | |
print("error, count, n_iter, nx,ny, =", err, count, n_iter, nx, ny) | |
# error, count, n_iter, nx,ny, = 9.56764767814e-08 100 100 2000 2000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import numpy | |
import theano | |
import theano.tensor as T | |
from theano.tensor.signal.conv import conv2d | |
theano.config.profile = True | |
def prepare_function_subtensor(dxd, dyd): | |
u = T.dmatrix('u') | |
u_new = T.set_subtensor( | |
u[1:-1,1:-1], | |
(u[:-2, 1:-1] + u[2:, 1:-1]) * dxd | |
+ (u[1:-1, :-2] + u[1:-1, 2:]) * dyd) | |
v = u_new - u | |
err = ((v**2).sum())**0.5 / u.size | |
lap_and_err = theano.function([u], [u_new, err]) | |
return lap_and_err | |
def prepare_function_conv(dxd, dyd): | |
flt = theano.shared(numpy.array([[0, dxd, 0], [dyd, 0, dyd], [0, dxd, 0]])) | |
u = T.dmatrix('u') | |
u_new = T.set_subtensor( | |
u[1:-1,1:-1], | |
conv2d(u, flt, border_mode='valid')) | |
v = u_new - u | |
err = ((v**2).sum())**0.5 / u.size | |
lap_and_err = theano.function([u], [u_new, err]) | |
return lap_and_err | |
def iter(f, u, dxd, dyd, eps, n_iter): | |
err = 1e100 | |
count = 0 | |
while err > eps and count < n_iter: | |
count = count + 1 | |
u, err = f(u) | |
return u, err, count | |
if __name__ == '__main__': | |
xmin = 0 | |
xmax = 1 | |
ymin = 0 | |
ymax = 1 | |
nx = 2000 | |
ny = nx | |
x = numpy.linspace(xmin, xmax, nx) | |
y = numpy.linspace(ymin, ymax, ny) | |
dx = x[1] - x[0] | |
dy = y[1] - y[0] | |
dx2 = dx**2 | |
dy2 = dy**2 | |
dnr_inv = 0.5 / (dx2 + dy2) | |
dxd = dx2*dnr_inv | |
dyd = dy2*dnr_inv | |
n_iter = 100 | |
eps = 1e-16 | |
u = numpy.zeros((nx, ny)) | |
u[0,:] = xmin**2 - y**2 | |
u[nx-1,:] = xmax**2 - y**2 | |
u[:,0] = x.T**2 - ymin**2 | |
u[:,ny-1] = x.T**2 - ymax**2 | |
f = prepare_function_subtensor(dxd, dyd) | |
#f = prepare_function_conv(dxd, dyd) | |
f(u) | |
t1 = time.time() | |
u1, err, count = iter(f, u, dxd, dyd, eps, n_iter) | |
t2 = time.time() | |
print("Laplace function call, CPU time taken =", t2 - t1) | |
print("error, count, n_iter, nx,ny, =", err, count, n_iter, nx, ny) | |
# error, count, n_iter, nx,ny, = 9.567647678139845e-08 100 100 2000 2000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment