Skip to content

Instantly share code, notes, and snippets.

@fjarri
Created October 17, 2016 05:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fjarri/6639d8d83b6f1e06070046de12b47e5e to your computer and use it in GitHub Desktop.
# Devectorized version of lap().
# The macros @fastmath @simd and @inbounds did not make much difference for me.
# The important step is to devectorize the array assignments and make it a single
# loop nest that is cache friendly.
# Note that this function requires passing two arrays: uo is not changed and
# the result of the calculation is deposited into u.
#
# Fixes relative to the original:
#   * the loop bounds were swapped (`j` ran over 2:(nx-1) although it indexes
#     the second dimension of size ny, and vice versa) — correct only for
#     square grids; now each index uses its own dimension's extent;
#   * @simd is placed on the innermost loop, where it can actually vectorize.
function lapdevectorized!(u, uo, dxd, dyd)
    nx, ny = size(u)
    @inbounds for j = 2:(ny - 1)
        # Inner loop over the first (row) index: Julia arrays are column-major,
        # so consecutive iterations touch adjacent memory.
        @simd for i = 2:(nx - 1)
            u[i, j] = (uo[i-1, j] + uo[i+1, j]) * dyd + (uo[i, j-1] + uo[i, j+1]) * dxd
        end
    end
    return nothing
end
# Computing the error is here also done by a devectorized loop as a separate
# function. In my benchmarking this was about 8 times faster but it may not be
# so important here.
"""
    errfunction(u, uold)

Return `sqrt(sum((u[i] - uold[i])^2 for i)) / length(u)`, i.e. the Euclidean
norm of the difference between two arrays divided by the element count.

Fix: the pre-1.0 parametric syntax `function errfunction{T}(...)` was removed
in Julia 1.0; this uses the `where {T}` form instead.
"""
function errfunction(u::Array{T}, uold::Array{T}) where {T}
    s = zero(T)  # accumulator typed from the element type for type stability
    @inbounds @simd for i in eachindex(u)
        s += (u[i] - uold[i])^2
    end
    return sqrt(s) / length(u)
end
"""
    iter(u, dxd, dyd, eps, n_iter)

Run Jacobi-style sweeps with `lapdevectorized!` until the per-element error
drops to `eps` or `n_iter` sweeps have been performed.
Returns `(u, err, count)` — the final grid, last error, and sweep count.
"""
function iter(u, dxd, dyd, eps, n_iter)
    prev = copy(u)
    residual = 1.0e100  # sentinel so the loop always runs at least once
    steps = 0
    while residual > eps && steps < n_iter
        # Ping-pong the two buffers: the sweep reads `prev`, writes into `u`.
        u, prev = prev, u
        steps += 1
        lapdevectorized!(u, prev, dxd, dyd)
        residual = errfunction(u, prev)
    end
    return u, residual, steps
end
# Driver script: Laplace relaxation on a 2000x2000 grid over the unit square
# with Dirichlet boundary values u = x^2 - y^2 on the four edges.
#
# Fix: the boundary assignments used the `.'` postfix transpose and un-dotted
# scalar-minus-array subtraction, both of which were removed in Julia 1.0;
# they are rewritten with broadcast `.=` / `.-` (the transpose is unnecessary
# when broadcasting a vector into a column slice).
xmin = 0.
xmax = 1.
ymin = 0.
ymax = 1.
nx = 2000
ny = nx
dx = (xmax - xmin) / (nx - 1)
dy = (ymax - ymin) / (ny - 1)
x = xmin:dx:xmax
y = ymin:dy:ymax
dx2 = dx^2
dy2 = dy^2
# Jacobi weights: each sweep averages neighbours with these factors.
dnr_inv = 0.5 / (dx2 + dy2)
dxd = dx2 * dnr_inv
dyd = dy2 * dnr_inv
n_iter = 100
eps = 1.0e-16
u = zeros(nx, ny)
u[1, :] .= xmin^2 .- y.^2
u[nx, :] .= xmax^2 .- y.^2
u[:, 1] .= x.^2 .- ymin^2
u[:, ny] .= x.^2 .- ymax^2
println("Laplace function call, CPU time taken = ")
@time (u, err, count) = iter(u, dxd, dyd, eps, n_iter)
println(
    "error = ", err, " count = ", count, " n_iter = ", n_iter,
    " nx = ", nx, " ny = ", ny)
import time
import numpy
def lap(u, dxd, dyd):
    """Return one weighted-average (Jacobi) sweep of ``u``.

    Each interior point becomes ``(up + down) * dyd + (left + right) * dxd``;
    the four border rows/columns are copied from ``u`` unchanged.
    """
    out = numpy.empty_like(u)
    vertical = u[:-2, 1:-1] + u[2:, 1:-1]
    horizontal = u[1:-1, :-2] + u[1:-1, 2:]
    out[1:-1, 1:-1] = vertical * dyd + horizontal * dxd
    # Boundaries are fixed (Dirichlet): carry them over untouched.
    out[0, :] = u[0, :]
    out[-1, :] = u[-1, :]
    out[:, 0] = u[:, 0]
    out[:, -1] = u[:, -1]
    return out
def iter(u, dxd, dyd, eps, n_iter):
    """Apply ``lap`` sweeps until the error is at most ``eps`` or ``n_iter``
    sweeps have run.

    Returns ``(u, err, count)``: the final grid, the last per-element error,
    and the number of sweeps performed.
    """
    err, count = 1e100, 0  # sentinel error forces at least one sweep
    while err > eps and count < n_iter:
        count += 1
        previous = u
        u = lap(previous, dxd, dyd)
        err = numpy.linalg.norm(u - previous) / u.size
    return u, err, count
if __name__ == '__main__':
    # Problem setup: unit square discretized on a 2000x2000 grid.
    xmin, xmax = 0, 1
    ymin, ymax = 0, 1
    nx = 2000
    ny = nx
    x = numpy.linspace(xmin, xmax, nx)
    y = numpy.linspace(ymin, ymax, ny)
    # Jacobi weights derived from the squared grid spacings.
    dx2 = (x[1] - x[0]) ** 2
    dy2 = (y[1] - y[0]) ** 2
    dnr_inv = 0.5 / (dx2 + dy2)
    dxd = dx2 * dnr_inv
    dyd = dy2 * dnr_inv
    n_iter = 100
    eps = 1e-16
    # Dirichlet boundary u = x^2 - y^2 on the four edges, zero interior.
    u = numpy.zeros((nx, ny))
    u[0, :] = xmin ** 2 - y ** 2
    u[-1, :] = xmax ** 2 - y ** 2
    u[:, 0] = x ** 2 - ymin ** 2
    u[:, -1] = x ** 2 - ymax ** 2
    t1 = time.time()
    u, err, count = iter(u, dxd, dyd, eps, n_iter)
    t2 = time.time()
    print("Laplace function call, CPU time taken =", t2 - t1)
    print("error, count, n_iter, nx,ny, =", err, count, n_iter, nx, ny)
    # error, count, n_iter, nx,ny, = 9.56764767814e-08 100 100 2000 2000
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.signal.conv import conv2d
theano.config.profile = True
def prepare_function_subtensor(dxd, dyd):
    """Compile a Theano function ``u -> (u_new, err)`` performing one Jacobi
    sweep via ``set_subtensor`` on the interior slice.

    Fix: the original multiplied the vertical (first-axis) neighbours by
    ``dxd`` and the horizontal by ``dyd`` — swapped relative to the NumPy
    reference ``lap()``. The swap is invisible on a square grid where
    dxd == dyd, but wrong otherwise; the weights now match the reference.
    """
    u = T.dmatrix('u')
    u_new = T.set_subtensor(
        u[1:-1, 1:-1],
        (u[:-2, 1:-1] + u[2:, 1:-1]) * dyd
        + (u[1:-1, :-2] + u[1:-1, 2:]) * dxd)
    # Per-element Euclidean error between successive iterates.
    v = u_new - u
    err = ((v ** 2).sum()) ** 0.5 / u.size
    lap_and_err = theano.function([u], [u_new, err])
    return lap_and_err
def prepare_function_conv(dxd, dyd):
    """Compile a Theano function ``u -> (u_new, err)`` performing one Jacobi
    sweep via a 3x3 'valid' convolution written into the interior slice.

    The stencil is symmetric under the 180-degree flip convolution performs,
    so this reproduces the subtensor version. Fix: the weights were swapped
    relative to the NumPy reference (vertical neighbours got ``dxd``,
    horizontal got ``dyd``); harmless only when dxd == dyd (square grid).
    Now vertical neighbours use ``dyd`` and horizontal use ``dxd``.
    """
    flt = theano.shared(numpy.array([[0, dyd, 0], [dxd, 0, dxd], [0, dyd, 0]]))
    u = T.dmatrix('u')
    # 'valid' mode yields an (nx-2, ny-2) result, exactly the interior slice.
    u_new = T.set_subtensor(
        u[1:-1, 1:-1],
        conv2d(u, flt, border_mode='valid'))
    v = u_new - u
    err = ((v ** 2).sum()) ** 0.5 / u.size
    lap_and_err = theano.function([u], [u_new, err])
    return lap_and_err
def iter(f, u, dxd, dyd, eps, n_iter):
    """Drive the compiled step ``f`` until the reported error is at most
    ``eps`` or ``n_iter`` steps have run; return ``(u, err, count)``.

    ``dxd``/``dyd`` are not used here (they are baked into ``f`` at compile
    time) but are kept for signature parity with the NumPy driver.
    """
    err = 1e100  # sentinel so the loop body executes at least once
    count = 0
    while count < n_iter and err > eps:
        u, err = f(u)
        count += 1
    return u, err, count
if __name__ == '__main__':
    # Problem setup: unit square discretized on a 2000x2000 grid.
    xmin, xmax = 0, 1
    ymin, ymax = 0, 1
    nx = 2000
    ny = nx
    x = numpy.linspace(xmin, xmax, nx)
    y = numpy.linspace(ymin, ymax, ny)
    # Jacobi weights derived from the squared grid spacings.
    dx2 = (x[1] - x[0]) ** 2
    dy2 = (y[1] - y[0]) ** 2
    dnr_inv = 0.5 / (dx2 + dy2)
    dxd = dx2 * dnr_inv
    dyd = dy2 * dnr_inv
    n_iter = 100
    eps = 1e-16
    # Dirichlet boundary u = x^2 - y^2 on the four edges, zero interior.
    u = numpy.zeros((nx, ny))
    u[0, :] = xmin ** 2 - y ** 2
    u[nx - 1, :] = xmax ** 2 - y ** 2
    u[:, 0] = x ** 2 - ymin ** 2
    u[:, ny - 1] = x ** 2 - ymax ** 2
    f = prepare_function_subtensor(dxd, dyd)
    #f = prepare_function_conv(dxd, dyd)
    f(u)  # warm-up call so compilation time is excluded from the measurement
    t1 = time.time()
    u1, err, count = iter(f, u, dxd, dyd, eps, n_iter)
    t2 = time.time()
    print("Laplace function call, CPU time taken =", t2 - t1)
    print("error, count, n_iter, nx,ny, =", err, count, n_iter, nx, ny)
    # error, count, n_iter, nx,ny, = 9.567647678139845e-08 100 100 2000 2000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment