jsnyder/gist:184625

## gistfile1.py
import pyopencl as cl
import numpy as np
import sys, time

class Grid:
    """A simple grid class that stores the details and solution of the
    computational grid."""
    def __init__(self, nx=10, ny=10, xmin=0.0, xmax=1.0,
                 ymin=0.0, ymax=1.0):
        self.xmin, self.xmax, self.ymin, self.ymax = xmin, xmax, ymin, ymax
        self.dx = float(xmax-xmin)/(nx-1)
        self.dy = float(ymax-ymin)/(ny-1)
        self.u = np.zeros((nx, ny)).astype(np.float32)
        # used to compute the change in solution in some of the methods.
        self.old_u = self.u.copy()

    def setBCFunc(self, func):
        """Sets the BC given a function of two variables."""
        xmin, ymin = self.xmin, self.ymin
        xmax, ymax = self.xmax, self.ymax
        x = np.arange(xmin, xmax + self.dx*0.5, self.dx)
        y = np.arange(ymin, ymax + self.dy*0.5, self.dy)
        self.u[0 ,:] = func(xmin,y)
        self.u[-1,:] = func(xmax,y)
        self.u[:, 0] = func(x,ymin)
        self.u[:,-1] = func(x,ymax)

    def computeError(self):
        """Computes absolute error using an L2 norm for the solution.
        This requires that self.u and self.old_u must be appropriately
        setup."""
        v = (self.u[1:-1, 1:-1] - self.old_u[1:-1, 1:-1]).flat
        return np.sqrt(np.dot(v,v))


class LaplaceSolver:
    """A simple Laplacian solver that can use different schemes to
    solve the problem."""
    def __init__(self, grid, stepper='numeric'):
        self.grid = grid
        self.setTimeStepper(stepper)

    def slowTimeStep(self, dt=0.0):
        """Takes a time step using straight forward Python loops.
        While not providing any performance enhancement, """
        g = self.grid
        nx, ny = g.u.shape
        dx2, dy2 = g.dx**2, g.dy**2
        dnr_inv = 0.5/(dx2 + dy2)
        u = g.u

	err = 0.0;
        for offset in range(1,3):
          for i in range(1, nx-1):
              for j in range(1 + ( (i + offset) % 2), ny-1, 2):
                  tmp = u[j,i]
                  u[j,i] = ((u[j-1, i] + u[j+1, i])*dy2 +
                            (u[j, i-1] + u[j, i+1])*dx2)*dnr_inv
                  diff = u[j,i] - tmp
                  err += diff**2

        return np.sqrt(err)

    def numericCheckerTimeStep(self, dt=0.0):
        """Takes a time step using a NumPy expression."""
        g = self.grid
        dx2, dy2 = g.dx**2, g.dy**2
        dnr_inv = 0.5/(dx2 + dy2)
        u = g.u
        g.old_u = u.copy() # needed to compute the error.

        # The actual iteration
	u[1:-1:2, 1:-1:2] = ((u[0:-2:2, 1:-1:2] + u[2::2, 1:-1:2])*dy2 +
                         (u[1:-1:2,0:-2:2] + u[1:-1:2, 2::2])*dx2)*dnr_inv
	u[2:-1:2, 2:-1:2] = ((u[1:-2:2, 2:-1:2] + u[3::2, 2:-1:2])*dy2 +
                         (u[2:-1:2,1:-2:2] + u[2:-1:2, 3::2])*dx2)*dnr_inv

	u[1:-1:2, 2:-1:2] = ((u[0:-2:2, 2:-1:2] + u[2::2, 2:-1:2])*dy2 +
                         (u[1:-1:2,1:-2:2] + u[1:-1:2, 3::2])*dx2)*dnr_inv
	u[2:-1:2, 1:-1:2] = ((u[1:-2:2, 1:-1:2] + u[3::2, 1:-1:2])*dy2 +
                         (u[2:-1:2,0:-2:2] + u[2:-1:2, 2::2])*dx2)*dnr_inv

        return g.computeError()

    def openclCheckerTimeStep(self, dt=0.0):
      """
      Takes a time step using a PyOpenCL kernel based on inline C code from:
      http://www.scipy.org/PerformancePython
      """
      nx, ny = self.grid.u.shape

      if self.count == 0:
          g = self.grid
          dx2, dy2 = g.dx**2, g.dy**2
          dnr_inv = 0.5/(dx2 + dy2)
          u = g.u

          self.err = np.empty( (nx-2,), dtype=np.float32)

          self.ctx = cl.create_some_context()
          self.queue = cl.CommandQueue(self.ctx)

          mf = cl.mem_flags
          self.u_buf = cl.Buffer(self.ctx, mf.READ_WRITE, u.nbytes)
          self.err_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.err.nbytes)

          cl.enqueue_write_buffer(self.queue, self.u_buf, u).wait()

          self.prg = cl.Program(self.ctx, """
          __kernel void lp2dstep( __global float u[][%d], __global float *err, const uint stidx )
          {
              float tmp, diff;
              int i = get_global_id(0) + 1;
              if ( stidx == 1 )
              	err[i-1] = 0.0;

              for ( int j = 1 + ( ( i + stidx ) %% 2); j<%d-1; j+=2 ) {
                  tmp = u[j][i];
                  u[j][i] = ((u[j-1][i] + u[j+1][i])*%g +
                                       (u[j][i-1] + u[j][i+1])*%g)*%g;
                  diff = u[j][i] - tmp;
                  err[i-1] += diff*diff;
              }
          }""" % (ny,ny,dy2,dx2,dnr_inv))


          try:
              self.prg.build()
          except:
              print "Error:"
              print self.prg.get_build_info(self.ctx.get_info(cl.context_info.DEVICES)[0], cl.program_build_info.LOG)

      lp1evt = self.prg.lp2dstep(self.queue, ((nx-2),), self.u_buf, self.err_buf, np.uint32(1))
      cl.enqueue_wait_for_events(self.queue, [lp1evt])
      self.prg.lp2dstep(self.queue, ((nx-2),), self.u_buf, self.err_buf, np.uint32(2))

      # Get Updated Error Data & Updated Grid
      #cl.enqueue_read_buffer(self.queue, self.u_buf, self.grid.u)
      cl.enqueue_read_buffer(self.queue, self.err_buf, self.err).wait()

      return np.sqrt(np.sum(self.err))

    def setTimeStepper(self, stepper='numeric'):
        """Sets the time step scheme to be used while solving given a
        string"""
        if stepper == 'slow':
            self.timeStep = self.slowTimeStep
        elif stepper == 'numpychecker':
            self.timeStep = self.numericCheckerTimeStep
        elif stepper == 'openclchecker':
            self.timeStep = self.openclCheckerTimeStep
        else:
            self.timeStep = None

    def solve(self, n_iter=0, eps=1.0e-16):
        self.count = 0
        err = self.timeStep()
        self.count = 1
        count = self.count

        while err > eps:
            if n_iter and count >= n_iter:
                return err
            err = self.timeStep()
            count = count + 1

        return count


def BC(x, y):
    """Used to set the boundary condition for the grid of points.
    Change this as you feel fit."""
    return (x**2 - y**2)


def test(nmin=5, nmax=30, dn=5, eps=1.0e-16, n_iter=0, stepper='numeric'):
    iters = []
    n_grd = numpy.arange(nmin, nmax, dn)
    times = []
    for i in n_grd:
        g = Grid(nx=i, ny=i)
        g.setBCFunc(BC)
        s = LaplaceSolver(g, stepper)
        t1 = time.clock()
        iters.append(s.solve(n_iter=n_iter, eps=eps))
        dt = time.clock() - t1
        times.append(dt)
        print "Solution for nx = ny = %d, took %f seconds"%(i, dt)
    return (n_grd**2, iters, times)


def time_test(nx=512, ny=512, eps=1.0e-16, n_iter=100, stepper='numeric'):
    g = Grid(nx, ny)
    g.setBCFunc(BC)
    s = LaplaceSolver(g, stepper)
    t = time.clock()
    s.solve(n_iter=n_iter, eps=eps)
    return time.clock() - t


def main(n=512, n_iter=512):
    print "Doing %d iterations on a %dx%d grid"%(n_iter, n, n)
    for i in ['openclchecker', 'numpychecker']:
        print i,
        sys.stdout.flush()
        print "took", time_test(n, n, stepper=i, n_iter=n_iter), "seconds"

    print "slow (1 iteration)",
    sys.stdout.flush()
    s = time_test(n, n, stepper='slow', n_iter=1)
    print "took", s, "seconds"
    print "%d iterations should take about %f seconds"%(n_iter, s*n_iter)


if __name__ == "__main__":
    main(n=2048,n_iter=400)
	import pyopencl as cl
	import numpy as np
	import sys, time

	class Grid:
	"""A simple grid class that stores the details and solution of the
	computational grid."""
	def __init__(self, nx=10, ny=10, xmin=0.0, xmax=1.0,
	ymin=0.0, ymax=1.0):
	self.xmin, self.xmax, self.ymin, self.ymax = xmin, xmax, ymin, ymax
	self.dx = float(xmax-xmin)/(nx-1)
	self.dy = float(ymax-ymin)/(ny-1)
	self.u = np.zeros((nx, ny)).astype(np.float32)
	# used to compute the change in solution in some of the methods.
	self.old_u = self.u.copy()

	def setBCFunc(self, func):
	"""Sets the BC given a function of two variables."""
	xmin, ymin = self.xmin, self.ymin
	xmax, ymax = self.xmax, self.ymax
	x = np.arange(xmin, xmax + self.dx*0.5, self.dx)
	y = np.arange(ymin, ymax + self.dy*0.5, self.dy)
	self.u[0 ,:] = func(xmin,y)
	self.u[-1,:] = func(xmax,y)
	self.u[:, 0] = func(x,ymin)
	self.u[:,-1] = func(x,ymax)

	def computeError(self):
	"""Computes absolute error using an L2 norm for the solution.
	This requires that self.u and self.old_u must be appropriately
	setup."""
	v = (self.u[1:-1, 1:-1] - self.old_u[1:-1, 1:-1]).flat
	return np.sqrt(np.dot(v,v))


	class LaplaceSolver:
	"""A simple Laplacian solver that can use different schemes to
	solve the problem."""
	def __init__(self, grid, stepper='numeric'):
	self.grid = grid
	self.setTimeStepper(stepper)

	def slowTimeStep(self, dt=0.0):
	"""Takes a time step using straight forward Python loops.
	While not providing any performance enhancement, """
	g = self.grid
	nx, ny = g.u.shape
	dx2, dy2 = g.dx2, g.dy2
	dnr_inv = 0.5/(dx2 + dy2)
	u = g.u

	err = 0.0;
	for offset in range(1,3):
	for i in range(1, nx-1):
	for j in range(1 + ( (i + offset) % 2), ny-1, 2):
	tmp = u[j,i]
	u[j,i] = ((u[j-1, i] + u[j+1, i])*dy2 +
	(u[j, i-1] + u[j, i+1])dx2)dnr_inv
	diff = u[j,i] - tmp
	err += diff**2

	return np.sqrt(err)

	def numericCheckerTimeStep(self, dt=0.0):
	"""Takes a time step using a NumPy expression."""
	g = self.grid
	dx2, dy2 = g.dx2, g.dy2
	dnr_inv = 0.5/(dx2 + dy2)
	u = g.u
	g.old_u = u.copy() # needed to compute the error.

	# The actual iteration
	u[1:-1:2, 1:-1:2] = ((u[0:-2:2, 1:-1:2] + u[2::2, 1:-1:2])*dy2 +
	(u[1:-1:2,0:-2:2] + u[1:-1:2, 2::2])dx2)dnr_inv
	u[2:-1:2, 2:-1:2] = ((u[1:-2:2, 2:-1:2] + u[3::2, 2:-1:2])*dy2 +
	(u[2:-1:2,1:-2:2] + u[2:-1:2, 3::2])dx2)dnr_inv

	u[1:-1:2, 2:-1:2] = ((u[0:-2:2, 2:-1:2] + u[2::2, 2:-1:2])*dy2 +
	(u[1:-1:2,1:-2:2] + u[1:-1:2, 3::2])dx2)dnr_inv
	u[2:-1:2, 1:-1:2] = ((u[1:-2:2, 1:-1:2] + u[3::2, 1:-1:2])*dy2 +
	(u[2:-1:2,0:-2:2] + u[2:-1:2, 2::2])dx2)dnr_inv

	return g.computeError()

	def openclCheckerTimeStep(self, dt=0.0):
	"""
	Takes a time step using a PyOpenCL kernel based on inline C code from:
	http://www.scipy.org/PerformancePython
	"""
	nx, ny = self.grid.u.shape

	if self.count == 0:
	g = self.grid
	dx2, dy2 = g.dx2, g.dy2
	dnr_inv = 0.5/(dx2 + dy2)
	u = g.u

	self.err = np.empty( (nx-2,), dtype=np.float32)

	self.ctx = cl.create_some_context()
	self.queue = cl.CommandQueue(self.ctx)

	mf = cl.mem_flags
	self.u_buf = cl.Buffer(self.ctx, mf.READ_WRITE, u.nbytes)
	self.err_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.err.nbytes)

	cl.enqueue_write_buffer(self.queue, self.u_buf, u).wait()

	self.prg = cl.Program(self.ctx, """
	__kernel void lp2dstep( __global float u[][%d], __global float *err, const uint stidx )
	{
	float tmp, diff;
	int i = get_global_id(0) + 1;
	if ( stidx == 1 )
	err[i-1] = 0.0;

	for ( int j = 1 + ( ( i + stidx ) %% 2); j<%d-1; j+=2 ) {
	tmp = u[j][i];
	u[j][i] = ((u[j-1][i] + u[j+1][i])*%g +
	(u[j][i-1] + u[j][i+1])%g)%g;
	diff = u[j][i] - tmp;
	err[i-1] += diff*diff;
	}
	}""" % (ny,ny,dy2,dx2,dnr_inv))


	try:
	self.prg.build()
	except:
	print "Error:"
	print self.prg.get_build_info(self.ctx.get_info(cl.context_info.DEVICES)[0], cl.program_build_info.LOG)

	lp1evt = self.prg.lp2dstep(self.queue, ((nx-2),), self.u_buf, self.err_buf, np.uint32(1))
	cl.enqueue_wait_for_events(self.queue, [lp1evt])
	self.prg.lp2dstep(self.queue, ((nx-2),), self.u_buf, self.err_buf, np.uint32(2))

	# Get Updated Error Data & Updated Grid
	#cl.enqueue_read_buffer(self.queue, self.u_buf, self.grid.u)
	cl.enqueue_read_buffer(self.queue, self.err_buf, self.err).wait()

	return np.sqrt(np.sum(self.err))

	def setTimeStepper(self, stepper='numeric'):
	"""Sets the time step scheme to be used while solving given a
	string"""
	if stepper == 'slow':
	self.timeStep = self.slowTimeStep
	elif stepper == 'numpychecker':
	self.timeStep = self.numericCheckerTimeStep
	elif stepper == 'openclchecker':
	self.timeStep = self.openclCheckerTimeStep
	else:
	self.timeStep = None

	def solve(self, n_iter=0, eps=1.0e-16):
	self.count = 0
	err = self.timeStep()
	self.count = 1
	count = self.count

	while err > eps:
	if n_iter and count >= n_iter:
	return err
	err = self.timeStep()
	count = count + 1

	return count


	def BC(x, y):
	"""Used to set the boundary condition for the grid of points.
	Change this as you feel fit."""
	return (x2 - y2)


	def test(nmin=5, nmax=30, dn=5, eps=1.0e-16, n_iter=0, stepper='numeric'):
	iters = []
	n_grd = numpy.arange(nmin, nmax, dn)
	times = []
	for i in n_grd:
	g = Grid(nx=i, ny=i)
	g.setBCFunc(BC)
	s = LaplaceSolver(g, stepper)
	t1 = time.clock()
	iters.append(s.solve(n_iter=n_iter, eps=eps))
	dt = time.clock() - t1
	times.append(dt)
	print "Solution for nx = ny = %d, took %f seconds"%(i, dt)
	return (n_grd**2, iters, times)


	def time_test(nx=512, ny=512, eps=1.0e-16, n_iter=100, stepper='numeric'):
	g = Grid(nx, ny)
	g.setBCFunc(BC)
	s = LaplaceSolver(g, stepper)
	t = time.clock()
	s.solve(n_iter=n_iter, eps=eps)
	return time.clock() - t


	def main(n=512, n_iter=512):
	print "Doing %d iterations on a %dx%d grid"%(n_iter, n, n)
	for i in ['openclchecker', 'numpychecker']:
	print i,
	sys.stdout.flush()
	print "took", time_test(n, n, stepper=i, n_iter=n_iter), "seconds"

	print "slow (1 iteration)",
	sys.stdout.flush()
	s = time_test(n, n, stepper='slow', n_iter=1)
	print "took", s, "seconds"
	print "%d iterations should take about %f seconds"%(n_iter, s*n_iter)


	if __name__ == "__main__":
	main(n=2048,n_iter=400)