kaushikcfd/diffusion_loopy.py

## diffusion_loopy.py
import numpy as np
import matplotlib.pyplot as plt
from time import time
import loopy as lp
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom

def apply_diff(nx = 100,
               ny = 100,
               t_left = 0.0,
               t_right = 1.0,
               t_bottom = 0.0,
               t_top = 1.0):
    time_start = time()

    t_new_host = np.zeros((nx, ny), dtype=np.float64)

    t_new_host[:, 0] = t_left
    t_new_host[0, :] = t_bottom
    t_new_host[:, -1] = t_right
    t_new_host[-1, :] = t_top

    t_old_host = np.copy(t_new_host)

    # Transferring to the device
    t_old_dev = cl.array.to_device(queue, t_old_host)
    t_new_dev = cl.array.to_device(queue, t_new_host)

    # Setting the number of divisions for parallel computations
    idiv = 2

    # Making the kernel
    knl = lp.make_kernel(
            ["{[i0, i1, i2, i3, j0, j1, j2]: 0<i0,i1,i2,i3<(nx-1) and\
                0<j0,j1,j2<(ny-1)}"],
            ["""
            t_old_dev[i0, j0] = t_new_dev[i0, j0] {id=copy_prev}
            t_new_dev[i1, j1] = 0.25*(t_old_dev[i1, j1-1] +
                                t_old_dev[i1, j1+1] + t_old_dev[i1-1, j1] +
                                t_old_dev[i1+1, j1]) {id=update, dep=copy_prev}
            l2_norm_temp[i2-1] = sum(j2, (t_old_dev[i2, j2]-t_new_dev[i2,\
            j2])*(t_old_dev[i2, j2]-t_new_dev[i2, j2])){id=norm1, dep=update}
            <float64> l2_norm_temp2 = sum(i3, l2_norm_temp[i3-1]){id=norm2, dep=norm1}
            out = sqrt(l2_norm_temp2){dep=norm2}
            """],
            [
                lp.GlobalArg("t_old_dev", None, shape = ("nx", "ny")),
                lp.GlobalArg("t_new_dev", None, shape = ("nx", "ny")),
                "..."
            ],
            seq_dependencies=True
            )
    knl = lp.set_options(knl, write_cl=True)
    knl = lp.set_options(knl, return_dict=True)
    knl = lp.split_iname(knl, "i0", idiv, outer_tag = "g.0", inner_tag="l.0")

    error = 100.0

    # FIXME:
    # Ok. The code compiles and runs and gives us the correct result. And
    # I have to figure out a way of not telling it to return us the t_new,
    # t_old.
    while error/(nx*ny)>1e-10:
        evt, otpt = knl(queue,t_new_dev=t_new_dev, t_old_dev=t_old_dev, nx=nx,
                ny=ny, out_host=False)
        error = otpt['out']

    # Solution converged
    # Getting the information from the device.

    t_new_host = t_new_dev.get()

    time_end = time()
    print('Grid Points =', nx*ny, 'Time =', time_end-time_start)

    x, y = np.meshgrid(np.linspace(0, 1, nx), np.linspace(0, 1, ny))
    plt.contour(x, y, t_new_host)
    plt.colorbar()
    plt.axis("equal")
    plt.show()

if __name__ == '__main__':
    ctx = cl.create_some_context(interactive=True)
    queue = cl.CommandQueue(ctx)

    apply_diff(nx=100, ny=100)
	import numpy as np
	import matplotlib.pyplot as plt
	from time import time
	import loopy as lp
	import pyopencl as cl
	import pyopencl.array
	import pyopencl.clrandom

	def apply_diff(nx = 100,
	ny = 100,
	t_left = 0.0,
	t_right = 1.0,
	t_bottom = 0.0,
	t_top = 1.0):
	time_start = time()

	t_new_host = np.zeros((nx, ny), dtype=np.float64)

	t_new_host[:, 0] = t_left
	t_new_host[0, :] = t_bottom
	t_new_host[:, -1] = t_right
	t_new_host[-1, :] = t_top

	t_old_host = np.copy(t_new_host)

	# Transferring to the device
	t_old_dev = cl.array.to_device(queue, t_old_host)
	t_new_dev = cl.array.to_device(queue, t_new_host)

	# Setting the number of divisions for parallel computations
	idiv = 2

	# Making the kernel
	knl = lp.make_kernel(
	["{[i0, i1, i2, i3, j0, j1, j2]: 0<i0,i1,i2,i3<(nx-1) and\
	0<j0,j1,j2<(ny-1)}"],
	["""
	t_old_dev[i0, j0] = t_new_dev[i0, j0] {id=copy_prev}
	t_new_dev[i1, j1] = 0.25*(t_old_dev[i1, j1-1] +
	t_old_dev[i1, j1+1] + t_old_dev[i1-1, j1] +
	t_old_dev[i1+1, j1]) {id=update, dep=copy_prev}
	l2_norm_temp[i2-1] = sum(j2, (t_old_dev[i2, j2]-t_new_dev[i2,\
	j2])*(t_old_dev[i2, j2]-t_new_dev[i2, j2])){id=norm1, dep=update}
	<float64> l2_norm_temp2 = sum(i3, l2_norm_temp[i3-1]){id=norm2, dep=norm1}
	out = sqrt(l2_norm_temp2){dep=norm2}
	"""],
	[
	lp.GlobalArg("t_old_dev", None, shape = ("nx", "ny")),
	lp.GlobalArg("t_new_dev", None, shape = ("nx", "ny")),
	"..."
	],
	seq_dependencies=True
	)
	knl = lp.set_options(knl, write_cl=True)
	knl = lp.set_options(knl, return_dict=True)
	knl = lp.split_iname(knl, "i0", idiv, outer_tag = "g.0", inner_tag="l.0")

	error = 100.0

	# FIXME:
	# Ok. The code compiles and runs and gives us the correct result. And
	# I have to figure out a way of not telling it to return us the t_new,
	# t_old.
	while error/(nx*ny)>1e-10:
	evt, otpt = knl(queue,t_new_dev=t_new_dev, t_old_dev=t_old_dev, nx=nx,
	ny=ny, out_host=False)
	error = otpt['out']

	# Solution converged
	# Getting the information from the device.

	t_new_host = t_new_dev.get()

	time_end = time()
	print('Grid Points =', nx*ny, 'Time =', time_end-time_start)

	x, y = np.meshgrid(np.linspace(0, 1, nx), np.linspace(0, 1, ny))
	plt.contour(x, y, t_new_host)
	plt.colorbar()
	plt.axis("equal")
	plt.show()

	if __name__ == '__main__':
	ctx = cl.create_some_context(interactive=True)
	queue = cl.CommandQueue(ctx)

	apply_diff(nx=100, ny=100)