Skip to content

Instantly share code, notes, and snippets.

@kaushikcfd
kaushikcfd / Reading list.md
Created February 25, 2017 22:54
This is the gist for keeping track of my reading list
  1. A Short History of Nearly Everything by Bill Bryson
  2. What If?: Serious Scientific Answers to Absurd Hypothetical Questions by Randall Munroe
  3. "Surely You're Joking, Mr. Feynman!": Adventures of a Curious Character by Richard Feynman
  4. The Beginning of Infinity by David Deutsch
  5. The Drunkard's Walk by Leonard Mlodinow
  6. Future Shock by Alvin (and Heidi) Toffler
  7. Gödel, Escher, Bach: An Eternal Golden Braid by Douglas Hofstadter
  8. Physics of the Impossible by Michio Kaku
import numpy as np
import pyopencl as cl
import matplotlib.pyplot as plt
from time import time
from time import sleep
def bandwidth_calculator(n_numbers):
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

I was trying to understand the advantages of changing the local work group size. Here are the results for a simple program which only takes in a random array, doubles it, and returns it. It also records the time needed for the whole operation.

The following plot shows the results for the various devices. (Figure: Comparison of the advantage of threads over various devices.)

I don't get a few things:

  • Exact explanation of threads.
  • Why does the speedup increase as the number of threads increases on the CPU?
import numpy as np
import matplotlib.pyplot as plt
from time import time
import loopy as lp
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
def apply_diff(nx = 100,
ny = 100,
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
__constant double const cnst[3 * 3] = { 0.6666666666666669, 0.16666666666666663, 0.16666666666666666, 0.16666666666666674, 0.16666666666666663, 0.6666666666666665, 0.16666666666666669, 0.6666666666666666, 0.16666666666666663 };
__constant double const cnst_0[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };
from ufl import action
from firedrake.ufl_expr import adjoint
from firedrake.formmanipulation import ExtractSubBlock
from firedrake.function import Function
from firedrake.petsc import PETSc
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
from time import time
import loopy as lp
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
---------------------------------------------------------------------------
KERNEL: loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel
---------------------------------------------------------------------------
ARGUMENTS:
A0_global: GlobalArg, type: np_atomic:dtype('float64'), shape: (A0_size), dim_tags: (N0:stride:1)
A0_size: ValueArg, type: np:dtype('int32')
coords_global: GlobalArg, type: np:dtype('float64'), shape: (coords_global_len, 2), dim_tags: (N1:stride:2, N0:stride:1)
coords_global_len: ValueArg, type: np:dtype('int32')
ltg_0: GlobalArg, type: np:dtype
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
__constant double const cnst[12 * 6] = { -0.05512856699248408, 0.6533077030470595, -0.055128566992484106, 0.2205142679699365, 0.015920894998035683, 0.2205142679699366, -0.055128566992484036, -0.05512856699248409, 0.6533077030470597, 0.22051426796993637, 0.2205142679699362, 0.015920894998035957, 0.6533077030470594, -0.055128566992484154, -0.055128566992484106, 0.01592089499803581, 0.2205142679699364, 0.2205142679699366, -0.1249989825350975, 0.0014305795177887936, -0.12499898253509754, 0.49999593014038923, 0.24857552527162577, 0.4999959301403913, -0.1249989825350975, -0.12499898253509759, 0.0014305795177888207, 0.49999593014038923, 0.4999959301403911, 0.24857552527162594, 0.0014305795177898299, -0.12499898253509759, -0.12499898253509754, 0.24857552527162485, 0.4999959301403901, 0.4999959301403
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
__constant double const cnst[12 * 6] = { -0.05512856699248408, 0.6533077030470595, -0.055128566992484106, 0.2205142679699365, 0.015920894998035683, 0.2205142679699366, -0.055128566992484036, -0.05512856699248409, 0.6533077030470597, 0.22051426796993637, 0.2205142679699362, 0.015920894998035957, 0.6533077030470594, -0.055128566992484154, -0.055128566992484106, 0.01592089499803581, 0.2205142679699364, 0.2205142679699366, -0.1249989825350975, 0.0014305795177887936, -0.12499898253509754, 0.49999593014038923, 0.24857552527162577, 0.4999959301403913, -0.1249989825350975, -0.12499898253509759, 0.0014305795177888207, 0.49999593014038923, 0.4999959301403911, 0.24857552527162594, 0.0014305795177898299, -0.12499898253509759, -0.12499898253509754, 0.24857552527162485, 0.4999959301403901, 0.4999959301403