Created
March 28, 2011 19:01
-
-
Save kxbmap/891043 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.hellojocl | |
import java.nio.FloatBuffer | |
import util.Random | |
import com.jogamp.opencl.{CLBuffer, CLContext} | |
/**
 * Vector-addition demo (c = a + b) executed through JOCL.
 *
 * Every OpenCL resource below is a `lazy val`, so it is only created the first
 * time it is touched -- in practice during the first call to `run()`.
 *
 * @param ctx context used to pick the device and to create the program and buffers
 */
class HelloJOCL(ctx: CLContext) {

  import HelloJOCL._
  import com.jogamp.opencl.CLMemory.Mem.{READ_ONLY, WRITE_ONLY}

  /** Device returned by `ctx.getMaxFlopsDevice`; it is announced on stdout when first accessed. */
  lazy val device = {
    val fastest = ctx.getMaxFlopsDevice
    println("using " + fastest)
    fastest
  }

  /** Command queue created on `device`. */
  lazy val queue = device.createCommandQueue()

  /** Number of vector elements to add. */
  val elementCount = 1444477

  /** Work-group size: the device maximum, capped at 256. */
  lazy val localWorkSize = math.min(256, device.getMaxWorkGroupSize)

  /** `elementCount` rounded up to the next multiple of `localWorkSize`. */
  lazy val globalWorkSize = roundUp(groupSize = localWorkSize, globalSize = elementCount)

  /** Program built from the `VectorAdd.cl` resource located next to this class. */
  lazy val program = {
    val source = classOf[HelloJOCL].getResourceAsStream("VectorAdd.cl")
    ctx.createProgram(source).build()
  }

  // A and B are the inputs, filled with pseudo-random test data from fixed seeds
  // (so results do not change between runs); C receives the result.
  // All three buffers hold globalWorkSize elements.
  lazy val clBufferA = {
    val input = ctx.createFloatBuffer(globalWorkSize, READ_ONLY)
    fillBuffer(input, 12345)
  }

  lazy val clBufferB = {
    val input = ctx.createFloatBuffer(globalWorkSize, READ_ONLY)
    fillBuffer(input, 67890)
  }

  lazy val clBufferC = ctx.createFloatBuffer(globalWorkSize, WRITE_ONLY)

  /**
   * The 'VectorAdd' kernel with the three buffers and the element count bound as
   * its arguments. Buffer memory usage and the work sizes are printed on first access.
   */
  lazy val kernel = {
    val deviceBytes = clBufferA.getCLSize + clBufferB.getCLSize + clBufferC.getCLSize
    println("used device memory: " + deviceBytes / 1024 / 1024 + "MiB")
    println("localWorkSize: " + localWorkSize + ", globalWorkSize: " + globalWorkSize)
    val vectorAdd = program.createCLKernel("VectorAdd")
    vectorAdd.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount)
  }

  /**
   * Enqueues the two input writes (non-blocking), the kernel launch and a blocking
   * read of the result buffer.
   *
   * @return the host-side buffer of C, rewound to its start
   */
  def run() = {
    val nonBlocking = false
    val blocking = true
    queue
      .putWriteBuffer(clBufferA, nonBlocking)
      .putWriteBuffer(clBufferB, nonBlocking)
      .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
      .putReadBuffer(clBufferC, blocking)
    val result = clBufferC.getBuffer
    result.rewind()
    result
  }
}
/** Entry point and helper functions for the JOCL vector-addition demo. */
object HelloJOCL {

  /**
   * Creates an OpenCL context, runs the vector addition ten times and prints the
   * elapsed wall-clock time of each run in microseconds. After the first run the
   * first ten result values are printed as well. The context is released in a
   * `finally`, so it is freed even when a run throws.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val ctx = CLContext.create()
    println("created " + ctx)
    try {
      val hello = new HelloJOCL(ctx)
      for (iteration <- 1 to 10) {
        val startTime = System.nanoTime
        val ret = hello.run()
        val endTime = System.nanoTime
        if (iteration == 1) {
          // print first few elements of the resulting buffer to the console.
          println("a+b=c results snapshot: ")
          // relative get() advances the buffer position, so `remaining` below
          // counts only the elements that were not printed.
          for (_ <- 0 until 10) {
            print(ret.get().toString + ", ")
          }
          println("...; " + ret.remaining + " more")
        }
        println("computation took %2d: %d micro sec".format(iteration, (endTime - startTime) / 1000))
      }
    } finally ctx.release()
  }

  /**
   * Fills the remaining capacity of the host-side buffer of `clBuf` with
   * pseudo-random floats in [0, 100) and rewinds it.
   *
   * A private generator seeded with `seed` is used instead of reseeding the shared
   * `scala.util.Random` singleton; the produced sequence is the same, but the
   * process-wide generator is no longer disturbed.
   *
   * @param clBuf buffer whose host-side storage is filled
   * @param seed  seed that makes the generated data reproducible
   * @return `clBuf` itself, to allow use in an initialiser expression
   */
  def fillBuffer(clBuf: CLBuffer[FloatBuffer], seed: Int): CLBuffer[FloatBuffer] = {
    val rng = new Random(seed)
    val buffer = clBuf.getBuffer
    val values = Array.fill(buffer.remaining)(rng.nextFloat() * 100)
    buffer.put(values).rewind()
    clBuf
  }

  /**
   * Rounds `globalSize` up to the nearest multiple of `groupSize`.
   *
   * @param groupSize  work-group size; must be positive
   * @param globalSize value to round up
   * @return the smallest multiple of `groupSize` that is >= `globalSize`
   *         (for non-negative `globalSize`)
   * @throws IllegalArgumentException if `groupSize` is not positive
   */
  def roundUp(groupSize: Int, globalSize: Int): Int = {
    require(groupSize > 0, "groupSize must be positive, got " + groupSize)
    val r = globalSize % groupSize
    if (r == 0) globalSize
    else globalSize + groupSize - r
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.hellojocl | |
import util.Random | |
/**
 * Plain-Scala vector addition (c = a + b) over `size` floats, used as a timing
 * comparison. The three arrays are `lazy val`s and are therefore allocated on
 * first access.
 */
class HelloScala {

  /** Number of elements in each array. */
  val size = 1444477

  /** Input A: `size` pseudo-random floats in [0, 100), seed 12345. */
  lazy val arrayA = randomFloats(12345)

  /** Input B: `size` pseudo-random floats in [0, 100), seed 67890. */
  lazy val arrayB = randomFloats(67890)

  /** Result array, zero-initialised. */
  lazy val arrayC = Array.ofDim[Float](size)

  // Uses a private generator rather than reseeding the shared scala.util.Random
  // singleton: the sequence for a given seed is the same, but no global state is touched.
  private def randomFloats(seed: Int): Array[Float] = {
    val rng = new Random(seed)
    Array.fill(size)(rng.nextFloat() * 100)
  }

  /**
   * Performs the addition ten times over the indices produced by `r(size)`, then
   * prints the first ten results followed by the elapsed time of each pass in
   * microseconds.
   *
   * NOTE(review): the arrays are lazy, so the first timed pass also includes
   * their construction -- confirm whether that is intended.
   *
   * @param r builds the index sequence to iterate over, given the array size
   */
  def run(r: Int => Seq[Int]): Unit = {
    val indices = r(size)
    val timings = for (pass <- 1 to 10) yield {
      val startTime = System.nanoTime
      for (i <- indices) {
        arrayC(i) = arrayA(i) + arrayB(i)
      }
      val endTime = System.nanoTime
      // collect garbage outside the measured interval
      System.gc()
      (endTime - startTime) / 1000
    }

    println("a+b=c results snapshot: ")
    for (k <- 0 until 10) {
      print(arrayC(k).toString + ", ")
    }
    println("...; " + (arrayC.length - 10) + " more")
    timings.map(micros => "computation took: " + micros + " micro sec").foreach(println)
  }
}
/** Runs the plain-Scala vector addition once over a parallel range and once over a sequential one. */
object HelloScala {

  /**
   * Prints the processor count reported by the parallel-collections package, then
   * times the addition with a parallel index range and with a sequential one.
   * A fresh `HelloScala` is used for each variant.
   *
   * NOTE(review): passing `(0 until n).par` where `Seq[Int]` is expected only
   * type-checks on Scala versions whose parallel collections conform to `Seq` --
   * confirm against the targeted Scala version.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    println("Parallel - availableProcessors: " + scala.collection.parallel.availableProcessors)
    new HelloScala().run(n => (0 until n).par)
    println("Linear")
    new HelloScala().run(n => 0 until n)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// OpenCL Kernel Function for element by element vector addition | |
/*
 * Element-by-element vector addition: c[i] = a[i] + b[i].
 *
 * Each work-item handles the element at its global id in dimension 0.
 * Work-items whose id is at or beyond numElements do nothing, which lets the
 * host launch more work-items than there are elements.
 */
kernel void VectorAdd(global const float* a,
                      global const float* b,
                      global float* c,
                      int numElements) {
    // index of this work-item into the global data arrays
    const int idx = get_global_id(0);

    // bound check, equivalent to the limit on a 'for' loop
    if (idx < numElements) {
        c[idx] = a[idx] + b[idx];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment