Skip to content

Instantly share code, notes, and snippets.

@trxcllnt
Last active April 30, 2018 18:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trxcllnt/a37bca8dd3ddd8ff99a0e39068271ad8 to your computer and use it in GitHub Desktop.
Save trxcllnt/a37bca8dd3ddd8ff99a0e39068271ad8 to your computer and use it in GitHub Desktop.
// Optional diagnostics (disabled): leak reporting via memwatch-next and a
// native crash log via segfault-handler. Both are third-party packages.
// const memwatch = require('memwatch-next');
// require('segfault-handler').registerHandler("async.log");
// memwatch.on('leak', ({ growth, reason }) => console.log(`Leak: ${round(growth >> 10, 2)}mb ${reason}`));
// Counter for the per-event debug log; in the code shown here it is only
// referenced from a commented-out console.log inside eventAsPromise.
let eventID = 0;
// `assert` is only used by the commented-out result verification in bindTest.
const assert = require('assert');
const cl = require('node-opencl');
const { promisify } = require('util');
// Promise-returning setTimeout; only referenced from commented-out code in eventAsPromise.
const sleep = promisify(setTimeout);
// createMemoryPoolClass is a function declaration further down the file, so it
// is hoisted and can be called here.
const MemoryPool = createMemoryPoolClass();
// Rounds `x` to `d` decimal places (note: decimal places, not significant figures).
const roundToSig = (x, d) => {
    const scale = Math.pow(10, d);
    return Math.round(x * scale) / scale;
};
// OpenCL C source for the `square` kernel: every work-item whose global id `i`
// is below `count` writes input[i] * input[i] to output[i].
const source = `
__kernel void square(
__global uint* input,
__global uint* output,
unsigned int count)
{
unsigned int i = get_global_id(0);
if (i < count)
output[i] = input[i] * input[i];
}
`;
// const DEVICE_TYPE = cl.DEVICE_TYPE_CPU;
const DEVICE_TYPE = cl.DEVICE_TYPE_GPU;
// Use the wait-list marker when the binding exposes it; otherwise fall back to
// a plain marker (the fallback ignores the wait list it is handed).
const enqueueMarkerWithWaitList = cl.enqueueMarkerWithWaitList || ((queue) => cl.enqueueMarker(queue, true));
// Select the first platform that exposes at least one device of DEVICE_TYPE,
// together with all of its matching devices.
const [platformID, deviceIDs] = (() => {
    for (const candidatePlatform of cl.getPlatformIDs()) {
        const matchingDevices = cl.getDeviceIDs(candidatePlatform, cl.DEVICE_TYPE_ALL)
            // CL_DEVICE_TYPE is a bitfield (e.g. GPU | DEFAULT), so test the bit
            // instead of comparing for exact equality.
            .filter((d) => (cl.getDeviceInfo(d, cl.DEVICE_TYPE) & DEVICE_TYPE) !== 0);
        if (matchingDevices.length > 0) {
            return [candidatePlatform, matchingDevices];
        }
    }
    // Fail with an explicit message instead of a TypeError on `deviceIDs[0]`.
    throw new Error(`No OpenCL device matching device type ${DEVICE_TYPE} was found on any platform`);
})();
console.log(`${
    cl.getPlatformInfo(platformID, cl.PLATFORM_VERSION)} - ${
    cl.getDeviceInfo(deviceIDs[0], cl.DEVICE_NAME)
}`);
// Entry point: builds the `square` kernel, allocates two device buffers and
// runs the map-write / kernel / map-read round trip 9975 times, logging the
// elapsed time every `logInterval` iterations.
((async () => {
    const context = cl.createContext ?
        cl.createContext([cl.CONTEXT_PLATFORM, platformID], [deviceIDs[0]]) :
        cl.createContextFromType([cl.CONTEXT_PLATFORM, platformID], DEVICE_TYPE, null, null);
    const program = cl.createProgramWithSource(context, source);
    cl.buildProgram(program);
    const kernel = cl.createKernel(program, `square`);
    const memFlags = cl.MEM_READ_WRITE | cl.MEM_ALLOC_HOST_PTR;
    // 32 * 1024 uint32 elements = 131072 bytes (128 KiB) per buffer
    const byteLength = 32 * 1024 * Uint32Array.BYTES_PER_ELEMENT;
    const inputs = cl.createBuffer(context, memFlags, byteLength);
    const outputs = cl.createBuffer(context, memFlags, byteLength);
    const queue = (cl.createCommandQueueWithProperties || cl.createCommandQueue)(context, deviceIDs[0], null);
    const logInterval = 525;
    const { startTime, getTime } = createTimer();
    const runTest = bindTest({ context, queue, kernel, inputs, outputs, byteLength });
    // `range` is a synchronous generator, so a plain for...of is sufficient.
    for (const i of range(9975)) {
        const async = true;
        // Or alternate sync/async on each iteration
        // const async = i % 2 !== 0;
        await runTest(async, i);
        if (i % logInterval === 0) {
            console.log(`iteration: ${i} (async=${async}, dTime=${getTime()})`);
        }
    }
    return `success (total=${getTime(startTime)})`;
})()
    .then(
        console.log.bind(console, `result:`),
        console.error.bind(console, `error:`)
    ).catch(console.error.bind(console, `caught:`)));
/**
 * Builds the per-iteration test runner for the `square` kernel.
 *
 * Binds the kernel's `count` argument once, prepares a small pool of random
 * input arrays plus map-write / map-read helpers, and returns `runTest`.
 *
 * @param {object} resources
 * @param {*} resources.context OpenCL context (not used by this function).
 * @param {*} resources.queue Command queue every command is enqueued on.
 * @param {*} resources.kernel The built `square` kernel.
 * @param {*} resources.inputs First device buffer.
 * @param {*} resources.outputs Second device buffer.
 * @param {number} resources.byteLength Size in bytes of each device buffer.
 * @returns {(async: boolean, i?: number, debug?: boolean) => Promise<void>}
 *   Runner performing one write -> kernel -> read round trip. `i` selects which
 *   buffer acts as input (they swap on odd iterations); `debug` logs per-step timings.
 */
function bindTest({ context, queue, kernel, inputs, outputs, byteLength }) {
    const setKernelArg = cl.setKernelArg.bind(cl, kernel);
    const length = byteLength / Uint32Array.BYTES_PER_ELEMENT;
    const bufferPool = new MemoryPool(Uint32Array, byteLength, 3);
    const enqueueNDRangeKernel = (async, gws, lws, events) => [
        cl.enqueueNDRangeKernel(queue, kernel, 1, null, gws, lws, events, async),
    ].filter(eventIsActive);
    // Runs `copy` right away when the mapped pointer carries no event, otherwise
    // once that event completes. Always yields `hostPtr`, which the map helper
    // then hands to the unmap call.
    const copyWhenMapped = (hostPtr, copy) => {
        const finish = () => {
            copy();
            return hostPtr;
        };
        return !hostPtr.event ? finish() : eventAsPromise(hostPtr.event).then(finish);
    };
    const writeInputs = bindMapBuffer(queue, cl.MAP_WRITE, byteLength, (hostPtr, data) =>
        copyWhenMapped(hostPtr, () => memcpy(hostPtr, data)));
    const readOutputs = bindMapBuffer(queue, cl.MAP_READ, byteLength, (hostPtr, data) =>
        copyWhenMapped(hostPtr, () => memcpy(data, hostPtr)));
    const out = new Uint32Array(length);
    // The kernel's `count` is a number of elements, not a number of bytes.
    setKernelArg(2, `uint`, length);
    return async function runTest(async, i = 0, debug = false) {
        const in_ = bufferPool.get();
        // alternate input/output buffers on each iteration
        const [inMem, outMem] = (i % 2 === 0 ? [inputs, outputs] : [outputs, inputs]);
        setKernelArg(0, `uint*`, inMem);
        setKernelArg(1, `uint*`, outMem);
        // Pass debug=true to see the write/read/map/unmap timings
        const getTime = debug ? createTimer().getTime : null;
        const logStep = (label) => {
            if (debug) {
                console.log(`${label}: ${getTime()}`);
            }
        };
        let events = await writeInputs(async, inMem, [], in_);
        logStep(`map write`);
        events = enqueueNDRangeKernel(async, [length], null, events);
        logStep(`enqueue K`);
        events = await readOutputs(async, outMem, events, out);
        logStep(` map read`);
        await eventAsPromise(events, queue);
        logStep(`enqueue M`);
        // Verify kernel results (slow)
        // (() => {
        //     assert.equal(out.length, in_.length,
        //         `expected out.length to equal ${in_.length}`);
        //     for (let i = -1, n = in_.length; ++i < n;) {
        //         assert.equal(isNaN(out[i]), false, `expected out[${i}] not NaN`);
        //         assert.equal(out[i], in_[i] ** 2,
        //             `expected out[${i}] to equal ${in_[i] ** 2}`);
        //     }
        // })();
    };
}
/**
 * Creates a helper that maps a buffer, lets `sel` work on the mapped pointer,
 * then unmaps it again.
 *
 * @param {*} q Command queue passed to the map and unmap calls.
 * @param {*} mapFlags Flags forwarded to cl.enqueueMapBuffer.
 * @param {number} size Size forwarded to cl.enqueueMapBuffer (offset is always 0).
 * @param {Function} sel Called with the mapped pointer plus any extra arguments;
 *   its (awaited, in async mode) result is what gets passed to the unmap call.
 * @returns {Function} `(async, memObj, events, ...rest)`; returns an array holding
 *   the unmap result if eventIsActive accepts it (a promise of that array when
 *   `async` is truthy).
 */
function bindMapBuffer(q, mapFlags, size, sel) {
    // Captured once and invoked as plain functions, exactly like the original aliases.
    const enqueueMap = cl.enqueueMapBuffer;
    const enqueueUnmap = cl.enqueueUnmapMemObject;
    const onlyActive = (event) => [event].filter(eventIsActive);

    // Synchronous flavour: third map argument true, trailing flags false
    // (presumably "blocking" and "return an event" - confirm against node-opencl).
    function mapBlocking(mem, waitList, ...extra) {
        const hostPtr = enqueueMap(q, mem, true, mapFlags, 0, size, waitList, false);
        const mapped = sel(hostPtr, ...extra);
        return onlyActive(enqueueUnmap(q, mem, mapped, [], false));
    }

    // Asynchronous flavour: the same calls with both flags inverted, awaiting `sel`.
    async function mapNonBlocking(mem, waitList, ...extra) {
        const hostPtr = enqueueMap(q, mem, false, mapFlags, 0, size, waitList, true);
        const mapped = await sel(hostPtr, ...extra);
        return onlyActive(enqueueUnmap(q, mem, mapped, [], true));
    }

    return (async, memObj, events, ...rest) => {
        const waitList = (events || []).filter(eventIsActive);
        return async
            ? mapNonBlocking(memObj, waitList, ...rest)
            : mapBlocking(memObj, waitList, ...rest);
    };
}
/**
 * Tells whether `e` looks like an event that still holds references.
 *
 * Non-objects are never active. An own `refCount` property is trusted as-is;
 * otherwise the count is queried through cl.getEventInfo and cached on the
 * object. Any error raised by that lookup is treated as "not active".
 *
 * @param {*} e Candidate event.
 * @returns {boolean} true when the (cached or queried) reference count is > 0.
 */
function eventIsActive(e) {
    const isObjectLike = Object(e) === e;
    if (!isObjectLike) {
        return false;
    }
    if (e.hasOwnProperty('refCount')) {
        return e.refCount > 0;
    }
    try {
        const queriedCount = cl.getEventInfo(e, cl.EVENT_REFERENCE_COUNT);
        e.refCount = queriedCount;
        return queriedCount > 0;
    } catch (_) {
        return false;
    }
}
/**
 * Wraps an event (or an array of events) in a promise.
 *
 * - Array with at least one active event (per eventIsActive): a single event is
 *   wrapped directly; several events are either awaited individually with
 *   Promise.all (no `queue`) or collapsed into one marker event enqueued on
 *   `queue` via enqueueMarkerWithWaitList.
 * - Anything else that eventIsActive rejects: resolves immediately with `e`.
 * - An active event: resolves with `e` from the cl.COMPLETE callback.
 *
 * The returned promise has no explicit rejection path.
 *
 * @param {*} e Event or array of events.
 * @param {*} [queue] Queue used to enqueue a marker when several events are pending.
 * @returns {Promise<*>}
 */
function eventAsPromise(e, queue) {
let tmp = e;
// const { startTime, getTime } = createTimer();
// Note: `tmp` is reassigned to the filtered (active-only) list inside the condition.
if (Array.isArray(tmp) && (tmp = tmp.filter(eventIsActive)).length > 0) {
return tmp.length === 1 ? eventAsPromise(tmp[0])
: !queue ? Promise.all(tmp.map((e) => eventAsPromise(e)))
: eventAsPromise(enqueueMarkerWithWaitList(queue, tmp, true));
} else if (!eventIsActive(e)) { return Promise.resolve(e); }
return new Promise((resolve) => cl.setEventCallback(e, cl.COMPLETE, async () => {
// console.log(`event ${eventID++} time: ${getTime()}`);
resolve(e);
// without this sleep, node exits after the first event's refCount drops to 0
// resolve(e) || (await sleep(25, e)); // either style is valid
// Release is deferred by 25ms (see the note above) and drops every reference
// the event currently reports, keeping the cached e.refCount in step so that
// eventIsActive sees the event as inactive afterwards.
setTimeout(() => {
try {
let refCount = !e ? 0 : cl.getEventInfo(e, cl.EVENT_REFERENCE_COUNT);
while (refCount-- > 0) { e.refCount = refCount; cl.releaseEvent(e); }
} catch (_) { console.error(`Error releasing event\n\t${_ && _.stack || _}`); }
}, 25);
}));
}
/**
 * Copies `byteLength` bytes from the start of `source` to the start of `target`.
 *
 * Both arguments may be an ArrayBuffer or any ArrayBuffer view (typed array,
 * DataView). The copy is done through Uint8Array views, so it works for any
 * byteOffset; picking a wider element type from the length alone throws a
 * RangeError on views whose byteOffset is not a multiple of that element size.
 *
 * @param {ArrayBuffer|ArrayBufferView} target Destination memory.
 * @param {ArrayBuffer|ArrayBufferView} source Source memory.
 * @param {number} [byteLength=source.byteLength] Number of bytes to copy.
 * @returns {undefined} Nothing; callers rely on this (`memcpy(...) || hostPtr`).
 * @throws {RangeError} If either side is shorter than `byteLength`.
 */
function memcpy(target, source, byteLength = source.byteLength) {
    // View exactly `byteLength` bytes of `data`, starting at its own offset.
    const bytesOf = (data) => (ArrayBuffer.isView(data)
        ? new Uint8Array(data.buffer, data.byteOffset, byteLength)
        : new Uint8Array(data, 0, byteLength));
    bytesOf(target).set(bytesOf(source));
}
/**
 * Returns `data` viewed (or copied) as an instance of `TypedArray`.
 *
 * - Already an instance of `TypedArray`: returned unchanged (`length` ignored).
 * - Falsy: a new zero-filled `TypedArray` of `length` elements.
 * - Another ArrayBuffer view: a new view over the same bytes, covering the
 *   whole of `data` (`length` ignored; a trailing partial element is dropped).
 * - Anything else (ArrayBuffer, array-like, ...): passed to the constructor as
 *   `new TypedArray(data, 0, length)`.
 *
 * @param {Function} TypedArray Typed array constructor to produce.
 * @param {*} data Value to view or convert.
 * @param {number} [length] Element count, used as described above.
 * @returns {*} An instance of `TypedArray`.
 */
function asTypedArray(TypedArray, data, length) {
    if (data instanceof TypedArray) {
        return data;
    }
    if (!data) {
        return new TypedArray(length);
    }
    if (ArrayBuffer.isView(data)) {
        const elementCount = Math.floor(data.byteLength / TypedArray.BYTES_PER_ELEMENT);
        return new TypedArray(data.buffer, data.byteOffset, elementCount);
    }
    return new TypedArray(data, 0, length);
}
/**
 * Lazily yields the integers 0, 1, 2, ... for as long as the value is below `n`.
 *
 * @param {number} n Exclusive upper bound.
 */
function* range(n) {
    let next = 0;
    while (next < n) {
        yield next;
        next += 1;
    }
}
/**
 * Builds the MemoryPool class: a fixed set of typed arrays, pre-filled with
 * random integers in [0, 100), that are handed out in round-robin order.
 *
 * @returns {Function} The MemoryPool class.
 */
function createMemoryPoolClass() {
    // Overwrites every element of `arr` with a random integer in [0, 100).
    const fillWithRandomInts = (arr) => {
        for (let idx = 0; idx < arr.length; idx += 1) {
            arr[idx] = Math.trunc(100 * Math.random());
        }
        return arr;
    };

    return class MemoryPool {
        /**
         * @param {Function} TypedArray Typed array constructor for the pooled buffers.
         * @param {number} bytesTotal Size of each pooled buffer in bytes.
         * @param {number} [poolSize=16] Number of buffers to allocate up front.
         */
        constructor(TypedArray, bytesTotal, poolSize = 16) {
            this.buffers = [];
            this.buffersIndex = 0;
            this.bytesTotal = bytesTotal;
            this.TypedArray = TypedArray;
            const elementCount = bytesTotal / TypedArray.BYTES_PER_ELEMENT;
            for (let made = 0; made < poolSize; made += 1) {
                this.buffers.push(fillWithRandomInts(new TypedArray(elementCount)));
            }
        }

        /**
         * Advances the cursor (wrapping around) and returns the buffer it now
         * points at, so the very first call returns the buffer at index 1.
         */
        get() {
            const nextIndex = (this.buffersIndex + 1) % this.buffers.length;
            this.buffersIndex = nextIndex;
            return this.buffers[nextIndex];
        }
    };
}
/**
 * Creates a stopwatch based on process.hrtime().
 *
 * @returns {{ startTime: number[], getTime: Function }} `startTime` is the
 *   hrtime tuple captured at creation. `getTime(time)` returns the milliseconds
 *   elapsed since `time`, formatted like "1.23ms"; when `time` is omitted it
 *   measures from the previous getTime call (or from creation on the first call).
 */
function createTimer() {
    const startTime = process.hrtime();
    let lastCall = startTime;
    const getTime = (time = lastCall) => {
        // Remember this call first so the next default measurement starts here.
        lastCall = process.hrtime();
        const [seconds, nanoseconds] = process.hrtime(time);
        const elapsedMs = (seconds * 1000) + (nanoseconds / 1000000);
        return `${roundToSig(elapsedMs, 2)}ms`;
    };
    return { startTime, getTime };
}
// Runs once the synchronous top-level code has finished, i.e. after the async
// test loop above has been started but (normally) before it has completed.
console.log("== Initial loop terminated ==");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment