@arisada
Last active July 26, 2023 12:06
#!/usr/bin/env python3
from sympy import symbols, init_printing, simplify, diff
from sympy.utilities.codegen import codegen
import numpy as np
import re
import os
import pyopencl as cl

# https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html
init_printing(use_unicode=True)


class GradSolver():
    def __init__(self, eq, variables, ranges):
        self.eq = eq
        self.variables = variables
        self.ranges = ranges

    def prepare(self):
        ## (self.eq - 0) ** 2
        self.cost = self.eq ** 2
        self.deq = [simplify(diff(self.cost, v)) for v in self.variables]
        print("eq:", self.eq)
        print("cost:", self.cost)
        print("dcost/dv:", self.deq, self.variables)
        self.x0 = np.array([0, 0])
        self.pgrad = None
    def step(self, gamma=0.1, beta=0.9):
        x0 = self.x0
        fx0 = self.eq.evalf(subs={"x": x0[0], "y": x0[1]})
        print("x0: ", x0, "f(x0):", fx0)
        grad = np.array([df.evalf(subs={"x": x0[0], "y": x0[1]}) for df in self.deq])
        print("grad: ", grad)
        # implementation of momentum
        if self.pgrad is None:
            self.pgrad = grad
        grad = beta * self.pgrad + (1 - beta) * grad
        self.pgrad = grad
        x1 = x0 - gamma * grad
        for i in range(len(x1)):
            if x1[i] < self.ranges[i][0]:
                x1[i] = self.ranges[i][0]
            elif x1[i] > self.ranges[i][1]:
                x1[i] = self.ranges[i][1]
        self.x0 = x1
        return self.eq.evalf(subs={"x": x1[0], "y": x1[1]})
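
# A minimal usage sketch of the pure-Python GradSolver above, modelled on the
# commented-out "solver2" lines in main() below. demo_gradsolver is a
# hypothetical helper, not part of the original gist, and is never called.
# Note: GradSolver.step() substitutes the symbols by the names "x" and "y",
# so this class only handles the two-variable case as written.
def demo_gradsolver():
    x, y = symbols("x y")
    eq = 3 * x**3 - 2 * x**2 - 3 * x + 1 + 2 * y**2 - 2 * x * y
    solver = GradSolver(eq, (x, y), ((-100, 100), (-100, 100)))
    solver.prepare()
    for i in range(3000):
        f = solver.step(gamma=0.01)  # value of eq at the updated point
        if abs(f) < 1e-8:            # a root of eq has been reached
            print("GradSolver converged in", i, "steps")
            break
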
class OpenCLGradSolver(GradSolver):
    def convert_opencl(self, code):
        pattern = r"^#include.*$"
        code = re.sub(pattern, '', code, flags=re.MULTILINE)
        code = code.replace("double", "float")
        return code
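
    # Example of what convert_opencl() compensates for (an illustration, not
    # output captured from a run; sympy's exact codegen formatting can differ
    # between versions). For eq = x**2, codegen(..., "C", ...) emits roughly:
    #
    #     #include "eq.h"
    #     #include <math.h>
    #     double eq(double x) {
    #         double eq_result;
    #         eq_result = pow(x, 2);
    #         return eq_result;
    #     }
    #
    # convert_opencl() drops the #include lines (OpenCL C has no math.h) and
    # rewrites every "double" to "float" so the function compiles as OpenCL C.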
    def prepare(self, nthreads=32):
        super().prepare()
        self.nthreads = nthreads
        gen = codegen(("eq", self.eq), "C", "eq", header=False)
        code = self.convert_opencl(gen[0][1])
        for f, v in zip(self.deq, self.variables):
            gen = codegen(("deq_d" + v.name, f), "C", "deq", header=False)
            code += self.convert_opencl(gen[0][1])
        self.kernel_code = code
        self.kernel_code += "#define NVAR " + str(len(self.variables)) + "\n"
        kernel_file = os.path.dirname(os.path.realpath(__file__)) + "/gradient.cl"
        with open(kernel_file, "r") as f:
            self.kernel_code += f.read()
        self.kernel_code += "void eval_deq(vec grad, __global vec x0){\n"
        for i, v in zip(range(len(self.variables)), self.variables):
            self.kernel_code += " grad[%d] = deq_d%s(" % (i, v.name)
            self.kernel_code += ','.join(["x0[%d]" % (j,) for j in range(len(self.variables))])
            self.kernel_code += ");\n"
        self.kernel_code += "}\n"
        self.kernel_code += "float eval_eq(__global const vec x0){\n"
        self.kernel_code += " return eq("
        self.kernel_code += ','.join(["x0[%d]" % (j,) for j in range(len(self.variables))])
        self.kernel_code += ");\n"
        self.kernel_code += "}\n"
        print(self.kernel_code)
        # now for the gory OpenCL setup
        self.ctx = cl.create_some_context(answers=[0])
        self.queue = cl.CommandQueue(self.ctx)
        self.prg = cl.Program(self.ctx, self.kernel_code).build()
        mf = cl.mem_flags
        self.x0 = np.random.rand(nthreads, len(self.variables)).astype(np.float32)
        #print("x0", self.x0)
        self.pgrad = np.ndarray((nthreads, len(self.variables)), dtype=np.float32)
        self.result = np.ndarray(shape=nthreads, dtype=np.float32)
        self.x0_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.x0)
        self.pgrad_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.pgrad)
        self.result_cl = cl.Buffer(self.ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=self.result)
        self.prg.init_pgrad(self.queue, (self.nthreads,), None, self.x0_cl, self.pgrad_cl)
        cl.enqueue_copy(self.queue, self.pgrad, self.pgrad_cl)
    def step(self, gamma=0.1, beta=0.9):
        self.prg.step(self.queue, (self.nthreads,), None,
                      cl.cltypes.float(gamma), cl.cltypes.float(beta),
                      self.x0_cl, self.pgrad_cl)
        cl.enqueue_copy(self.queue, self.x0, self.x0_cl)
        cl.enqueue_copy(self.queue, self.pgrad, self.pgrad_cl)
        #print("cl x0 ", self.x0)
        #print("cl pgrad ", self.pgrad)
        self.prg.eval(self.queue, (self.nthreads,), None, self.x0_cl, self.result_cl)
        cl.enqueue_copy(self.queue, self.result, self.result_cl)
        #print("cl res", self.result)
        return sum(self.result)
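
# A small sketch (hypothetical helper, not part of the original gist) for picking
# the best of the many parallel descents once step() has been called at least
# once: each row of solver.x0 is one thread's current point and solver.result
# holds eq evaluated at that point, so the row with the smallest |result| is the
# best root candidate.
def best_candidate(solver):
    best = np.argmin(np.abs(solver.result))
    return solver.x0[best], solver.result[best]
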
def main():
    x, y, z = symbols("x y z")
    eq = 3 * x**3 - 2 * x**2 - 3 * x + 1 + 2 * y**2 - 2 * x * y + 2 * x**2 * y + x * y * z
    # one (min, max) range per variable
    solver = OpenCLGradSolver(eq, (x, y, z), ((-100, 100), (-100, 100), (-100, 100)))
    #solver2 = GradSolver(eq, (x, y), ((-100, 100), (-100, 100)))
    solver.prepare(nthreads=64)
    #solver2.prepare()
    # return
    for i in range(3000):
        #f2 = solver2.step(gamma=0.01)
        f = solver.step(gamma=0.01)
        #print("---")
        if abs(f) < 1e-8:
            print("Converged in ", i, "steps")
            break
    else:
        print("Didn't converge in %d steps" % (i,))
    print("x0:", solver.x0)
    print("pgrad:", solver.pgrad)
    print("result:", solver.result)


if __name__ == "__main__":
    main()
/* gradient.cl
 * Plain OpenCL implementation of my gradient descent algorithm.
 * This one allows many gradient descents on the same function to happen at once.
 */
// Reference: the pure-Python step() this kernel mirrors:
// def step(self, gamma=0.1, beta=0.9):
//     x0 = self.x0
//     fx0 = self.eq.evalf(subs={"x": x0[0], "y": x0[1]})
//     print("x0: ", x0, "f(x0):", fx0)
//     grad = np.array([df.evalf(subs={"x": x0[0], "y": x0[1]}) for df in self.deq])
//     print("grad: ", grad)
//     # implementation of momentum
//     if self.pgrad is None:
//         self.pgrad = grad
//     grad = beta * self.pgrad + (1-beta) * grad
//     self.pgrad = grad
//     x1 = x0 - gamma * grad
//     for i in range(len(x1)):
//         if x1[i] < self.ranges[i][0]:
//             x1[i] = self.ranges[i][0]
//         elif x1[i] > self.ranges[i][1]:
//             x1[i] = self.ranges[i][1]
//     self.x0 = x1
//     return self.eq.evalf(subs={"x": x1[0], "y": x1[1]})
typedef float vec[NVAR];

void eval_deq(vec grad, __global vec x0);
float eval_eq(__global const vec x0);

__kernel void step(float gamma, float beta,
                   __global vec *x0,
                   __global vec *pgrad){
    int gid = get_global_id(0);
    vec grad;
    eval_deq(grad, x0[gid]);
    for (int i = 0; i < NVAR; ++i){
        // momentum
        grad[i] = beta * pgrad[gid][i] + (1.0f - beta) * grad[i];
        pgrad[gid][i] = grad[i];
        // descent
        x0[gid][i] -= gamma * grad[i];
        // clamping to [-100, 100]; float literals avoid promoting to double
        x0[gid][i] = max(x0[gid][i], -100.0f);
        x0[gid][i] = min(x0[gid][i], 100.0f);
    }
}

__kernel void eval(
    __global const vec *x0,
    __global float *result){
    int gid = get_global_id(0);
    result[gid] = eval_eq(x0[gid]);
}

__kernel void init_pgrad(
    __global const vec *x0,
    __global vec *pgrad){
    int gid = get_global_id(0);
    vec grad;
    eval_deq(grad, x0[gid]);
    for (int i = 0; i < NVAR; ++i){
        pgrad[gid][i] = grad[i];
    }
}