einsum
using loopy¶In this problem, we will design a function that carries out an einsum
-type operation using loopy
. It should be usable as shown in the tests towards the end of the worksheet. Also try to perform a simple parallelization so that your code will run on a GPU.
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array
import pyopencl.clmath
import pyopencl.clrandom
import loopy as lp
Some hints:
loopy.Reduction("sum", ("i", "j", "k"), expr)
expresses a sum.{[i,j]: 0<=i<Ni and 0<=j<Nj}
as a string and pass it to loopy.str.join()
: ",".join(names)
and str.format
: "Hi {name}".format(name="Andreas")
def loopy_einsum(queue, spec, *args):
arg_spec, out_spec = spec.split("->")
arg_specs = arg_spec.split(",")
# ...
def loopy_einsum(queue, spec, *args):
arg_spec, out_spec = spec.split("->")
arg_specs = arg_spec.split(",")
out_indices = set(out_spec)
all_indices = set(
idx
for argsp in arg_specs
for idx in argsp) | out_indices
sum_indices = all_indices - out_indices
from pymbolic import var
lhs = var("out")[tuple(var(i) for i in out_spec)]
rhs = 1
for i, argsp in enumerate(arg_specs):
rhs = rhs * var("arg%d" % i)[tuple(var(i) for i in argsp)]
if sum_indices:
rhs = lp.Reduction("sum", tuple(var(idx) for idx in sum_indices), rhs)
constraints = " and ".join(
"0 <= %s < N%s" % (idx, idx)
for idx in all_indices
)
domain = "{[%s]: %s}" % (",".join(all_indices), constraints)
knl = lp.make_kernel(domain, [lp.ExpressionInstruction(lhs, rhs)])
knl = lp.set_options(knl, write_cl=True)
kwargs = {}
for i, arg in enumerate(args):
kwargs["arg%d" % i] = arg
evt, (out,) = knl(queue, **kwargs)
return out
Now, let us test our implementation, using a simple matrix-matrix multiplication:
cl_context = cl.create_some_context()
queue = cl.CommandQueue(cl_context)
a = cl.clrandom.rand(queue, (300, 300), dtype=np.float64)
b = cl.clrandom.rand(queue, (300, 300), dtype=np.float64)
ab = loopy_einsum(queue, "ik,kj->ij", a, b)
diff = a.get().dot(b.get()) - ab.get()
print(la.norm(diff, 2))