Loopy: Reductions

Setup code

In [11]:
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp
In [12]:
# Obtain an OpenCL context; which platform/device is used is left to PyOpenCL.
ctx = cl.create_some_context()
# Command queue bound to that context; the cells below pass it to the
# random-array constructors and to the loopy kernel invocation.
queue = cl.CommandQueue(ctx)
In [13]:
# Problem size: `a` is an n-by-n matrix and `x` is a length-n vector.
n = 1024

# Both operands are created through `queue` and filled with random float32 values.
a = cl.clrandom.rand(queue, shape=(n, n), dtype=np.float32)
x = cl.clrandom.rand(queue, shape=(n,), dtype=np.float32)

Capturing matrix-vector multiplication

In [14]:
# Matrix-vector product expressed as a loopy kernel:
#   - the domain lets both i and k range over 0 <= i, k < n;
#   - the single instruction sets b[i] to the sum over k of a[i, k] * x[k].
knl = lp.make_kernel(
    domains="{[i,k]: 0<=i,k<n}",
    instructions="b[i] = sum(k, a[i, k]*x[k])",
)
In [15]:
# Have loopy print the generated OpenCL code when the kernel is run.
knl = lp.set_options(knl, write_cl=True)

# The call returns the completion event plus a tuple of output arrays.
# Keep the result vector `b` instead of discarding it into `_`, which would
# also shadow IPython's "last output" variable.
evt, (b,) = knl(queue, a=a, x=x)

# Sanity-check the device result against a host-side NumPy reference.
# The tolerance is deliberately loose: float32 accumulation order on the
# device need not match NumPy's.
np.testing.assert_allclose(b.get(), np.dot(a.get(), x.get()), rtol=1e-3)
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))

__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float const *restrict a, __global float *restrict b, int const n, __global float const *restrict x)
{
  float acc_k;

  for (int i = 0; i <= -1 + n; ++i)
  {
    acc_k = 0.0f;
    for (int k = 0; k <= -1 + n; ++k)
      acc_k = acc_k + a[n * i + k] * x[k];
    b[i] = acc_k;
  }
}
In [ ]:
 
In [ ]: