# coding: utf-8 # # PyOpenCL Parallel Patterns: Scan/Prefix Sum # # ## Setup Code # In[1]: import pyopencl as cl import pyopencl.array import pyopencl.clrandom import numpy as np import numpy.linalg as la # In[2]: ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) # In[3]: n = 10**7 x = cl.clrandom.rand(queue, n, np.float64) # ## Setting up the kernel: Compute the prefix sum of squares # # Want to compute the prefix sum of the squares of all entries in `x`. # # First, using `numpy`, as `result1`: # In[4]: result1 = np.cumsum(x.get()**2) # Then, using PyOpenCL: # In[5]: from pyopencl.scan import GenericScanKernel # Syntax: # # GSK(context, dtype, arguments, input_expr, scan_expr using `a` and `b`, neutral, output_statement with `item`) # In[7]: sknl = GenericScanKernel(ctx, np.float64, arguments="double *y, double *x", input_expr="x[i]*x[i]", scan_expr="a+b", neutral="0", output_statement="y[i] = item") # In[8]: result2 = cl.array.empty_like(x) sknl(result2, x) # ## Testing the outcome # In[9]: print(la.norm(result2.get() - result1)) # More features: # # * Segmented Scan # * Output stencils # * Works on structured types