# coding: utf-8

# # PyOpenCL Parallel Patterns: Reduction
#
# Computes the sum of squares of a large random array twice -- once on the
# host with numpy and once with a PyOpenCL ReductionKernel -- and prints the
# difference between the two results.

# ## Setup Code

# In[1]:

import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import numpy as np

# In[2]:

# A context plus a command queue on it; every PyOpenCL call below uses them.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# In[3]:

# Input data: n random float64 values generated through `queue`.
n = 10**7
x = cl.clrandom.rand(queue, n, np.float64)

# ## Setting up the kernel: Computing a sum of squares
#
# Want to compute the sum of the squares of all entries in `x`.
#
# First, using `numpy`, as `result1` (watch out: `.get()`)

# In[4]:

# Reference value: `x.get()` hands the data to numpy, which squares and sums.
result1 = np.sum(x.get()**2)

# Then, using PyOpenCL:

# In[5]:

from pyopencl.reduction import ReductionKernel

# Syntax:
#
#     ReductionKernel(context, dtype, neutral, reduce_expr, map_expr, arguments)

# In[6]:

# map_expr squares each entry, reduce_expr adds two partial results, and
# neutral ("0") is the starting value that leaves a sum unchanged.
rknl = ReductionKernel(
    ctx,
    np.float64,
    neutral="0",
    reduce_expr="a+b",
    map_expr="x[i]*x[i]",
    arguments="double *x",
)

# ## Testing the result

# In[7]:

result2 = rknl(x)

# In[8]:

# Printed explicitly: as a bare expression this only displays inside a
# notebook and is a silent no-op when the file runs as a script.
print(type(result2))

# In[9]:

# Same reason as above -- make the shape visible outside a notebook.
print(result2.shape)

# Now check the result:

# In[10]:

# Difference between the PyOpenCL result and the numpy reference.
print(result2.get()-result1)

# * Change this to find maximum.
# * Works on structured types, too.
# * What if you wanted to find maximum *and* location?

# In[ ]: