{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PyOpenCL Parallel Patterns: Reduction" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup Code" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pyopencl as cl\n", "import pyopencl.array\n", "import pyopencl.clrandom\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ctx = cl.create_some_context()\n", "queue = cl.CommandQueue(ctx)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "n = 10**7\n", "x = cl.clrandom.rand(queue, n, np.float64)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setting up the kernel: Computing a sum of squares\n", "\n", "Want to compute the sum of the squares of all entries in `x`.\n", "\n", "First, using `numpy`, as `result1` (watch out: `.get()`)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "result1 = np.sum(x.get()**2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then, using PyOpenCL:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from pyopencl.reduction import ReductionKernel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Syntax:\n", "\n", "ReductionKernel(context, dtype, neutral, reduce_expr, map_expr, arguments)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rknl = ReductionKernel(ctx, np.float64,\n", " neutral=\"0\",\n", " reduce_expr=\"a+b\", map_expr=\"x[i]*x[i]\",\n", " arguments=\"double *x\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Testing the result" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": 
[], "source": [ "result2 = rknl(x)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "pyopencl.array.Array" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(result2)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "()" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result2.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now check the result:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "9.31322574615e-10\n" ] } ], "source": [ "print(result2.get()-result1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Change this to find maximum.\n", "* Works on structured types, too.\n", "* What if you wanted to find maximum *and* location?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1+" } }, "nbformat": 4, "nbformat_minor": 0 }