
# coding: utf-8

# # PyOpenCL: An exercise

# In[1]:

import pyopencl as cl
import numpy as np
import numpy.linalg as la
import pyopencl.array
import pyopencl.clrandom


# Change the code below to:
#     
# * Compute $c_i = a_ib_i$
# * Use work groups of $16\times 16$ items
# * Benchmark $1\times 1$ workgroups against $16\times 16$ workgroups
# 
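#   * Use `time()` from the `time` module (i.e. `import time`).
#   * Call `queue.finish()` before reading the clock, so that the measured
#     time covers the kernel's execution and not just its enqueueing.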

# In[2]:

# Obtain an OpenCL context from PyOpenCL's `create_some_context()` helper and
# create a command queue on it. `ctx` is used below to build the program;
# `queue` is used to upload the array and to launch the kernel.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)


# In[3]:

# Host-side input: a 1024 x 1024 array of uniform random samples in [0, 1),
# generated in double precision and then converted to float32 (the element
# type of the kernel's `float` buffer).
a = np.random.random_sample((1024, 1024))
a = a.astype(np.float32)


# In[4]:

# Compile the OpenCL C source below for `ctx` and keep a handle to its single
# kernel, `twice`, which doubles one element of `a` per work-item, in place.
# The kernel addresses the buffer as a flat array: the two global ids are
# combined as `gid1 * 1024 + gid0`, so the row stride 1024 is hard-coded and
# must match the 1024 x 1024 host array created above.
# NOTE(review): there is no bounds check in the kernel -- the global size used
# at launch must not address elements beyond the end of the buffer.
prg = cl.Program(ctx, """
    __kernel void twice(__global float *a)
    {
      int gid0 = get_global_id(0);
      int gid1 = get_global_id(1);
      int i = gid1 * 1024 + gid0;
      a[i] = 2*a[i];
    }
    """).build()
twice = prg.twice


# In[5]:

# Copy `a` to the device, then launch `twice` with one work-item per element:
# the global size is the array's shape and the local (work-group) size is
# passed as None. The kernel overwrites the device copy in place; the host
# array `a` is not modified by these lines.
# NOTE(review): with None the work-group size is presumably chosen by the
# OpenCL implementation -- confirm against the PyOpenCL docs before
# benchmarking explicit work-group sizes.
a_dev = cl.array.to_device(queue, a)
twice(queue, a_dev.shape, None, a_dev.data)


# In[6]:

# Check the result: copy the device array back to the host with `.get()` and
# print the norm of its difference from the expected `2*a`, followed by the
# norm of the input `a` for scale. The first number is expected to be zero.
print(la.norm(a_dev.get() - 2*a), la.norm(a))


# In[ ]:



