
# coding: utf-8

# # Monte Carlo Method
# 
# As a simple example of a Monte Carlo method, we will approximate the value of $\pi$:

# In[1]:

import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp


# In[2]:

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)


# ## Sample generation

# In[3]:

knl = lp.make_kernel(
        "{ [i, j]: 0<=i<n and 0<=j < 2}",
        """
        <> key2 = make_uint2(i, k1)
        <> ctr = make_uint4(0, 1, 2, 3)
        <> rng_res, <> dummy = philox4x32_f32(ctr, key2)
        
        samples[i,0] = rng_res.s0 + 1j*rng_res.s1
        samples[i,1] = rng_res.s2 + 1j*rng_res.s3

        accepted[i,j] = real(samples[i,j] * conj(samples[i,j])) < 1
        """)                                                                     

knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
knl = lp.set_options(knl, return_dict=True)


# In[4]:

knl = lp.set_options(knl, write_cl=True)


# In[5]:

evt, result = knl(queue, n=100000, k1=np.int32(99123))

samples = result["samples"].reshape(-1)
accepted = result["accepted"].reshape(-1)


# In[6]:

import matplotlib.pyplot as pt

samples_host = samples.get()
accepted_host = accepted.get() != 0

pt.gca().set_aspect("equal")
pt.plot(
    samples_host[accepted_host].real,
    samples_host[accepted_host].imag, "o")


# Lastly, we compute the ratio of accepted to the total number of samples to get our approximate value of $\pi$:

# In[7]:

4 * cl.array.sum(accepted).get() / len(samples)


# We (roughly) expect convergence as $1/\sqrt N$, so this gives an idea of the relative error to expect:

# In[8]:

1/np.sqrt(len(samples))


# ## Direct Reduction

# In[9]:

knl = lp.make_kernel(
        "{ [l, g, j, isamp]: 0<=l<nl and 0<=g,j<ng and 0<=isamp< 2}",
        """
        <> key2 = make_uint2(l + nl*g, 1234)
        <> ctr = make_uint4(0, 1, 2, 3)
        <> rng_res, <> dummy = philox4x32_f32(ctr, key2)
        
        <> samples[0] = rng_res.s0 + 1j*rng_res.s1     {id=samp1}
           samples[1] = rng_res.s2 + 1j*rng_res.s3     {id=samp2}

        <> accepted[isamp] = real(samples[isamp] * conj(samples[isamp])) < 1 \
                                                        {dep=samp1:samp2} 
        
        <> tmp[g] = sum(l, accepted[0] + accepted[1])           
                                                                                      
        result = sum(j, tmp[j])    
        """)                                                                     

size = 1000000
ng = 50

knl = lp.fix_parameters(knl, ng=ng)                                               

knl = lp.set_options(knl, write_cl=True, highlight_cl=True)                                          

ref_knl = knl                                                                     

knl = lp.split_iname(knl, "l", 128, inner_tag="l.0")
knl = lp.split_reduction_outward(knl, "l_inner")
knl = lp.tag_inames(knl, "g:g.0,j:l.0")

evt, (result,) = knl(queue, nl=size) 

nsamples = size*2*ng
print(4*result.get()/nsamples, nsamples/1e9, "billion samples")


# In[ ]:



