# coding: utf-8

# # Monte Carlo Method
# 
# As a simple example of a Monte Carlo method, we will approximate the value of $\pi$:

# In[1]:

import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom


# In[2]:

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)


# ### Boilerplate for Random Number Generator

# In[3]:

generator_preamble = """
#include <pyopencl-random123/philox.cl>

typedef union {
    uint4 v;
    philox4x32_ctr_t c;
} philox4x32_ctr_vec_union;


uint4 philox4x32_bump(uint4 ctr)
{
    if (++ctr.x == 0)
        if (++ctr.y == 0)
            ++ctr.z;
    return ctr;
}

uint4 philox4x32_gen(
        uint4 ctr,
        uint2 key,
        uint4 *new_ctr)
{
    philox4x32_ctr_vec_union result;
    result.c = philox4x32(
        *(philox4x32_ctr_t *) &ctr,
        *(philox4x32_key_t *) &key);
    *new_ctr = philox4x32_bump(ctr);
    return result.v;
}

float4 philox4x32_f32(
        uint4 ctr,
        uint2 key,
        uint4 *new_ctr)
{
    *new_ctr = ctr;
    return
        convert_float4(philox4x32_gen(*new_ctr, key, new_ctr))
        * 2.3283064365386963e-10f;
}
"""


# ### Reduction Code

# Complete the sampler code:
# 
# ```
# mc_preamble_src = """
# 
# #include <pyopencl-complex.h>
# 
# float compute_sample(int i, unsigned int k1)
# {
#     uint4 ctr = { 0, 1, 2, 3 };
#     uint2 key2 = { i, k1 };
#     float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));
#     ...
# }
# """
# ```

# In[27]:

mc_preamble_src = """

#include <pyopencl-complex.h>

float compute_sample(int i, unsigned int k1)
{
    uint4 ctr = { 0, 1, 2, 3 };
    uint2 key2 = { i, k1 };
    float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));
    
    cfloat_t samp0 = cfloat_new(rng_res.s0, rng_res.s1);
    cfloat_t samp1 = cfloat_new(rng_res.s2, rng_res.s3);
    
    float result = 0;
    if (cfloat_abs(samp0) <= 1)
        result += 1;
    if (cfloat_abs(samp1) <= 1)
        result += 1;
        
    return result;
}
"""


# In[28]:

from pyopencl.reduction import ReductionKernel


# Syntax:
# 
# `ReductionKernel(context, dtype, netural, reduce_expr, map_expr, arguments)`

# In[29]:

rknl = ReductionKernel(ctx, np.float32,
        neutral="0",
        reduce_expr="a+b", map_expr="compute_sample(i, k1)",
        arguments="unsigned int k1", preamble=generator_preamble+mc_preamble_src)


# In[32]:

n = 100000000

nsamples_accepted = rknl(15, range=slice(n), queue=queue).get()
nsamples = 2*n
approx_pi = 4 * nsamples_accepted/nsamples

print(approx_pi)