In this exercise, modify the Monte-Carlo example to demonstrate the orthonormality of the two Hermite polynomials
with respect to the weight $e^{-\frac{x^2}2}$, i.e. show (numerically, using a Monte Carlo method) that
$$ \int_{-\infty}^\infty 1 \cdot (x^2-1) \cdot e^{-\frac{x^2}2}dx = 0 $$and that
$$ \int_{-\infty}^\infty (x^2-1)^2 \cdot e^{-\frac{x^2}2}dx = 2\sqrt{2\pi}. $$Realize that $$ \int_{-\infty}^\infty \dots \cdot \frac{e^{-\frac{x^2}2}}{\sqrt{2\pi}}dx $$ can be evaluated by Monte-Carlo summation of $\dots$ where the $x$ are normally distributed.
Use the Box-Muller transform to obtain normally-distributed random numbers from the uniformly distributed ones returned by PyOpenCL's random number generator.
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
generator_preamble = """
#include <pyopencl-random123/philox.cl>
typedef union {
uint4 v;
philox4x32_ctr_t c;
} philox4x32_ctr_vec_union;
uint4 philox4x32_bump(uint4 ctr)
{
if (++ctr.x == 0)
if (++ctr.y == 0)
++ctr.z;
return ctr;
}
uint4 philox4x32_gen(
uint4 ctr,
uint2 key,
uint4 *new_ctr)
{
philox4x32_ctr_vec_union result;
result.c = philox4x32(
*(philox4x32_ctr_t *) &ctr,
*(philox4x32_key_t *) &key);
*new_ctr = philox4x32_bump(ctr);
return result.v;
}
float4 philox4x32_f32(
uint4 ctr,
uint2 key,
uint4 *new_ctr)
{
*new_ctr = ctr;
return
convert_float4(philox4x32_gen(*new_ctr, key, new_ctr))
* 2.3283064365386963e-10f;
}
"""
from mako.template import Template
mc_preamble_src = Template("""
#include <pyopencl-complex.h>
float compute_sample(int i, unsigned int k1)
{
uint4 ctr = { 0, 1, 2, 3 };
uint2 key2 = { i, k1 };
float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));
float r0 = sqrt(-2*log(rng_res.s0));
float v0 = r0*cos((float) (2*M_PI) * rng_res.s1);
float v1 = r0*sin((float) (2*M_PI) * rng_res.s1);
float r2 = sqrt(-2*log(rng_res.s2));
float v2 = r2*cos((float) (2*M_PI) * rng_res.s3);
float v3 = r2*sin((float) (2*M_PI) * rng_res.s3);
float result = 0;
%for x in ["v0", "v1", "v2", "v3"]:
{
float x = ${x};
float H2 = x*x - 1;
result += H2;
}
%endfor
return result;
}
""", strict_undefined=True).render()
from pyopencl.reduction import ReductionKernel
rknl = ReductionKernel(ctx, np.float32,
neutral="0",
reduce_expr="a+b", map_expr="compute_sample(i, k1)",
arguments="unsigned int k1", preamble=generator_preamble+mc_preamble_src)
n = 10000000
nsamples = 4*n
result = rknl(15, range=slice(n), queue=queue).get() / nsamples
print(result)