# coding: utf-8 # # Practice: Orthogonality of Hermite Polynomials # # In this exercise, modify the Monte-Carlo example to demonstrate the orthonormality of the two [Hermite polynomials](https://en.wikipedia.org/wiki/Hermite_polynomials) # # * $1$ and # * $x^2-1$ # # with respect to the weight $e^{-\frac{x^2}2}$, i.e. show (numerically, using a Monte Carlo method) that # # $$ # \int_{-\infty}^\infty 1 \cdot (x^2-1) \cdot e^{-\frac{x^2}2}dx = 0 # $$ # # and that # # $$ # \int_{-\infty}^\infty (x^2-1)^2 \cdot e^{-\frac{x^2}2}dx = 2\sqrt{2\pi}. # $$ # # Realize that # $$ # \int_{-\infty}^\infty \dots \cdot \frac{e^{-\frac{x^2}2}}{\sqrt{2\pi}}dx # $$ # can be evaluated by Monte-Carlo summation of $\dots$ where the $x$ are normally distributed. # # Use the [Box-Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) to obtain normally-distributed random numbers from the uniformly distributed ones returned by PyOpenCL's random number generator. # # # ### Initialization # In[3]: import numpy as np import pyopencl as cl import pyopencl.array import pyopencl.clrandom ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) # ### Boilerplate for Random Number Generator # In[4]: generator_preamble = """ #include typedef union { uint4 v; philox4x32_ctr_t c; } philox4x32_ctr_vec_union; uint4 philox4x32_bump(uint4 ctr) { if (++ctr.x == 0) if (++ctr.y == 0) ++ctr.z; return ctr; } uint4 philox4x32_gen( uint4 ctr, uint2 key, uint4 *new_ctr) { philox4x32_ctr_vec_union result; result.c = philox4x32( *(philox4x32_ctr_t *) &ctr, *(philox4x32_key_t *) &key); *new_ctr = philox4x32_bump(ctr); return result.v; } float4 philox4x32_f32( uint4 ctr, uint2 key, uint4 *new_ctr) { *new_ctr = ctr; return convert_float4(philox4x32_gen(*new_ctr, key, new_ctr)) * 2.3283064365386963e-10f; } """ # ### Monte-Carlo code # In[36]: from mako.template import Template mc_preamble_src = Template(""" #include float compute_sample(int i, unsigned int k1) { uint4 ctr = { 0, 1, 2, 3 }; uint2 key2 = { i, k1 }; float4 rng_res = philox4x32_f32(ctr, key2, &(ctr)); float r0 = sqrt(-2*log(rng_res.s0)); float v0 = r0*cos((float) (2*M_PI) * rng_res.s1); float v1 = r0*sin((float) (2*M_PI) * rng_res.s1); float r2 = sqrt(-2*log(rng_res.s2)); float v2 = r2*cos((float) (2*M_PI) * rng_res.s3); float v3 = r2*sin((float) (2*M_PI) * rng_res.s3); float result = 0; %for x in ["v0", "v1", "v2", "v3"]: { float x = ${x}; float H2 = x*x - 1; result += H2; } %endfor return result; } """, strict_undefined=True).render() # In[37]: from pyopencl.reduction import ReductionKernel rknl = ReductionKernel(ctx, np.float32, neutral="0", reduce_expr="a+b", map_expr="compute_sample(i, k1)", arguments="unsigned int k1", preamble=generator_preamble+mc_preamble_src) # In[38]: n = 10000000 nsamples = 4*n result = rknl(15, range=slice(n), queue=queue).get() / nsamples print(result) # In[ ]: