Using C++ Function Templates in PyCUDA
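You can use C++ function templates in PyCUDA. By default, PyCUDA wraps the whole kernel source in an extern "C" block to prevent C++ name mangling, and nvcc cannot compile templates inside such a block. To use templates, pass no_extern_c=True to SourceModule so that the templates are compiled with normal C++ linkage, and wrap only the kernel functions you look up from Python in an explicit extern "C" block.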
License of this example: Public Domain
Date: July 27, 2010
PyCUDA version: 0.94rc
# Example: calling a templated __device__ function from a PyCUDA kernel.
#
# The kernel source is compiled with no_extern_c=True so the template keeps
# normal C++ linkage; only the __global__ entry point is wrapped in
# extern "C" so that get_function() can look it up by its plain name.

import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import pycuda.autoinit  # imported for its side effect of initializing CUDA
import numpy as np

from pycuda.compiler import SourceModule

func_mod = SourceModule("""
template <class T>
__device__ T incr(T x) {
    // T(1) keeps the addition in type T; a bare 1.0 literal is a double
    // and would force the arithmetic into double precision.
    return x + T(1);
}

// Needed to avoid name mangling so that PyCUDA can
// find the kernel function:
extern "C" {
    __global__ void func(float *a, int N)
    {
        int idx = threadIdx.x;
        if (idx < N)
            a[idx] = incr(a[idx]);
    }
}
""", no_extern_c=True)

func = func_mod.get_function('func')

N = 5
x = np.asarray(np.random.rand(N), np.float32)
x_orig = x.copy()  # host-side copy of the input, kept for display
x_gpu = gpuarray.to_gpu(x)

# One thread per element; np.int32 matches the kernel's `int N` parameter.
func(x_gpu.gpudata, np.int32(N), block=(N, 1, 1))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('x:  %s' % (x_orig,))
print('incr(x):  %s' % (x_gpu.get(),))