Source code for pyfr.backends.cuda.generator

from math import prod

from pyfr.backends.base.generator import BaseGPUKernelGenerator


[docs] class CUDAKernelGenerator(BaseGPUKernelGenerator): _lid = ('threadIdx.x', 'threadIdx.y') _gid = 'ixdtype_t(blockIdx.x)*blockDim.x + threadIdx.x' _shared_prfx = '__shared__' _shared_sync = '__syncthreads()'
[docs] def _render_spec(self): res = '__restrict__' # We first need the argument list; starting with the dimensions kargs = [f'ixdtype_t {d}' for d in self._dims] # Now add any scalar arguments kargs.extend(f'{sa.dtype} {sa.name}' for sa in self.scalargs) # Finally, add the vector arguments for va in self.vectargs: if va.intent == 'in': kargs.append(f'const {va.dtype}* {res} {va.name}_v') else: kargs.append(f'{va.dtype}* {res} {va.name}_v') # Views if va.isview: kargs.append(f'const ixdtype_t* {res} {va.name}_vix') if va.ncdim == 2: kargs.append(f'const ixdtype_t* {res} {va.name}_vrstri') # Arrays elif self.needs_ldim(va): kargs.append(f'ixdtype_t ld{va.name}') # Determine the launch bounds for the kernel nthrds = prod(self.block1d if self.ndim == 1 else self.block2d) kattrs = f'__global__ __launch_bounds__({nthrds})' return '{0} void {1}({2})'.format(kattrs, self.name, ', '.join(kargs))