Source code for pyfr.backends.opencl.provider

from pyfr.backends.base import (BaseKernelProvider, BaseOrderedMetaKernel,
                                BasePointwiseKernelProvider,
                                BaseUnorderedMetaKernel, Kernel)
from pyfr.backends.opencl.generator import OpenCLKernelGenerator
from pyfr.nputil import npdtype_to_ctypestype
from pyfr.util import memoize


class OpenCLKernel(Kernel):
    def add_to_graph(self, graph, deps):
        pass


class OpenCLOrderedMetaKernel(BaseOrderedMetaKernel):
    def add_to_graph(self, graph, deps):
        pass

    def run(self, queue, wait_for=None, ret_evt=False):
        for k in self.kernels[:-1]:
            wait_for = [k.run(queue, wait_for, True)]

        return self.kernels[-1].run(queue, wait_for, ret_evt)


class OpenCLUnorderedMetaKernel(BaseUnorderedMetaKernel):
    def add_to_graph(self, graph, deps):
        pass

    def run(self, queue, wait_for=None, ret_evt=False):
        if ret_evt:
            kevts = [k.run(queue, wait_for, True) for k in self.kernels]
            return queue.marker(kevts)
        else:
            for k in self.kernels:
                k.run(queue, wait_for, False)


class OpenCLKernelProvider(BaseKernelProvider):
    def _benchmark(self, kfunc, nbench=4, nwarmup=1):
        queue = self.backend.cl.queue(profiling=True)

        for i in range(nbench + nwarmup):
            if i == nwarmup:
                start_evt = end_evt = kfunc(queue)
            elif i == nbench + nwarmup - 1:
                end_evt = kfunc(queue)
            else:
                kfunc(queue)

        queue.finish()

        return (end_evt.end_time - start_evt.start_time) / nbench

    @memoize
    def _build_program(self, src):
        flags = ['-cl-fast-relaxed-math', '-cl-std=CL2.0']

        return self.backend.cl.program(src, flags)

    def _build_kernel(self, name, src, argtypes, argn=[]):
        argtypes = [npdtype_to_ctypestype(arg) for arg in argtypes]

        return self._build_program(src).get_kernel(name, argtypes)


[docs] class OpenCLPointwiseKernelProvider(OpenCLKernelProvider, BasePointwiseKernelProvider): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._ls1d = (64,) self._ls2d = (64, 4) # Pass the local work group sizes to the generator class KernelGenerator(OpenCLKernelGenerator): block1d = self._ls1d block2d = self._ls2d self.kernel_generator_cls = KernelGenerator
[docs] def _instantiate_kernel(self, dims, fun, arglst, argm, argv): rtargs = [] # Determine the work group sizes if len(dims) == 1: ls = self._ls1d gs = (dims[0] - dims[0] % -ls[0],) else: ls = self._ls2d gs = (dims[1] - dims[1] % -ls[0], ls[1]) fun.set_dims(gs, ls) # Process the arguments for i, k in enumerate(arglst): if isinstance(k, str): rtargs.append((i, k)) else: fun.set_arg(i, k) class PointwiseKernel(OpenCLKernel): if rtargs: def bind(self, **kwargs): for i, k in rtargs: fun.set_arg(i, kwargs[k]) def run(self, queue, wait_for=None, ret_evt=False): return fun.exec_async(queue, wait_for, ret_evt) return PointwiseKernel(argm, argv)