Source code for pyfr.backends.opencl.base

# -*- coding: utf-8 -*-

import numpy as np

from pyfr.backends.base import BaseBackend
from pyfr.mpiutil import get_local_rank


class OpenCLBackend(BaseBackend):
    """Backend which dispatches kernels through OpenCL.

    Construction selects an OpenCL platform and device according to the
    ``backend-opencl`` section of the supplied config, derives the
    alignment and SoA sizes from the chosen device, registers the OpenCL
    data types and meta kernel classes, instantiates the kernel providers
    and creates an out-of-order command queue.
    """

    name = 'opencl'
    blocks = False

    def __init__(self, cfg):
        """Set up the backend from the config object *cfg*.

        Raises:
            ValueError: if no platform or device matches the configured
                identifiers, or if double precision is requested but the
                selected device reports no fp64 support.
        """
        super().__init__(cfg)

        # Imported here rather than at module scope, as in the original
        from pyfr.backends.opencl.driver import OpenCL

        # Wrapped OpenCL driver
        self.cl = OpenCL()

        # User supplied platform/device selectors
        sect = 'backend-opencl'
        platid = cfg.get(sect, 'platform-id', '0').lower()
        devid = cfg.get(sect, 'device-id', 'local-rank').lower()
        devtype = cfg.get(sect, 'device-type', 'all').upper()

        # 'local-rank' is shorthand for the index given by get_local_rank
        if devid == 'local-rank':
            devid = str(get_local_rank())

        # Marker used to detect that a search found nothing
        missing = object()

        # First platform whose index or lower-cased name matches
        platform = next(
            (p for idx, p in enumerate(self.cl.get_platforms())
             if platid == str(idx) or platid == p.name.lower()),
            missing
        )
        if platform is missing:
            raise ValueError('No suitable OpenCL platform found')

        # First device whose index, lower-cased name, or UUID matches; a
        # device with a falsy UUID is compared as the empty string
        device = next(
            (d for idx, d in enumerate(platform.get_devices(devtype))
             if (devid == str(idx) or devid == d.name.lower() or
                 devid == str(d.uuid or ''))),
            missing
        )
        if device is missing:
            raise ValueError('No suitable OpenCL device found')

        # Double precision runs need a device with fp64 support
        needs_fp64 = self.fpdtype == np.float64
        if needs_fp64 and not device.has_fp64:
            raise ValueError('Device does not support double precision')

        # Make the chosen device the active one
        self.cl.set_device(device)

        # Alignment requirement, taken from the device
        self.alignb = device.mem_align

        # SoA size: twice the alignment divided by the element size
        itemsize = np.dtype(self.fpdtype).itemsize
        self.soasz = (2*self.alignb) // itemsize
        self.csubsz = self.soasz

        from pyfr.backends.opencl import (blasext, clblast, gimmik, packing,
                                          provider, types)

        # Data types
        self.const_matrix_cls = types.OpenCLConstMatrix
        self.graph_cls = types.OpenCLGraph
        self.matrix_cls = types.OpenCLMatrix
        self.matrix_slice_cls = types.OpenCLMatrixSlice
        self.view_cls = types.OpenCLView
        self.xchg_matrix_cls = types.OpenCLXchgMatrix
        self.xchg_view_cls = types.OpenCLXchgView

        # Meta kernels
        self.ordered_meta_kernel_cls = provider.OpenCLOrderedMetaKernel
        self.unordered_meta_kernel_cls = provider.OpenCLUnorderedMetaKernel

        # Base kernel providers; the pointwise provider must stay first
        self._providers = [
            provider.OpenCLPointwiseKernelProvider(self),
            blasext.OpenCLBlasExtKernels(self),
            packing.OpenCLPackingKernels(self),
            gimmik.OpenCLGiMMiKKernels(self)
        ]

        # CLBlast is optional; an OSError while loading it is ignored
        try:
            clblast_kerns = clblast.OpenCLCLBlastKernels(self)
        except OSError:
            pass
        else:
            self._providers.append(clblast_kerns)

        # Pointwise kernels
        self.pointwise = self._providers[0]

        # Out-of-order command queue
        self.queue = self.cl.queue(out_of_order=True)

    def run_kernels(self, kernels, wait=False):
        """Run each of *kernels* on the backend queue.

        A barrier is issued on the queue ahead of every kernel.  Once all
        kernels have been submitted the queue is finished if *wait* is
        true and flushed otherwise.
        """
        queue = self.queue

        for kern in kernels:
            queue.barrier()
            kern.run(queue)

        sync = queue.finish if wait else queue.flush
        sync()

    def run_graph(self, graph, wait=False):
        """Run *graph* on the backend queue after issuing a barrier.

        The queue is finished afterwards only when *wait* is true.
        """
        queue = self.queue

        queue.barrier()
        graph.run(queue)

        if wait:
            queue.finish()

    def _malloc_impl(self, nbytes):
        """Allocate and zero a device buffer of *nbytes* bytes.

        Returns the buffer object produced by the driver's ``mem_alloc``.
        """
        devbuf = self.cl.mem_alloc(nbytes)

        # Zero the full extent of the new allocation
        self.cl.zero(devbuf, 0, nbytes)

        return devbuf