Source code for pyfr.backends.openmp.generator

from math import prod

from pyfr.backends.base.generator import BaseKernelGenerator


class OpenMPKernelGenerator(BaseKernelGenerator):
    """Kernel generator which renders a kernel as C source for OpenMP.

    The rendered source consists of a ``struct <name>_kargs`` holding the
    kernel arguments and a ``void <name>(...)`` function which processes
    one block (indexed by ``_ib``) of ``BLK_SZ`` points using
    ``#pragma omp simd`` inner loops.
    """

    def render(self):
        """Return the C source for the kernel as a single string.

        The emitted function takes a block index ``_ib``, a pointer to the
        argument struct, and an integer ``_disp_mask``.  Two loop nests are
        generated around ``self.body``:

        * ``core`` iterates over a complete block in steps of ``SOA_SZ``
          and is selected when ``_nx - _ib*BLK_SZ >= BLK_SZ``;
        * ``clean`` handles the remainder, fixing ``_xi`` at zero and
          iterating over ``_nx % BLK_SZ`` points.

        When ``self.ndim`` is not 1 both nests are additionally wrapped in
        a loop over ``_y`` running up to ``_ny``.
        """
        kargdefn, kargassn = self._render_args('args')

        if self.ndim == 1:
            core = f'''
                   for (int _xi = 0; _xi < BLK_SZ; _xi += SOA_SZ)
                   {{
                       #pragma omp simd
                       for (int _xj = 0; _xj < SOA_SZ; _xj++)
                       {{
                           {self.body}
                       }}
                   }}'''
            clean = f'''
                   int _xi = 0;
                   #pragma omp simd
                   for (int _xj = 0; _xj < _nx % BLK_SZ; _xj++)
                   {{
                       {self.body}
                   }}'''
        else:
            core = f'''
                   for (ixdtype_t _y = 0; _y < _ny; _y++)
                   {{
                       for (int _xi = 0; _xi < BLK_SZ; _xi += SOA_SZ)
                       {{
                           #pragma omp simd
                           for (int _xj = 0; _xj < SOA_SZ; _xj++)
                           {{
                               {self.body}
                           }}
                       }}
                   }}'''
            clean = f'''
                   for (ixdtype_t _y = 0, _xi = 0; _y < _ny; _y++)
                   {{
                       #pragma omp simd
                       for (int _xj = 0; _xj < _nx % BLK_SZ; _xj++)
                       {{
                           {self.body}
                       }}
                   }}'''

        # NOTE: the trailing backslash after X_IDX_AOSOA(v, nv) is consumed
        # by Python (the string is not raw), so that macro is emitted on a
        # single line of C.  Every macro parameter is parenthesised in the
        # expansions so that compound arguments keep their precedence.
        return f'''
               struct {self.name}_kargs
               {{
                   {kargdefn};
               }};
               void {self.name}(ixdtype_t _ib,
                                const struct {self.name}_kargs *args,
                                int _disp_mask)
               {{
                   {kargassn};
                   #define X_IDX (_xi + _xj)
                   #define X_IDX_AOSOA(v, nv)\
                       ((_xi/SOA_SZ*(nv) + (v))*SOA_SZ + _xj)
                   #define BCAST_BLK(r, c, ld) ((c) % (ld) + ((c) / (ld))*(ld)*(r))
                   if (_nx - _ib*BLK_SZ >= BLK_SZ)
                   {{
                       {core}
                   }}
                   else
                   {{
                       {clean}
                   }}
                   #undef X_IDX
                   #undef X_IDX_AOSOA
                   #undef BCAST_BLK
               }}'''

    def ldim_size(self, name, factor=1):
        """Return the leading dimension of an argument as a C expression.

        The result is ``'BLK_SZ'``, or ``'<factor>*BLK_SZ'`` when *factor*
        is greater than one.  *name* is accepted but not used here.
        """
        return f'{factor}*BLK_SZ' if factor > 1 else 'BLK_SZ'

    def needs_ldim(self, arg):
        """Return ``False`` for every *arg*."""
        return False

    def _displace_arg(self, arg):
        """Return the per-block pointer displacement for a vector argument.

        The result is a C expression (as a string) which `_render_args`
        adds to the argument's base pointer, or ``None`` when the pointer
        should not be displaced (views and the broadcast cases below).
        """
        if arg.isview:
            return None

        if self.ndim == 1:
            # Vector
            if arg.ncdim == 0 or arg.ismpi:
                return '_ib*BLK_SZ'
            # 2D broadcast vector
            elif arg.isbroadcast:
                return None
            # Stacked vector
            else:
                return f'_ib*BLK_SZ*{prod(arg.cdims)}'
        else:
            # 2D broadcast vector or row broadcast matrix
            if arg.isbroadcast or arg.isbroadcastr:
                return None
            # Column broadcast matrix
            elif arg.isbroadcastc:
                return f'_ib*BLK_SZ*{prod(arg.cdims)}'
            # Matrix
            else:
                return f'_ib*BLK_SZ*{prod(arg.cdims)}*_ny'

    def _render_args(self, argn):
        """Render the argument struct members and their local unpacking.

        *argn* is the name of the C pointer to the argument struct.

        Returns a pair of strings, each joined with ``';\\n'``:

        * the member declarations (``<dtype> <name>``);
        * the local assignments (``<dtype> <name> = <argn>-><name>``),
          with any displacement appended.

        Each entry of the working list is a tuple
        ``(dtype, name, displacement, mask index)``.
        """
        # We first need the argument list; starting with the dimensions
        kargs = [('ixdtype_t', d, None, None) for d in self._dims]

        # Now add any scalar arguments
        kargs.extend((sa.dtype, sa.name, None, None) for sa in self.scalargs)

        # Finally, add the vector arguments
        for va in self.vectargs:
            da = self._displace_arg(va)

            # A displaced argument is maskable; its mask bit is its
            # position in the argument list
            mi = len(kargs) if da else None

            # Input-only arguments are const-qualified
            if va.intent == 'in':
                kargs.append((f'const {va.dtype}*', f'{va.name}_v', da, mi))
            else:
                kargs.append((f'{va.dtype}*', f'{va.name}_v', da, mi))

            # Views carry index arrays, always displaced by the block
            if va.isview:
                kargs.append(('const ixdtype_t*', f'{va.name}_vix',
                              '_ib*BLK_SZ', None))

                if va.ncdim == 2:
                    kargs.append(('const ixdtype_t*', f'{va.name}_vrstri',
                                  '_ib*BLK_SZ', None))

        # Argument definitions and assignments
        kargdefn, kargassn = [], []
        for dtype, name, disp, midx in kargs:
            assn = f'{dtype} {name} = {argn}->{name}'

            # Handle displacement and potential masking thereof; a set
            # bit in _disp_mask suppresses the displacement
            if disp and midx is not None:
                assn += f' + ((_disp_mask & {1 << midx}) ? 0 : {disp})'
            elif disp:
                assn += f' + {disp}'

            kargdefn.append(f'{dtype} {name}')
            kargassn.append(assn)

        return ';\n'.join(kargdefn), ';\n'.join(kargassn)