from math import prod
from pyfr.backends.base.generator import BaseKernelGenerator
[docs]
class OpenMPKernelGenerator(BaseKernelGenerator):
[docs]
def render(self):
kargdefn, kargassn = self._render_args('args')
if self.ndim == 1:
core = f'''
for (int _xi = 0; _xi < BLK_SZ; _xi += SOA_SZ)
{{
#pragma omp simd
for (int _xj = 0; _xj < SOA_SZ; _xj++)
{{
{self.body}
}}
}}'''
clean = f'''
int _xi = 0;
#pragma omp simd
for (int _xj = 0; _xj < _nx % BLK_SZ; _xj++)
{{
{self.body}
}}'''
else:
core = f'''
for (ixdtype_t _y = 0; _y < _ny; _y++)
{{
for (int _xi = 0; _xi < BLK_SZ; _xi += SOA_SZ)
{{
#pragma omp simd
for (int _xj = 0; _xj < SOA_SZ; _xj++)
{{
{self.body}
}}
}}
}}'''
clean = f'''
for (ixdtype_t _y = 0, _xi = 0; _y < _ny; _y++)
{{
#pragma omp simd
for (int _xj = 0; _xj < _nx % BLK_SZ; _xj++)
{{
{self.body}
}}
}}'''
return f'''
struct {self.name}_kargs {{ {kargdefn}; }};
void {self.name}(ixdtype_t _ib,
const struct {self.name}_kargs *args,
int _disp_mask)
{{
{kargassn};
#define X_IDX (_xi + _xj)
#define X_IDX_AOSOA(v, nv)\
((_xi/SOA_SZ*(nv) + (v))*SOA_SZ + _xj)
#define BCAST_BLK(r, c, ld) ((c) % (ld) + ((c) / (ld))*(ld)*r)
if (_nx - _ib*BLK_SZ >= BLK_SZ)
{{
{core}
}}
else
{{
{clean}
}}
#undef X_IDX
#undef X_IDX_AOSOA
#undef BCAST_BLK
}}'''
[docs]
def ldim_size(self, name, factor=1):
return f'{factor}*BLK_SZ' if factor > 1 else 'BLK_SZ'
[docs]
def needs_ldim(self, arg):
return False
[docs]
def _displace_arg(self, arg):
if arg.isview:
return None
elif self.ndim == 1:
# Vector
if arg.ncdim == 0 or arg.ismpi:
return '_ib*BLK_SZ'
# 2D broadcast vector
elif arg.isbroadcast:
return None
# Stacked vector:
else:
return f'_ib*BLK_SZ*{prod(arg.cdims)}'
else:
# 2D broadcast vector or row broadcast matrix
if arg.isbroadcast or arg.isbroadcastr:
return None
# Column broadcast matrix
elif arg.isbroadcastc:
return f'_ib*BLK_SZ*{prod(arg.cdims)}'
# Matrix
else:
return f'_ib*BLK_SZ*{prod(arg.cdims)}*_ny'
[docs]
def _render_args(self, argn):
# We first need the argument list; starting with the dimensions
kargs = [('ixdtype_t', d, None, None) for d in self._dims]
# Now add any scalar arguments
kargs.extend((sa.dtype, sa.name, None, None) for sa in self.scalargs)
# Finally, add the vector arguments
for va in self.vectargs:
da = self._displace_arg(va)
mi = len(kargs) if da else None
if va.intent == 'in':
kargs.append((f'const {va.dtype}*', f'{va.name}_v', da, mi))
else:
kargs.append((f'{va.dtype}*', f'{va.name}_v', da, mi))
# Views
if va.isview:
kargs.append(('const ixdtype_t*', f'{va.name}_vix',
'_ib*BLK_SZ', None))
if va.ncdim == 2:
kargs.append(('const ixdtype_t*', f'{va.name}_vrstri',
'_ib*BLK_SZ', None))
# Argument definitions and assignments
kargdefn, kargassn = [], []
for dtype, name, disp, midx in kargs:
assn = f'{dtype} {name} = {argn}->{name}'
# Handle displacement and potential masking thereof
if disp and midx is not None:
assn += f' + ((_disp_mask & {1 << midx}) ? 0 : {disp})'
elif disp:
assn += f' + {disp}'
kargdefn.append(f'{dtype} {name}')
kargassn.append(assn)
return ';\n'.join(kargdefn), ';\n'.join(kargassn)