diff --git a/conftest.py b/conftest.py index 666db66eae..16ae762b4e 100644 --- a/conftest.py +++ b/conftest.py @@ -35,7 +35,7 @@ def skipif(items, whole_module=False): accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc', 'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc', 'noadvisor', 'cpu64-arm', 'cpu64-icpx', 'chkpnt'}) - accepted.update({'nodevice'}) + accepted.update({'nodevice', 'noomp'}) unknown = sorted(set(items) - accepted) if unknown: raise ValueError("Illegal skipif argument(s) `%s`" % unknown) @@ -86,6 +86,10 @@ def skipif(items, whole_module=False): not get_advisor_path()): skipit = "Only `icx+advisor` should be tested here" break + # Slip if not using openmp + if i == 'noomp' and 'openmp' not in configuration['language']: + skipit = "Must use openmp" + break # Skip if it won't run on Arm if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm): skipit = "Arm doesn't support x86-specific instructions" diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index 0272304ee3..4b8ec7acd5 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -555,7 +555,7 @@ def get_advisor_path(): @memoized_func def get_hip_path(): # *** First try: via commonly used environment variables - for i in ['HIP_HOME']: + for i in ['HIP_HOME', 'ROCM_HOME']: hip_home = os.environ.get(i) if hip_home: return hip_home diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index 3dcabc5602..94cf8682de 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -353,7 +353,10 @@ def _gen_rettype(self, obj): elif isinstance(obj, (FieldFromComposite, FieldFromPointer)): return self._gen_value(obj.function.base, 0).typename else: - return None + try: + return obj._type_.__name__ + except AttributeError: + return None def _args_decl(self, args): """Generate cgen declarations from an iterable of symbols and expressions.""" diff --git a/devito/mpi/distributed.py b/devito/mpi/distributed.py index 63e3643fe0..9083d17059 100644 --- a/devito/mpi/distributed.py +++ b/devito/mpi/distributed.py @@ -40,7 +40,7 @@ def cleanup(): devito_mpi_finalize() atexit.register(cleanup) -except ImportError as e: +except (RuntimeError, ImportError) as e: # Dummy fallback in case mpi4py/MPI aren't available class NoneMetaclass(type): def __getattr__(self, name): diff --git a/devito/mpi/routines.py b/devito/mpi/routines.py index 59377ef59a..dcf04e8f44 100644 --- a/devito/mpi/routines.py +++ b/devito/mpi/routines.py @@ -1395,7 +1395,7 @@ def _arg_values(self, args=None, **kwargs): class AllreduceCall(Call): def __init__(self, arguments, **kwargs): - super().__init__('MPI_Allreduce', arguments) + super().__init__('MPI_Allreduce', arguments, **kwargs) class ReductionBuilder(object): @@ -1422,6 +1422,6 @@ def make(self, dr): op = self.mapper[dr.op] arguments = [inplace, Byref(f), Integer(1), mpitype, op, comm] - allreduce = AllreduceCall(arguments) + allreduce = AllreduceCall(arguments, writes=f) return allreduce diff --git a/devito/passes/iet/languages/C.py b/devito/passes/iet/languages/C.py index 7f1c8d8052..6b4285b4ed 100644 --- a/devito/passes/iet/languages/C.py +++ b/devito/passes/iet/languages/C.py @@ -1,16 +1,67 @@ import numpy as np from sympy.printing.c import C99CodePrinter -from devito.ir import Call, BasePrinter +from devito.exceptions import InvalidOperator +from devito.ir import Call, BasePrinter, List from devito.passes.iet.definitions import DataManager from devito.passes.iet.orchestration import Orchestrator from devito.passes.iet.langbase import LangBB from devito.symbolics import c_complex, c_double_complex +from devito.symbolics.extended_sympy import UnaryOp from devito.tools import dtype_to_cstr __all__ = ['CBB', 'CDataManager', 'COrchestrator'] +class RealExt(UnaryOp): + + _op = '__real__ ' + + +class ImagExt(UnaryOp): + + _op = '__imag__ ' + + +def atomic_add(i, pragmas, split=False): + # Base case, real reduction + if not split: + return i._rebuild(pragmas=pragmas) + # Complex reduction, split using a temp pointer + # Transforns lhs += rhs into + # { + # pragmas + # __real__ lhs += __real__ rhs; + # pragmas + # __imag__ lhs += __imag__ rhs; + # } + lhs, rhs = i.expr.lhs, i.expr.rhs + if (np.issubdtype(lhs.dtype, np.complexfloating) + and np.issubdtype(rhs.dtype, np.complexfloating)): + # Complex i, complex j + # Atomic add real and imaginary parts separately + lhsr, rhsr = RealExt(lhs), RealExt(rhs) + lhsi, rhsi = ImagExt(lhs), ImagExt(rhs) + real = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr), + pragmas=pragmas) + imag = i._rebuild(expr=i.expr._rebuild(lhs=lhsi, rhs=rhsi), + pragmas=pragmas) + return List(body=[real, imag]) + + elif (np.issubdtype(lhs.dtype, np.complexfloating) + and not np.issubdtype(rhs.dtype, np.complexfloating)): + # Complex i, real j + # Atomic add j to real part of i + lhsr, rhsr = RealExt(lhs), rhs + real = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr), + pragmas=pragmas) + return real + else: + # Real i, complex j + raise InvalidOperator("Atomic add not implemented for real " + "Functions with complex increments") + + class CBB(LangBB): mapper = { @@ -29,7 +80,7 @@ class CBB(LangBB): 'host-free-pin': lambda i: Call('free', (i,)), 'alloc-global-symbol': lambda i, j, k: - Call('memcpy', (i, j, k)), + Call('memcpy', (i, j, k)) } diff --git a/devito/passes/iet/languages/CXX.py b/devito/passes/iet/languages/CXX.py index dbc66807e6..bc2c7691d4 100644 --- a/devito/passes/iet/languages/CXX.py +++ b/devito/passes/iet/languages/CXX.py @@ -1,12 +1,17 @@ +from ctypes import POINTER + import numpy as np from sympy.printing.cxx import CXX11CodePrinter -from devito.ir import Call, UsingNamespace, BasePrinter +from devito import Real, Imag +from devito.exceptions import InvalidOperator +from devito.ir import Call, UsingNamespace, BasePrinter, DummyExpr, List from devito.passes.iet.definitions import DataManager from devito.passes.iet.orchestration import Orchestrator from devito.passes.iet.langbase import LangBB -from devito.symbolics import c_complex, c_double_complex -from devito.tools import dtype_to_cstr +from devito.symbolics import c_complex, c_double_complex, IndexedPointer, cast, Byref +from devito.tools import dtype_to_cstr, dtype_to_ctype +from devito.types import Pointer __all__ = ['CXXBB', 'CXXDataManager', 'CXXOrchestrator'] @@ -65,6 +70,51 @@ def std_arith(prefix=None): """ +def atomic_add(i, pragmas, split=False): + # Base case, real reduction + if not split: + return i._rebuild(pragmas=pragmas) + # Complex reduction, split using a temp pointer + # Transforns lhs += rhs into + # { + # float * lhs = reinterpret_cast(&lhs); + # pragmas + # lhs[0] += std::real(rhs); + # pragmas + # lhs[1] += std::imag(rhs); + # } + # Make a temp pointer + lhs, rhs = i.expr.lhs, i.expr.rhs + rdtype = lhs.dtype(0).real.__class__ + plhs = Pointer(name=f'p{lhs.name}', dtype=POINTER(dtype_to_ctype(rdtype))) + peq = DummyExpr(plhs, cast(rdtype, stars='*')(Byref(lhs), reinterpret=True)) + + if (np.issubdtype(lhs.dtype, np.complexfloating) + and np.issubdtype(rhs.dtype, np.complexfloating)): + # Complex i, complex j + # Atomic add real and imaginary parts separately + lhsr, rhsr = IndexedPointer(plhs, 0), Real(rhs) + lhsi, rhsi = IndexedPointer(plhs, 1), Imag(rhs) + real = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr), + pragmas=pragmas) + imag = i._rebuild(expr=i.expr._rebuild(lhs=lhsi, rhs=rhsi), + pragmas=pragmas) + return List(body=[peq, real, imag]) + + elif (np.issubdtype(lhs.dtype, np.complexfloating) + and not np.issubdtype(rhs.dtype, np.complexfloating)): + # Complex i, real j + # Atomic add j to real part of i + lhsr, rhsr = IndexedPointer(plhs, 0), rhs + real = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr), + pragmas=pragmas) + return List(body=[peq, real]) + else: + # Real i, complex j + raise InvalidOperator("Atomic add not implemented for real " + "Functions with complex increments") + + class CXXBB(LangBB): mapper = { @@ -86,7 +136,7 @@ class CXXBB(LangBB): 'host-free-pin': lambda i: Call('free', (i,)), 'alloc-global-symbol': lambda i, j, k: - Call('memcpy', (i, j, k)), + Call('memcpy', (i, j, k)) } diff --git a/devito/passes/iet/languages/openacc.py b/devito/passes/iet/languages/openacc.py index bacd2a5f66..a6c33992ad 100644 --- a/devito/passes/iet/languages/openacc.py +++ b/devito/passes/iet/languages/openacc.py @@ -74,7 +74,7 @@ class AccBB(PragmaLangBB): Call('acc_set_device_num', args), # Pragmas 'atomic': - Pragma('acc atomic update'), + lambda i, s: i._rebuild(pragmas=Pragma('acc atomic update')), 'map-enter-to': lambda f, imask: PragmaTransfer('acc enter data copyin(%s%s)', f, imask=imask), 'map-enter-to-async': lambda f, imask, a: diff --git a/devito/passes/iet/languages/openmp.py b/devito/passes/iet/languages/openmp.py index a3dec34f9e..bdbee7fe3e 100644 --- a/devito/passes/iet/languages/openmp.py +++ b/devito/passes/iet/languages/openmp.py @@ -5,7 +5,7 @@ from sympy import And, Ne, Not from devito.arch import AMDGPUX, NVIDIAX, INTELGPUX, PVC -from devito.arch.compiler import GNUCompiler, NvidiaCompiler +from devito.arch.compiler import GNUCompiler, NvidiaCompiler, CustomCompiler from devito.ir import (Call, Conditional, DeviceCall, List, Pragma, Prodder, ParallelBlock, PointerCast, While, FindSymbols) from devito.passes.iet.definitions import DataManager, DeviceAwareDataManager @@ -15,8 +15,8 @@ PragmaDeviceAwareTransformer, PragmaLangBB, PragmaIteration, PragmaTransfer) from devito.passes.iet.languages.utils import joins -from devito.passes.iet.languages.C import CBB -from devito.passes.iet.languages.CXX import CXXBB +from devito.passes.iet.languages.C import CBB, atomic_add as c_atomic_add +from devito.passes.iet.languages.CXX import CXXBB, atomic_add as cxx_atomic_add from devito.symbolics import CondEq, DefFunction from devito.tools import filter_ordered @@ -133,8 +133,7 @@ class AbstractOmpBB(LangBB): Pragma('omp simd'), 'simd-for-aligned': lambda n, *a: SimdForAligned('omp simd aligned(%s:%d)', arguments=(n, *a)), - 'atomic': - Pragma('omp atomic update') + 'atomic': lambda i, s: i._rebuild(pragmas=Pragma('omp atomic update')) } Region = OmpRegion @@ -144,11 +143,20 @@ class AbstractOmpBB(LangBB): class OmpBB(AbstractOmpBB): - mapper = {**AbstractOmpBB.mapper, **CBB.mapper} + + mapper = { + **AbstractOmpBB.mapper, + **CBB.mapper, + 'atomic': lambda i, s: c_atomic_add(i, Pragma('omp atomic update'), split=s) + } class CXXOmpBB(AbstractOmpBB): - mapper = {**AbstractOmpBB.mapper, **CXXBB.mapper} + mapper = { + **AbstractOmpBB.mapper, + **CXXBB.mapper, + 'atomic': lambda i, s: cxx_atomic_add(i, Pragma('omp atomic update'), split=s) + } class DeviceOmpBB(OmpBB, PragmaLangBB): @@ -230,6 +238,9 @@ class AbstractOmpizer(PragmaShmTransformer): @classmethod def _support_array_reduction(cls, compiler): + # In case we have a CustomCompiler + if isinstance(compiler, CustomCompiler): + compiler = compiler._base() # Not all backend compilers support array reduction! # Here are the known unsupported ones: if isinstance(compiler, GNUCompiler) and \ @@ -241,6 +252,16 @@ def _support_array_reduction(cls, compiler): else: return True + @classmethod + def _support_complex_reduction(cls, compiler): + # In case we have a CustomCompiler + if isinstance(compiler, CustomCompiler): + compiler = compiler._base() + if isinstance(compiler, GNUCompiler): + # Gcc doesn't supports complex reduction + return False + return True + class Ompizer(AbstractOmpizer): langbb = OmpBB diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index 9cefc4786b..3cb1c6da95 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -42,6 +42,10 @@ class PragmaSimdTransformer(PragmaTransformer): def _support_array_reduction(cls, compiler): return True + @classmethod + def _support_complex_reduction(cls, compiler): + return False + @property def simd_reg_nbytes(self): return self.platform.simd_reg_nbytes @@ -238,8 +242,9 @@ def _make_reductions(self, partree): # Implement reduction mapper = {partree.root: partree.root._rebuild(reduction=reductions)} elif all(i is OpInc for _, _, i in reductions): - # Use atomic increments - mapper = {i: i._rebuild(pragmas=self.langbb['atomic']) for i in exprs} + test2 = not self._support_complex_reduction(self.compiler) and \ + any(np.iscomplexobj(i.dtype(0)) for i, _, _ in reductions) + mapper = {i: self.langbb['atomic'](i, test2) for i in exprs} else: raise NotImplementedError diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py index 4a8d2df206..938bb391be 100644 --- a/devito/symbolics/extended_sympy.py +++ b/devito/symbolics/extended_sympy.py @@ -172,9 +172,9 @@ def __new__(cls, call, pointer, params=None, **kwargs): pointer = Symbol(pointer) if isinstance(call, str): call = Symbol(call) - elif not isinstance(call, Basic): - raise ValueError("`call` must be a `devito.Basic` or a type " - "with compatible interface") + elif not isinstance(call, (BasicWrapperMixin, Basic)): + raise ValueError(f"`call` {call} must be a `devito.Basic` or a type " + f"with compatible interface, not {type(call)}") _params = [] for p in as_tuple(params): if isinstance(p, str): diff --git a/devito/types/dense.py b/devito/types/dense.py index 20a31a10f9..2014846477 100644 --- a/devito/types/dense.py +++ b/devito/types/dense.py @@ -149,7 +149,7 @@ def wrapper(self): # Perhaps user only wants to initialise the physical domain self._initializer(self.data) else: - self.data_with_halo.fill(0) + self._data_allocated.fill(0) return func(self) return wrapper @@ -297,7 +297,8 @@ def shape_global(self): @property def symbolic_shape(self): - return tuple(self._C_get_field(FULL, d).size for d in self.dimensions) + return DimensionTuple(*[self._C_get_field(FULL, d).size for d in self.dimensions], + getters=self.dimensions) @property def size_global(self): diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py index 3ecd855827..f6f7496b4a 100644 --- a/tests/test_dtypes.py +++ b/tests/test_dtypes.py @@ -2,17 +2,26 @@ import pytest import sympy +try: + from ..conftest import skipif +except ImportError: + from conftest import skipif + from devito import ( Constant, Eq, Function, Grid, Operator, exp, log, sin, configuration ) +from devito.arch.compiler import GNUCompiler, CustomCompiler +from devito.exceptions import InvalidOperator from devito.ir.cgen.printer import BasePrinter from devito.passes.iet.langbase import LangBB from devito.passes.iet.languages.C import CBB, CPrinter from devito.passes.iet.languages.openacc import AccBB, AccPrinter from devito.passes.iet.languages.openmp import OmpBB from devito.symbolics.extended_dtypes import ctypes_vector_mapper +from devito.tools import dtype_to_cstr from devito.types.basic import Basic, Scalar, Symbol from devito.types.dense import TimeFunction +from devito.types.sparse import SparseTimeFunction # Mappers for language-specific types and headers _languages: dict[str, type[LangBB]] = { @@ -274,3 +283,56 @@ def test_complex_space_deriv(dtype: np.dtype[np.complexfloating]) -> None: dfdy = h.data.T[1:-1, 1:-1] assert np.allclose(dfdx, np.ones((5, 5), dtype=dtype)) assert np.allclose(dfdy, np.ones((5, 5), dtype=dtype)) + + +@skipif(['noomp', 'device']) +@pytest.mark.parametrize('dtypeu', [np.float32, np.complex64, np.complex128]) +def test_complex_reduction(dtypeu: np.dtype[np.complexfloating]) -> None: + """ + Tests reductions over complex-valued functions. + """ + grid = Grid((11, 11)) + + u = TimeFunction(name="u", grid=grid, space_order=2, time_order=1, dtype=dtypeu) + for dtypes in [dtypeu, dtypeu(0).real.__class__]: + u.data.fill(0) + s = SparseTimeFunction(name="s", grid=grid, npoint=1, nt=10, dtype=dtypes) + if np.issubdtype(dtypes, np.complexfloating): + s.data[:] = 1 + 2j + expected = 8. + 16.j + else: + s.data[:] = 1 + expected = 8. + s.coordinates.data[:] = [.5, .5] + + # s complex and u real should error + if np.issubdtype(dtypeu, np.floating) and \ + np.issubdtype(dtypes, np.complexfloating): + with pytest.raises(InvalidOperator): + op = Operator([Eq(u.forward, u)] + s.inject(u.forward, expr=s)) + continue + else: + op = Operator([Eq(u.forward, u)] + s.inject(u.forward, expr=s)) + op() + + if op._options['linearize']: + ustr = 'uL0(t1, rsx + posx + 2, rsy + posy + 2)' + else: + ustr = 'u[t1][rsx + posx + 2][rsy + posy + 2]' + + compiler = configuration['compiler'] + gnu = isinstance(compiler, GNUCompiler) or \ + (isinstance(compiler, CustomCompiler) and compiler._base is GNUCompiler) + if gnu and np.issubdtype(dtypeu, np.complexfloating): + if 'CXX' in op._language: + rd = dtype_to_cstr(dtypeu(0).real.__class__) + assert f'{rd} * p{u.name} = reinterpret_cast<{rd}*>(&uL0' in str(op) + assert f'p{u.name}[0] += std::real(r0)' in str(op) + assert f'p{u.name}[1] += std::imag(r0)' in str(op) + else: + assert f'__real__ {ustr} += __real__ r0' in str(op) + assert f'__imag__ {ustr} += __imag__ r0' in str(op) + else: + assert f'{ustr} += r0' in str(op) + + assert np.isclose(u.data[0, 5, 5], expected)