diff --git a/.github/workflows/docker-bases.yml b/.github/workflows/docker-bases.yml index 2d10b3e7e8..65a12fb289 100644 --- a/.github/workflows/docker-bases.yml +++ b/.github/workflows/docker-bases.yml @@ -66,7 +66,7 @@ jobs: dockerfile: './docker/Dockerfile.cpu' runner: ubuntu-latest - - tag: 'devitocodes/bases:cpu-icc' + - tag: 'devitocodes/bases:cpu-icc, devitocodes/bases:cpu-icx' arch: 'arch=icc' version: '' dockerfile: './docker/Dockerfile.cpu' diff --git a/.github/workflows/pytest-core-nompi.yml b/.github/workflows/pytest-core-nompi.yml index 7a8958210e..738daf79ed 100644 --- a/.github/workflows/pytest-core-nompi.yml +++ b/.github/workflows/pytest-core-nompi.yml @@ -38,7 +38,8 @@ jobs: pytest-ubuntu-py39-gcc9-omp, pytest-osx-py37-clang-omp, pytest-docker-py37-gcc-omp, - pytest-docker-py37-icc-omp + pytest-docker-py37-icc-omp, + pytest-docker-py38-icx-omp ] set: [base, adjoint] include: @@ -105,6 +106,13 @@ jobs: language: "openmp" sympy: "1.11" + - name: pytest-docker-py38-icx-omp + python-version: '3.8' + os: ubuntu-22.04 + arch: "icx" + language: "openmp" + sympy: "1.11" + - set: base test-set: 'not adjoint' @@ -133,13 +141,13 @@ jobs: - name: Set run prefix run: | if [[ "${{ matrix.name }}" =~ "docker" ]]; then - echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} --name testrun devito_img" >> $GITHUB_ENV + echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} -e DEVITO_ARCH=${{ matrix.arch }} --name testrun devito_img" >> $GITHUB_ENV else echo "RUN_CMD=" >> $GITHUB_ENV fi id: set-run - - name: Install GCC ${{ matrix.arch }} + - name: Install ${{ matrix.arch }} compiler if: "runner.os == 'linux' && !contains(matrix.name, 'docker')" run : | sudo apt-get install -y ${{ matrix.arch }} diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index bcff8eb365..70c25bcdb8 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -16,12 +16,17 @@ __all__ = ['platform_registry', 'get_cpu_info', 
'get_gpu_info', 'get_nvidia_cc', 'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', 'get_m1_llvm_path', - 'Platform', 'Cpu64', 'Intel64', 'Amd', 'Arm', 'Power', 'Device', - 'NvidiaDevice', 'AmdDevice', 'IntelDevice', - 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'KNL', 'KNL7210', # Intel - 'AMD', 'ARM', 'M1', 'GRAVITON', # ARM - 'POWER8', 'POWER9', # Other loosely supported CPU architectures - 'AMDGPUX', 'NVIDIAX', 'INTELGPUX'] # GPUs + 'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', + 'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice', + # Intel + 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210', + 'SKX', 'KLX', 'CLX', 'CLK', + # AMD and ARM + 'AMD', 'ARM', 'M1', 'GRAVITON', + # Other loosely supported CPU architectures + 'POWER8', 'POWER9', + # GPUs + 'AMDGPUX', 'NVIDIAX', 'INTELGPUX'] @memoized_func @@ -494,7 +499,7 @@ def get_platform(): if 'phi' in brand: # Intel Xeon Phi? return platform_registry['knl'] - # Unknown Xeon ? May happen on some virtualizes systems... + # Unknown Xeon ? May happen on some virtualized systems... 
return platform_registry['intel64'] elif 'intel' in brand: # Most likely a desktop i3/i5/i7 @@ -607,6 +612,14 @@ class Intel64(Cpu64): known_isas = ('cpp', 'sse', 'avx', 'avx2', 'avx512') +class IntelSkylake(Intel64): + pass + + +class IntelGoldenCode(Intel64): + pass + + class Arm(Cpu64): known_isas = ('fp', 'asimd', 'asimdrdm') @@ -725,11 +738,12 @@ def march(cls): IVB = Intel64('ivb') HSW = Intel64('hsw') BDW = Intel64('bdw', isa='avx2') -SKX = Intel64('skx') -KLX = Intel64('klx') -CLX = Intel64('clx') KNL = Intel64('knl') KNL7210 = Intel64('knl', cores_logical=256, cores_physical=64, isa='avx512') +SKX = IntelSkylake('skx') +KLX = IntelSkylake('klx') +CLX = IntelSkylake('clx') +CLK = IntelSkylake('clk') ARM = Arm('arm') GRAVITON = Arm('graviton') @@ -756,6 +770,7 @@ def march(cls): 'skx': SKX, # Skylake 'klx': KLX, # Kaby Lake 'clx': CLX, # Coffee Lake + 'clk': CLK, # Cascade Lake 'knl': KNL, 'knl7210': KNL7210, 'arm': ARM, # Generic ARM CPU diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 36c15388e8..452084bc91 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -12,8 +12,9 @@ from codepy.jit import compile_from_string from codepy.toolchain import GCCToolchain -from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, SKX, POWER8, POWER9, GRAVITON, - get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path) +from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON, + INTELGPUX, IntelSkylake, get_nvidia_cc, check_cuda_runtime, + get_m1_llvm_path) from devito.exceptions import CompilationError from devito.logger import debug, warning, error from devito.parameters import configuration @@ -375,13 +376,22 @@ class GNUCompiler(Compiler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.cflags += ['-march=native', '-Wno-unused-result', '-Wno-unused-variable', - '-Wno-unused-but-set-variable'] + platform = kwargs.pop('platform', configuration['platform']) + + self.cflags += 
['-march=native', '-Wno-unused-result', + '-Wno-unused-variable', '-Wno-unused-but-set-variable'] + if configuration['safe-math']: self.cflags.append('-fno-unsafe-math-optimizations') else: self.cflags.append('-ffast-math') + if isinstance(platform, IntelSkylake): + # The default is `=256` because avx512 slows down the CPU frequency; + # however, we empirically found that stencils generally benefit + # from `=512` + self.cflags.append('-mprefer-vector-width=512') + language = kwargs.pop('language', configuration['language']) try: if self.version >= Version("4.9.0"): @@ -414,7 +424,7 @@ def __init__(self, *args, **kwargs): class ClangCompiler(Compiler): def __init__(self, *args, **kwargs): - super(ClangCompiler, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.cflags += ['-Wno-unused-result', '-Wno-unused-variable'] if not configuration['safe-math']: @@ -481,7 +491,7 @@ class AOMPCompiler(Compiler): """AMD's fork of Clang for OpenMP offloading on both AMD and NVidia cards.""" def __init__(self, *args, **kwargs): - super(AOMPCompiler, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.cflags += ['-Wno-unused-result', '-Wno-unused-variable'] if not configuration['safe-math']: @@ -531,7 +541,7 @@ def __lookup_cmds__(self): class PGICompiler(Compiler): def __init__(self, *args, **kwargs): - super(PGICompiler, self).__init__(*args, cpp=True, **kwargs) + super().__init__(*args, cpp=True, **kwargs) self.cflags.remove('-std=c99') self.cflags.remove('-O3') @@ -671,39 +681,30 @@ def __lookup_cmds__(self): class IntelCompiler(Compiler): def __init__(self, *args, **kwargs): - super(IntelCompiler, self).__init__(*args, **kwargs) - - self.cflags.append("-xhost") + super().__init__(*args, **kwargs) - language = kwargs.pop('language', configuration['language']) platform = kwargs.pop('platform', configuration['platform']) + language = kwargs.pop('language', configuration['language']) + self.cflags.append("-xHost") if 
configuration['safe-math']: self.cflags.append("-fp-model=strict") else: - self.cflags.append('-fast') + self.cflags.append('-fp-model=fast') - if platform is SKX: + if isinstance(platform, IntelSkylake): # Systematically use 512-bit vectors on skylake self.cflags.append("-qopt-zmm-usage=high") - try: - if self.version >= Version("15.0.0"): - # Append the OpenMP flag regardless of configuration['language'], - # since icc15 and later versions implement OpenMP 4.0, hence - # they support `#pragma omp simd` - self.ldflags.append('-qopenmp') - except (TypeError, ValueError): - if language == 'openmp': - # Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0 - self.ldflags.append('-fopenmp') + if language == 'openmp': + self.ldflags.append('-qopenmp') # Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is if kwargs.get('mpi'): - ver = check_output([self.MPICC, "--version"]).decode("utf-8") - if not ver.startswith("icc"): - warning("The MPI compiler `%s` doesn't use the Intel " - "C/C++ compiler underneath" % self.MPICC) + mpi_distro = sniff_mpi_distro('mpiexec') + if mpi_distro != 'IntelMPI': + warning("Expected Intel MPI distribution with `%s`, but found `%s`" + % (self.__class__.__name__, mpi_distro)) def __lookup_cmds__(self): self.CC = 'icc' @@ -727,9 +728,9 @@ def __lookup_cmds__(self): class IntelKNLCompiler(IntelCompiler): def __init__(self, *args, **kwargs): - super(IntelKNLCompiler, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) - self.cflags += ["-xMIC-AVX512"] + self.cflags.append('-xMIC-AVX512') language = kwargs.pop('language', configuration['language']) @@ -737,6 +738,45 @@ def __init__(self, *args, **kwargs): warning("Running on Intel KNL without OpenMP is highly discouraged") +class OneapiCompiler(IntelCompiler): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + platform = kwargs.pop('platform', configuration['platform']) + language = kwargs.pop('language', 
configuration['language']) + + if language == 'openmp': + self.ldflags.remove('-qopenmp') + self.ldflags.append('-fopenmp') + + if language == 'sycl': + self.cflags.append('-fsycl') + if platform is NVIDIAX: + self.cflags.append('-fsycl-targets=nvptx64-cuda') + else: + self.cflags.append('-fsycl-targets=spir64') + + if platform is NVIDIAX: + self.cflags.append('-fopenmp-targets=nvptx64-cuda') + if platform is INTELGPUX: + self.cflags.append('-fopenmp-targets=spir64') + self.cflags.append('-fopenmp-target-simd') + + if platform is INTELGPUX: + self.cflags.remove('-g') # -g disables some optimizations in IGC + self.cflags.append('-gline-tables-only') + self.cflags.append('-fdebug-info-for-profiling') + + def __lookup_cmds__(self): + # OneAPI HPC ToolKit comes with icpx, which is clang++, + # and icx, which is clang + self.CC = 'icx' + self.CXX = 'icpx' + self.MPICC = 'mpicc' + self.MPICX = 'mpicx' + + class CustomCompiler(Compiler): """ @@ -800,9 +840,11 @@ def __lookup_cmds__(self): 'nvidia': NvidiaCompiler, 'cuda': CudaCompiler, 'osx': ClangCompiler, - 'intel': IntelCompiler, - 'icpc': IntelCompiler, + 'intel': OneapiCompiler, + 'icx': OneapiCompiler, + 'icpx': OneapiCompiler, 'icc': IntelCompiler, + 'icpc': IntelCompiler, 'intel-knl': IntelKNLCompiler, 'knl': IntelKNLCompiler, 'dpcpp': DPCPPCompiler, diff --git a/devito/parameters.py b/devito/parameters.py index 0c56f10b44..3805ac73ae 100644 --- a/devito/parameters.py +++ b/devito/parameters.py @@ -235,8 +235,11 @@ class switchconfig(object): Decorator to temporarily change `configuration` parameters. 
""" - def __init__(self, **params): - self.params = {k.replace('_', '-'): v for k, v in params.items()} + def __init__(self, condition=True, **params): + if condition: + self.params = {k.replace('_', '-'): v for k, v in params.items()} + else: + self.params = {} def __call__(self, func, *args, **kwargs): @wraps(func) diff --git a/tests/conftest.py b/tests/conftest.py index 9f1246a294..7db4cb2fbd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,8 @@ from devito.checkpointing import NoopRevolver from devito.finite_differences.differentiable import EvalDerivative from devito.arch import Cpu64, Device, sniff_mpi_distro, Arm -from devito.arch.compiler import compiler_registry, IntelCompiler, NvidiaCompiler +from devito.arch.compiler import (compiler_registry, IntelCompiler, OneapiCompiler, + NvidiaCompiler) from devito.ir.iet import (FindNodes, FindSymbols, Iteration, ParallelBlock, retrieve_iteration_tree) from devito.tools import as_tuple @@ -26,7 +27,8 @@ def skipif(items, whole_module=False): # Sanity check accepted = set() accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc', - 'device-aomp', 'cpu64-icc', 'cpu64-nvc', 'cpu64-arm', 'chkpnt'}) + 'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc', 'cpu64-arm', + 'cpu64-icpx', 'chkpnt'}) accepted.update({'nompi', 'nodevice'}) unknown = sorted(set(items) - accepted) if unknown: @@ -70,6 +72,12 @@ def skipif(items, whole_module=False): isinstance(configuration['platform'], Cpu64): skipit = "`icc+cpu64` won't work with this test" break + # Skip if it won't run with OneAPICompiler + if i == 'cpu64-icx' and \ + isinstance(configuration['compiler'], OneapiCompiler) and \ + isinstance(configuration['platform'], Cpu64): + skipit = "`icx+cpu64` won't work with this test" + break # Skip if it won't run on Arm if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm): skipit = "Arm doesn't support x86-specific instructions" diff --git a/tests/test_benchmark.py 
b/tests/test_benchmark.py index 1b45290ae2..2b0988fc33 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -4,9 +4,11 @@ from benchmarks.user.benchmark import run from devito import configuration, switchconfig +from conftest import skipif from subprocess import check_call +@skipif('cpu64-icx') @pytest.mark.parametrize('mode, problem, op', [ ('run', 'acoustic', 'forward'), ('run', 'acoustic', 'adjoint'), ('run', 'acoustic', 'jacobian'), ('run', 'acoustic', 'jacobian_adjoint'), diff --git a/tests/test_buffering.py b/tests/test_buffering.py index 29aea2bc04..997a2f571b 100644 --- a/tests/test_buffering.py +++ b/tests/test_buffering.py @@ -701,7 +701,6 @@ def test_everything(): assert np.all(u.data == u1.data) -@skipif('cpu64-icc') @pytest.mark.parametrize('subdomain', ['domain', 'interior']) def test_stencil_issue_1915(subdomain): nt = 5 diff --git a/tests/test_dimension.py b/tests/test_dimension.py index 8b06e39d33..a5a4804d88 100644 --- a/tests/test_dimension.py +++ b/tests/test_dimension.py @@ -9,7 +9,8 @@ SparseFunction, SparseTimeFunction, Eq, Operator, Constant, Dimension, DefaultDimension, SubDimension, switchconfig, SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension, - CustomDimension, dimensions) + CustomDimension, dimensions, configuration) +from devito.arch.compiler import IntelCompiler, OneapiCompiler from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes, FindSymbols, retrieve_iteration_tree) from devito.symbolics import indexify, retrieve_functions, IntDiv @@ -1382,6 +1383,8 @@ def test_affiness(self): iterations = [i for i in FindNodes(Iteration).visit(op) if i.dim is not time] assert all(i.is_Affine for i in iterations) + @switchconfig(condition=isinstance(configuration['compiler'], + (IntelCompiler, OneapiCompiler)), safe_math=True) def test_sparse_time_function(self): nt = 20