diff --git a/CMakeLists.txt b/CMakeLists.txt index c6750758f9..1a528975e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,10 @@ SET(EMBREE_PROJECT_COMPILATION ON) include(CMakeDependentOption) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + # We use our own strip tool on macOS to sign during install. This is required as CMake modifies RPATH of the binary during install. IF (APPLE AND EMBREE_SIGN_FILE) SET(EMBREE_STRIP ${CMAKE_STRIP}) @@ -235,9 +239,11 @@ OPTION(EMBREE_MIN_WIDTH "Enables min-width feature to enlarge curve and point th IF (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm64")) MESSAGE(STATUS "Building for Apple silicon") SET(EMBREE_ARM ON) + SET(EMBREE_ISA_AVX512SKX OFF) ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") MESSAGE(STATUS "Building for AArch64") SET(EMBREE_ARM ON) + SET(EMBREE_ISA_AVX512SKX OFF) ENDIF() SET(EMBREE_TASKING_SYSTEM "TBB" CACHE STRING "Selects tasking system") @@ -253,14 +259,33 @@ IF (EMBREE_TASKING_SYSTEM STREQUAL "TBB") SET(TASKING_TBB ON ) SET(TASKING_INTERNAL OFF) SET(TASKING_PPL OFF ) + SET(TASKING_HPX OFF ) ADD_DEFINITIONS(-DTASKING_TBB) LIST(APPEND ISPC_DEFINITIONS -DTASKING_TBB) ELSEIF (EMBREE_TASKING_SYSTEM STREQUAL "PPL") SET(TASKING_PPL ON ) SET(TASKING_TBB OFF ) + SET(TASKING_HPX OFF ) SET(TASKING_INTERNAL OFF) ADD_DEFINITIONS(-DTASKING_PPL) LIST(APPEND ISPC_DEFINITIONS -DTASKING_PPL) +ELSEIF (EMBREE_TASKING_SYSTEM STREQUAL "HPX") + IF(NOT HPX_DIR AND HPX_ROOT) + SET(HPX_DIR ${HPX_ROOT}/lib/cmake/HPX) + ENDIF() + + IF(NOT HPX_DIR AND EXISTS "$ENV{HPX_DIR}") + SET(HPX_DIR $ENV{HPX_DIR}) + ENDIF() + + UNSET(CMAKE_CXX_STANDARD) + SET(CMAKE_CXX_STANDARD 20) + SET(TASKING_HPX ON ) + SET(TASKING_PPL OFF ) + SET(TASKING_TBB OFF ) + SET(TASKING_INTERNAL OFF) + ADD_DEFINITIONS(-DTASKING_HPX) + LIST(APPEND ISPC_DEFINITIONS -DTASKING_HPX) 
ELSE() SET(TASKING_INTERNAL ON ) SET(TASKING_TBB OFF) @@ -379,8 +404,10 @@ ELSE() ENDIF() IF (EMBREE_ARM) + message(STATUS "NEON, NEON2X") SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE NEON NEON2X) ELSE() + message(STATUS "SSE2 SSE4.2 AVX AVX2 AVX512") SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 DEFAULT) ENDIF() @@ -390,9 +417,11 @@ IF (EMBREE_MAX_ISA STREQUAL "NONE") IF (APPLE) OPTION(EMBREE_ISA_NEON "Enables NEON ISA." OFF) OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." ON) + TRY_COMPILE(COMPILER_SUPPORTS_ARM "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_ARM}) ELSE() OPTION(EMBREE_ISA_NEON "Enables NEON ISA." ON) OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." OFF) + TRY_COMPILE(COMPILER_SUPPORTS_ARM "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_ARM}) ENDIF() ELSE() TRY_COMPILE(COMPILER_SUPPORTS_AVX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX}) diff --git a/README.md b/README.md index f7a7d89b90..4afdef8198 100644 --- a/README.md +++ b/README.md @@ -305,7 +305,7 @@ macOS M1 - Apple Clang 12.0.5 (macOS 11.7.1) -IMPORTANT: Unfortunatlly, latest version of the Intel® oneAPI DPC++/C++ +IMPORTANT: Unfortunately, latest version of the Intel® oneAPI DPC++/C++ Compiler (2023.2.1), has a bug that doesn't allow Embree to run correctly with ISAs >= AVX2. Please wait for 2024.0.0, which will be released soon after Embree 4.3.0. @@ -325,6 +325,11 @@ installation, put the path to `ispc` permanently into your `PATH` environment variable or you set the `EMBREE_ISPC_EXECUTABLE` variable to point at the ISPC executable during CMake configuration. +Embree supports using the HPX runtime system as the tasking system. HPX can be +enabled by setting `EMBREE_TASKING_SYSTEM=HPX`. 
If HPX is enabled, one of the CMake +variables `HPX_DIR` or `HPX_ROOT` must be set. `HPX_DIR` is the directory that +contains `HPXConfig.cmake`; `HPX_ROOT` is the HPX installation prefix, from which `HPX_DIR` is derived as `${HPX_ROOT}/lib/cmake/HPX`. + You additionally have to install CMake 3.1.0 or higher and the developer version of [GLFW](https://www.glfw.org/) version 3. @@ -818,8 +823,8 @@ parameters that can be configured in CMake: + `EMBREE_TASKING_SYSTEM`: Chooses between Intel® Threading TBB Building Blocks (TBB), Parallel Patterns Library (PPL) (Windows - only), or an internal tasking system (INTERNAL). By default, TBB is - used. + only), HPX (HPX), or an internal tasking system (INTERNAL). By default, + TBB is used. + `EMBREE_TBB_ROOT`: If Intel® Threading Building Blocks (TBB) is used as a tasking system, search the library in this directory diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 47868652d7..04e01981c7 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -5,4 +5,4 @@ ADD_SUBDIRECTORY(sys) ADD_SUBDIRECTORY(math) ADD_SUBDIRECTORY(simd) ADD_SUBDIRECTORY(lexers) -ADD_SUBDIRECTORY(tasking) \ No newline at end of file +ADD_SUBDIRECTORY(tasking) diff --git a/common/algorithms/parallel_for.h b/common/algorithms/parallel_for.h index fd5213e70a..de21f6bbb7 100644 --- a/common/algorithms/parallel_for.h +++ b/common/algorithms/parallel_for.h @@ -8,6 +8,11 @@ #include "../math/emath.h" #include "../math/range.h" +#if defined(TASKING_HPX) +#include +#include +#endif + namespace embree { /* parallel_for without range */ @@ -46,6 +51,20 @@ namespace embree concurrency::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); }); +#elif defined(TASKING_HPX) + std::vector> futures; + futures.reserve(N > Index(0) ? size_t(N-1) : size_t(0)); /* N-1 would underflow for N == 0 */ + + hpx::threads::run_as_hpx_thread([N, &func, &futures]() + { + for(Index i = Index(1); i < N; ++i) { /* index has the same type as N, no int/Index mismatch */ + futures.push_back( hpx::async([i, &func]() { func(i); }) ); + } + + if (N > Index(0)) func(0); /* iteration 0 runs inline, but only when the loop is non-empty */ + hpx::wait_all(futures); + }); + #else # error "no tasking system enabled" #endif @@ -84,7 +103,20 @@ namespace embree 
concurrency::parallel_for(first, last, Index(1) /*minStepSize*/, [&](Index i) { func(range(i,i+1)); }); +#elif defined(TASKING_HPX) + auto irange = hpx::util::counting_shape(last-first); + + hpx::future<void> fut = + hpx::threads::run_as_hpx_thread([first, last, minStepSize, &irange, &func]() -> hpx::future<void> { + hpx::experimental::for_loop_strided(hpx::execution::par, hpx::util::begin(irange), hpx::util::end(irange), minStepSize, + [first, last, minStepSize, &func](auto i) { /* *i is a 0-based offset into [first,last), advanced in steps of minStepSize */ + const Index b = first + Index(*i); func(range<Index>(b, (last - b < minStepSize) ? last : Index(b + minStepSize))); /* chunk of at most minStepSize elements, clamped to last */ + }); + + return hpx::make_ready_future(); + }); + + fut.wait(); #else # error "no tasking system enabled" #endif diff --git a/common/algorithms/parallel_reduce.h b/common/algorithms/parallel_reduce.h index b52b1e2e13..40a4485ef3 100644 --- a/common/algorithms/parallel_reduce.h +++ b/common/algorithms/parallel_reduce.h @@ -5,6 +5,11 @@ #include "parallel_for.h" +#if defined(TASKING_HPX) +#include +#include +#endif + namespace embree { template @@ -69,7 +74,7 @@ namespace embree throw std::runtime_error("task cancelled"); return v; #endif -#else // TASKING_PPL +#elif defined(TASKING_PPL) struct AlignedValue { char storage[__alignof(Value)+sizeof(Value)]; @@ -107,6 +112,57 @@ namespace embree }; const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction); return v; +#elif defined(TASKING_HPX) +/* + binner = parallel_reduce(begin,end,blockSize,binner, + [&](const range& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, + [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); +*/ + struct AlignedValue + { + char storage[__alignof(Value)+sizeof(Value)]; + static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); }; + Value* getValuePtr() { return reinterpret_cast(alignUp(uintptr_t(storage), __alignof(Value))); } + const Value* getValuePtr() const { return reinterpret_cast(alignUp(uintptr_t(storage), 
__alignof(Value))); } + AlignedValue(const Value& v) { new(getValuePtr()) Value(v); } + AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); } + AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); }; + AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; }; + AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; }; + operator Value() const { return *getValuePtr(); } + }; + + std::function red = [&](AlignedValue x, AlignedValue y) -> AlignedValue { + return AlignedValue(reduction(x, y)); + }; + + std::function<AlignedValue(Index)> xfm = [&](Index i) -> AlignedValue { /* i is a 0-based offset into [first,last) */ + return AlignedValue(func(range<Index>(first+i, first+i+1))); /* one-element range; range(i,i) was empty and ignored first */ + }; + + const Index sz = last-first; + auto irange = hpx::util::counting_shape(sz); + auto beg = hpx::util::begin(irange); + auto end = hpx::util::end(irange); + + Value v = + hpx::threads::run_as_hpx_thread([&red, &xfm, &beg, &end, &identity]() -> Value + { + + Value v = hpx::transform_reduce( + hpx::execution::par, + beg, end, + AlignedValue(identity), + red, + xfm + ); + + return v; + }); + + return v; +#else +# error "no tasking system enabled" #endif } diff --git a/common/cmake/check_isa.cpp b/common/cmake/check_isa.cpp index a9879d2dc1..2d5249fd11 100644 --- a/common/cmake/check_isa.cpp +++ b/common/cmake/check_isa.cpp @@ -26,6 +26,8 @@ char const *info_isa = "ISA" ":" "AVX"; char const *info_isa = "ISA" ":" "SSE42"; +#elif defined(__arm__) || defined(__aarch64__) +char const *info_isa = "ISA" ":" "ARM"; #else // defined(__SSE2__) char const *info_isa = "ISA" ":" "SSE2"; #endif int main(int argc, char **argv) diff --git a/common/cmake/clang.cmake b/common/cmake/clang.cmake index 8d05e4a449..95ed9f2d40 100644 --- a/common/cmake/clang.cmake +++ b/common/cmake/clang.cmake @@ -127,7 +127,9 @@ ELSE() SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") # enable 
full optimizations IF (APPLE) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.7") # makes sure code runs on older MacOSX versions + IF(NOT CMAKE_OSX_DEPLOYMENT_TARGET) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.7") # makes sure code runs on older MacOSX versions + ENDIF() SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # link against libc++ which supports C++11 features ELSE(APPLE) IF (NOT EMBREE_ADDRESS_SANITIZER) # for address sanitizer this causes link errors @@ -140,6 +142,10 @@ ELSE() SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack") # we do not need an executable stack ENDIF() ENDIF() + + SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-aligned-allocation") + SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-aligned-allocation") + SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-aligned-allocation") ENDIF(APPLE) diff --git a/common/sys/CMakeLists.txt b/common/sys/CMakeLists.txt index 66fc708313..26047bffc8 100644 --- a/common/sys/CMakeLists.txt +++ b/common/sys/CMakeLists.txt @@ -1,8 +1,10 @@ ## Copyright 2009-2021 Intel Corporation ## SPDX-License-Identifier: Apache-2.0 -SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) -FIND_PACKAGE(Threads REQUIRED) +IF(NOT TASKING_HPX) + SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) + FIND_PACKAGE(Threads REQUIRED) +endif() ADD_LIBRARY(sys STATIC sysinfo.cpp @@ -20,9 +22,28 @@ ADD_LIBRARY(sys STATIC SET_PROPERTY(TARGET sys PROPERTY FOLDER common) SET_PROPERTY(TARGET sys APPEND PROPERTY COMPILE_FLAGS " ${FLAGS_LOWEST}") -TARGET_LINK_LIBRARIES(sys ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) -IF (EMBREE_SYCL_SUPPORT) - TARGET_LINK_LIBRARIES(sys ${SYCL_LIB_NAME}) +IF(TASKING_HPX) + IF(HPX_FOUND) + TARGET_INCLUDE_DIRECTORIES(sys PUBLIC "${HPX_INCLUDE_DIRS}") + TARGET_LINK_LIBRARIES(sys PUBLIC ${CMAKE_DL_LIBS} HPX::hpx) + ELSE() + find_package(HPX REQUIRED) + IF(HPX_FOUND) + TARGET_INCLUDE_DIRECTORIES(sys PUBLIC 
"${HPX_INCLUDE_DIRS}") + IF (EMBREE_SYCL_SUPPORT) + TARGET_LINK_LIBRARIES(sys PUBLIC ${CMAKE_DL_LIBS} ${SYCL_LIB_NAME} HPX::hpx) + ELSE() + TARGET_LINK_LIBRARIES(sys PUBLIC ${CMAKE_DL_LIBS} HPX::hpx) + ENDIF() + ELSE() + message("-- Not found HPX") + ENDIF() + ENDIF() +ELSE() + TARGET_LINK_LIBRARIES(sys ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) + IF (EMBREE_SYCL_SUPPORT) + TARGET_LINK_LIBRARIES(sys ${SYCL_LIB_NAME}) + ENDIF() ENDIF() IF (EMBREE_STATIC_LIB) diff --git a/common/sys/barrier.cpp b/common/sys/barrier.cpp index 0c0e39d92d..8265bc4279 100644 --- a/common/sys/barrier.cpp +++ b/common/sys/barrier.cpp @@ -101,7 +101,7 @@ namespace embree __forceinline void wait() { mutex.lock(); - count++; + count+=1; if (count == barrierSize) { count = 0; @@ -128,19 +128,33 @@ namespace embree namespace embree { BarrierSys::BarrierSys (size_t N) { +#if defined(TASKING_HPX) + b = std::make_shared>(N); +#else opaque = new BarrierSysImplementation(N); +#endif } BarrierSys::~BarrierSys () { +#if !defined(TASKING_HPX) delete (BarrierSysImplementation*) opaque; +#endif } void BarrierSys::init(size_t count) { +#if defined(TASKING_HPX) + b.reset(new hpx::barrier<>(count)); +#else ((BarrierSysImplementation*) opaque)->init(count); +#endif } void BarrierSys::wait() { +#if defined(TASKING_HPX) + b->arrive_and_wait(); +#else ((BarrierSysImplementation*) opaque)->wait(); +#endif } LinearBarrierActive::LinearBarrierActive (size_t N) diff --git a/common/sys/barrier.h b/common/sys/barrier.h index e1580f41a9..d9efb5b643 100644 --- a/common/sys/barrier.h +++ b/common/sys/barrier.h @@ -7,6 +7,11 @@ #include "sysinfo.h" #include "atomic.h" +#if defined(TASKING_HPX) +#include +#include +#endif + namespace embree { /*! system barrier using operating system */ @@ -31,7 +36,12 @@ namespace embree void wait(); private: + +#if defined(TASKING_HPX) + std::shared_ptr< hpx::barrier<> > b; +#else void* opaque; +#endif }; /*! 
fast active barrier using atomic counter */ diff --git a/common/tasking/CMakeLists.txt b/common/tasking/CMakeLists.txt index bf790ef63c..cc7e58242e 100644 --- a/common/tasking/CMakeLists.txt +++ b/common/tasking/CMakeLists.txt @@ -67,6 +67,15 @@ ELSEIF (TASKING_TBB) ELSEIF (TASKING_PPL) ADD_LIBRARY(tasking STATIC taskschedulerppl.cpp) TARGET_LINK_LIBRARIES(tasking PUBLIC ${PPL_LIBRARIES}) +ELSEIF (TASKING_HPX) + find_package(HPX REQUIRED) + if(HPX_FOUND) + ADD_LIBRARY(tasking STATIC taskschedulerhpx.cpp) + TARGET_INCLUDE_DIRECTORIES(tasking PUBLIC "${HPX_INCLUDE_DIRS}") + TARGET_LINK_LIBRARIES(tasking PUBLIC HPX::hpx) + else() + message("-- Not found HPX") + endif() ENDIF() SET_PROPERTY(TARGET tasking PROPERTY FOLDER common) diff --git a/common/tasking/taskscheduler.h b/common/tasking/taskscheduler.h index edfffe0e57..d10a307780 100644 --- a/common/tasking/taskscheduler.h +++ b/common/tasking/taskscheduler.h @@ -9,6 +9,8 @@ # include "taskschedulertbb.h" #elif defined(TASKING_PPL) # include "taskschedulerppl.h" +#elif defined(TASKING_HPX) +# include "taskschedulerhpx.h" #else # error "no tasking system enabled" #endif diff --git a/common/tasking/taskschedulerhpx.cpp b/common/tasking/taskschedulerhpx.cpp new file mode 100644 index 0000000000..2c5c6c538b --- /dev/null +++ b/common/tasking/taskschedulerhpx.cpp @@ -0,0 +1,34 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "taskschedulerhpx.h" + +#include +#include + +#include +#include + +namespace embree +{ + static bool g_hpx_threads_initialized = false; + + void TaskScheduler::create(size_t numThreads, bool set_affinity, bool start_threads) + { + g_hpx_threads_initialized = true; + std::string count = std::to_string(numThreads); + std::string thread_arg = "--hpx:threads=" + count; + hpx::init_params params; + params.cfg = { thread_arg }; + hpx::start(nullptr, 0, nullptr, params); + numThreads = threadCount(); + } + + void TaskScheduler::destroy() + { + if 
(g_hpx_threads_initialized) { + hpx::post([]() { hpx::finalize(); }); + hpx::stop(); + } + } +} diff --git a/common/tasking/taskschedulerhpx.h b/common/tasking/taskschedulerhpx.h new file mode 100644 index 0000000000..8c25a982b4 --- /dev/null +++ b/common/tasking/taskschedulerhpx.h @@ -0,0 +1,51 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#ifndef __APPLE__ +#if !defined(__forceinline) +#define __forceinline __attribute__((always_inline)) +#endif +#elif defined(__APPLE__) && (__arm__) +#if !defined(__forceinline) +#define __forceinline __attribute__((always_inline)) +#endif +#endif + +#include + +namespace embree +{ + struct TaskScheduler + { + +#ifdef __APPLE__ +__attribute__((visibility("default"))) +#endif + /*! initializes the task scheduler */ + static void create(size_t numThreads, bool set_affinity, bool start_threads); + +#ifdef __APPLE__ +__attribute__((visibility("default"))) +#endif + /*! destroys the task scheduler again */ + static void destroy(); + + /* returns the ID of the current thread */ + static size_t threadID() { + return hpx::get_worker_thread_num(); + } + + /* returns the index (0..threadCount-1) of the current thread */ + /* FIXME: threadIndex is NOT supported by HPX! 
*/ + static size_t threadIndex() { + return 0; + } + + /* returns the total number of threads */ + static size_t threadCount() { + return hpx::get_num_worker_threads(); /* pool size; get_worker_thread_num() would return the calling thread's index instead */ + } + }; +}; diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 60f27ff413..beae2c0e7a 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -283,10 +283,23 @@ ENDIF() # ENDIF() #ENDIF () +IF(HPX_DIR) + find_package(HPX REQUIRED) + IF(NOT HPX_FOUND) + MESSAGE(FATAL_ERROR "HPX Not Found") + ENDIF() +ENDIF() + IF (EMBREE_ISA_SSE42 AND EMBREE_LIBRARY_FILES_SSE42) DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_SSE42}) ADD_LIBRARY(embree_sse42 STATIC ${EMBREE_LIBRARY_FILES_SSE42}) - TARGET_LINK_LIBRARIES(embree_sse42 PRIVATE tasking) + IF(NOT HPX_FOUND) + TARGET_LINK_LIBRARIES(embree_sse42 PRIVATE tasking) + ELSE() + TARGET_INCLUDE_DIRECTORIES(embree_sse42 PUBLIC "${HPX_INCLUDE_DIRS}") + TARGET_LINK_LIBRARIES(embree_sse42 PRIVATE tasking PUBLIC HPX::hpx) + ENDIF() + SET_TARGET_PROPERTIES(embree_sse42 PROPERTIES COMPILE_FLAGS "${FLAGS_SSE42}") SET_PROPERTY(TARGET embree_sse42 PROPERTY FOLDER kernels) SET(EMBREE_LIBRARIES ${EMBREE_LIBRARIES} embree_sse42) @@ -300,7 +313,13 @@ ENDIF () IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX) DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX}) ADD_LIBRARY(embree_avx STATIC ${EMBREE_LIBRARY_FILES_AVX}) - TARGET_LINK_LIBRARIES(embree_avx PRIVATE tasking) + IF(NOT HPX_FOUND) + TARGET_LINK_LIBRARIES(embree_avx PRIVATE tasking) + ELSE() + TARGET_INCLUDE_DIRECTORIES(embree_avx PUBLIC "${HPX_INCLUDE_DIRS}") + TARGET_LINK_LIBRARIES(embree_avx PRIVATE tasking PUBLIC HPX::hpx) + ENDIF() + SET_TARGET_PROPERTIES(embree_avx PROPERTIES COMPILE_FLAGS "${FLAGS_AVX}") SET_PROPERTY(TARGET embree_avx PROPERTY FOLDER kernels) SET(EMBREE_LIBRARIES ${EMBREE_LIBRARIES} embree_avx) @@ -314,7 +333,13 @@ ENDIF() IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2) 
DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2}) ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2}) - TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking) + IF(NOT HPX_FOUND) + TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking) + ELSE() + TARGET_INCLUDE_DIRECTORIES(embree_avx2 PUBLIC "${HPX_INCLUDE_DIRS}") + TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking PUBLIC HPX::hpx) + ENDIF() + SET_TARGET_PROPERTIES(embree_avx2 PROPERTIES COMPILE_FLAGS "${FLAGS_AVX2}") SET_PROPERTY(TARGET embree_avx2 PROPERTY FOLDER kernels) SET(EMBREE_LIBRARIES ${EMBREE_LIBRARIES} embree_avx2) @@ -328,7 +353,13 @@ ENDIF() IF (EMBREE_ISA_AVX512 AND EMBREE_LIBRARY_FILES_AVX512) DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX512}) ADD_LIBRARY(embree_avx512 STATIC ${EMBREE_LIBRARY_FILES_AVX512}) - TARGET_LINK_LIBRARIES(embree_avx512 PRIVATE tasking) + IF(NOT HPX_FOUND) + TARGET_LINK_LIBRARIES(embree_avx512 PRIVATE tasking) + ELSE() + TARGET_INCLUDE_DIRECTORIES(embree_avx512 PUBLIC "${HPX_INCLUDE_DIRS}") + TARGET_LINK_LIBRARIES(embree_avx512 PRIVATE tasking PUBLIC HPX::hpx) + ENDIF() + SET_TARGET_PROPERTIES(embree_avx512 PROPERTIES COMPILE_FLAGS "${FLAGS_AVX512}") SET_PROPERTY(TARGET embree_avx512 PROPERTY FOLDER kernels) SET(EMBREE_LIBRARIES ${EMBREE_LIBRARIES} embree_avx512) @@ -340,8 +371,16 @@ IF (EMBREE_ISA_AVX512 AND EMBREE_LIBRARY_FILES_AVX512) ENDIF() TARGET_LINK_LIBRARIES(embree PRIVATE ${EMBREE_LIBRARIES} sys math simd lexers tasking) -IF (EMBREE_SYCL_SUPPORT) - TARGET_LINK_LIBRARIES(embree PRIVATE embree_rthwif ${SYCL_LIB_NAME} ze_wrapper PUBLIC embree_sycl) +IF(HPX_FOUND) + IF (EMBREE_SYCL_SUPPORT) + TARGET_LINK_LIBRARIES(embree PRIVATE embree_rthwif ${SYCL_LIB_NAME} ze_wrapper PUBLIC embree_sycl HPX::hpx) + ELSE() + TARGET_LINK_LIBRARIES(embree PUBLIC HPX::hpx) + ENDIF() +ELSE() + IF (EMBREE_SYCL_SUPPORT) + TARGET_LINK_LIBRARIES(embree PRIVATE embree_rthwif ${SYCL_LIB_NAME} 
ze_wrapper PUBLIC embree_sycl) + ENDIF() ENDIF() #TARGET_LINK_LIBRARIES(embree PRIVATE ${EMBREE_LIBRARIES} sys math simd lexers tasking ${CMAKE_LINK_FLAGS_SYCL}) # FIXME: enable this line @@ -350,7 +389,6 @@ target_include_directories(embree PUBLIC $ $) - # libtbb is located in same install folder as libembree IF(WIN32) ELSEIF(APPLE) diff --git a/kernels/common/alloc.cpp b/kernels/common/alloc.cpp index cc2f9976f2..4b78d6417f 100644 --- a/kernels/common/alloc.cpp +++ b/kernels/common/alloc.cpp @@ -3,10 +3,13 @@ #include "alloc.h" #include "../../common/sys/thread.h" -#if defined(APPLE) && defined(__aarch64__) +#if defined(TASKING_HPX) +#include "../../common/sys/barrier.h" +#elif defined(APPLE) && defined(__aarch64__) #include "../../common/sys/barrier.h" #endif + namespace embree { __thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr; diff --git a/kernels/common/rtcore.cpp b/kernels/common/rtcore.cpp index 8dc5d7045b..ed57e2a831 100644 --- a/kernels/common/rtcore.cpp +++ b/kernels/common/rtcore.cpp @@ -7,6 +7,7 @@ #include "device.h" #include "scene.h" #include "context.h" +#include "../common/tasking/taskscheduler.h" #include "../geometry/filter.h" #include "../../include/embree4/rtcore_ray.h" using namespace embree; diff --git a/kernels/common/rtcore.h b/kernels/common/rtcore.h index 73a061de11..da49c948a8 100644 --- a/kernels/common/rtcore.h +++ b/kernels/common/rtcore.h @@ -4,6 +4,7 @@ #pragma once #include "../../include/embree4/rtcore.h" + RTC_NAMESPACE_USE namespace embree diff --git a/kernels/common/scene.cpp b/kernels/common/scene.cpp index fda8dd938a..6940793e5f 100644 --- a/kernels/common/scene.cpp +++ b/kernels/common/scene.cpp @@ -21,6 +21,7 @@ namespace embree struct TaskGroup { /*! 
global lock step task scheduler */ #if defined(TASKING_INTERNAL) + using mutex_t = MutexSys; MutexSys schedulerMutex; Ref scheduler; #elif defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION @@ -29,6 +30,9 @@ namespace embree tbb::task_group group; #elif defined(TASKING_PPL) concurrency::task_group group; +#elif defined(TASKING_HPX) + using mutex_t = hpx::mutex; + MutexSys schedulerMutex; #endif }; @@ -866,7 +870,7 @@ namespace embree return scene_flags; } -#if defined(TASKING_INTERNAL) +#if defined(TASKING_INTERNAL) void Scene::commit (bool join) { @@ -908,6 +912,27 @@ namespace embree #endif +#if defined(TASKING_HPX) + + void Scene::commit (bool join) + { +#if defined(TASKING_HPX) + if (join) + throw_RTCError(RTC_ERROR_INVALID_OPERATION,"rtcJoinCommitScene not supported with HPX"); +#endif + + /* try to obtain build lock */ + std::lock_guard lock(buildMutex); + + checkIfModifiedAndSet (); + if (!isModified()) { + return; + } + + hpx::threads::run_as_hpx_thread([&]() { commit_task(); }); + } +#endif + #if defined(TASKING_TBB) void Scene::commit (bool join) diff --git a/kernels/rthwif/CMakeLists.txt b/kernels/rthwif/CMakeLists.txt index cd87b47276..91dfd3c2ad 100644 --- a/kernels/rthwif/CMakeLists.txt +++ b/kernels/rthwif/CMakeLists.txt @@ -14,6 +14,13 @@ SET(RTHWIF_VERSION ${RTHWIF_VERSION_MAJOR}.${RTHWIF_VERSION_MINOR}.${RTHWIF_VERS SET(CMAKE_CXX_STANDARD 17) +IF(HPX_DIR) + find_package(HPX REQUIRED) + IF(NOT HPX_FOUND) + MESSAGE(FATAL_ERROR "HPX Not Found") + ENDIF() +ENDIF() + IF (NOT DEFINED EMBREE_VERSION_MAJOR) SET(RTHWIF_STANDALONE ON) @@ -25,7 +32,10 @@ IF (NOT DEFINED EMBREE_VERSION_MAJOR) OPTION(EMBREE_SYCL_IMPLICIT_DISPATCH_GLOBALS "Using L0 allocated Dispatch Globals" ON) SET(EMBREE_RTHWIF_STATIC_LIB OFF) - SET(EMBREE_BUILDER_TBB_STATIC ON) + + if(TASKING_TBB) + SET(EMBREE_BUILDER_TBB_STATIC ON) + endif() SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") @@ -53,7 +63,9 
@@ IF (NOT DEFINED EMBREE_VERSION_MAJOR) GET_FILENAME_COMPONENT(SYCL_COMPILER_DIR ${CMAKE_CXX_COMPILER} PATH) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem \"${SYCL_COMPILER_DIR}/../include/sycl\" -isystem \"${SYCL_COMPILER_DIR}/../include/\"") # disable warning from SYCL header (FIXME: why required?) - find_package(TBB 2020) + if(TASKING_TBB) + find_package(TBB 2020) + endif() ELSE() SET(RTHWIF_NAME embree_rthwif) @@ -78,7 +90,7 @@ endif() # information we need when TASKING_TBB is used set(TBB_TARGET tasking) -if (EMBREE_BUILDER_TBB_STATIC OR NOT TASKING_TBB) +if (EMBREE_BUILDER_TBB_STATIC OR NOT TASKING_TBB AND NOT TASKING_HPX) #################################################################### # fetch TBB and build static version of it @@ -158,13 +170,23 @@ IF (EMBREE_SYCL_RT_VALIDATION_API) ENDIF() ADD_LIBRARY(embree_rthwif ${RTHWIF_LIB_TYPE} rtbuild/rtbuild.cpp rtbuild/qbvh6.cpp rtbuild/statistics.cpp) -TARGET_LINK_LIBRARIES(embree_rthwif PUBLIC ${EMBREE_RTHWIF_SYCL} PRIVATE ${TBB_TARGET} simd sys) +IF(HPX_FOUND) + ADD_DEFINITIONS(-DTASKING_HPX) + TARGET_LINK_LIBRARIES(embree_rthwif PUBLIC ${EMBREE_RTHWIF_SYCL} HPX::hpx PRIVATE ${TBB_TARGET} simd sys) +ELSE() + TARGET_LINK_LIBRARIES(embree_rthwif PUBLIC ${EMBREE_RTHWIF_SYCL} PRIVATE ${TBB_TARGET} simd sys) +ENDIF() + SET_TARGET_PROPERTIES(embree_rthwif PROPERTIES OUTPUT_NAME ${RTHWIF_NAME}) IF (EMBREE_RTHWIF_STATIC_LIB) TARGET_COMPILE_DEFINITIONS(embree_rthwif PUBLIC EMBREE_RTHWIF_STATIC_LIB) ENDIF() -TARGET_LINK_LIBRARIES(embree_rthwif PRIVATE ze_wrapper) +IF(HPX_FOUND) + TARGET_LINK_LIBRARIES(embree_rthwif PRIVATE ze_wrapper HPX::hpx) +ELSE() + TARGET_LINK_LIBRARIES(embree_rthwif PRIVATE ze_wrapper) +ENDIF() TARGET_COMPILE_DEFINITIONS(embree_rthwif PUBLIC EMBREE_SYCL_SUPPORT) IF (EMBREE_STATIC_LIB OR NOT EMBREE_RTHWIF_STATIC_LIB) @@ -176,5 +198,3 @@ IF (EMBREE_STATIC_LIB OR NOT EMBREE_RTHWIF_STATIC_LIB) ENDIF() ADD_SUBDIRECTORY(testing) - - diff --git a/kernels/subdiv/tessellation_cache.cpp 
b/kernels/subdiv/tessellation_cache.cpp index d00fa65433..7fb4ea8dcb 100644 --- a/kernels/subdiv/tessellation_cache.cpp +++ b/kernels/subdiv/tessellation_cache.cpp @@ -4,6 +4,9 @@ #include "tessellation_cache.h" #include "../../common/tasking/taskscheduler.h" +#if defined(TASKING_HPX) +#include "../../common/sys/barrier.h" +#endif namespace embree { diff --git a/third-party-programs-hpx.txt b/third-party-programs-hpx.txt new file mode 100644 index 0000000000..8e32d1c138 --- /dev/null +++ b/third-party-programs-hpx.txt @@ -0,0 +1,40 @@ +HPX Third Party Programs File + +This file contains the list of third party software ("third party programs") +contained in the Intel software and their required notices and/or license terms. +This third party software, even if included with the distribution of the Intel +software, may be governed by separate license terms, including without limitation, +third party license terms, other Intel software license terms, and open source +software license terms. These separate license terms govern your use of the third +party programs as set forth in the "third-party-programs.txt" or other similarly-named text file. + +The third party programs and their corresponding required notices and/or license +terms are listed below. + +_______________________________________________________________________________________________________ + +1. 
HPX + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third-party-programs.txt b/third-party-programs.txt index eac909077e..1d071a2908 100644 --- a/third-party-programs.txt +++ b/third-party-programs.txt @@ -1819,3 +1819,5 @@ files. These additional third party program files are as follows: 12.4. Intel(R) oneAPI DPC++/C++ Compiler third-party-programs-oneAPI-DPCPP.txt file. +12.5. HPX + third-party-programs-hpx.txt file. 
diff --git a/tutorials/buildbench/buildbench_device.cpp b/tutorials/buildbench/buildbench_device.cpp index d0deba1992..a53ff0f579 100644 --- a/tutorials/buildbench/buildbench_device.cpp +++ b/tutorials/buildbench/buildbench_device.cpp @@ -5,6 +5,7 @@ #include "../common/tutorial/tutorial_device.h" #include "../common/tutorial/scene_device.h" +#include "../common/sys/barrier.h" #ifdef USE_GOOGLE_BENCHMARK #include @@ -469,7 +470,9 @@ namespace embree { RTCScene scene; void perform_work(size_t threadID) { +#if !defined(TASKING_HPX) setAffinity(threadID); +#endif while (true) { barrier.wait(); if (term) @@ -495,13 +498,24 @@ namespace embree { Helper helper; helper.barrier.init(numThreads); +#if !defined(TASKING_HPX) std::vector threads; +#else + std::vector> threads; +#endif threads.reserve(numThreads); /* ramp up threads */ +#if !defined(TASKING_HPX) setAffinity(0); +#endif + for (size_t i=1; i threads; +#else + std::vector> threads; +#endif threads.reserve(numThreads); /* ramp up threads */ setAffinity(0); for (size_t i=1; i> verify_k(K); + std::vector> verify_k(K); for (size_t i = 0; i < K; i++) verify_k[i].store(0); diff --git a/tutorials/embree_tests/common/algorithms/parallel_for_for_prefix_sum.cpp b/tutorials/embree_tests/common/algorithms/parallel_for_for_prefix_sum.cpp index ffdac70612..b8009d383e 100644 --- a/tutorials/embree_tests/common/algorithms/parallel_for_for_prefix_sum.cpp +++ b/tutorials/embree_tests/common/algorithms/parallel_for_for_prefix_sum.cpp @@ -16,7 +16,9 @@ TEST_CASE("Test parallel_for_for_prefix_sum", "[parallel_for_for_prefix_sum]") /* create vector with random numbers */ const size_t M = 10; - std::vector> flattened; +#if !defined(TASKING_HPX) + std::vector> flattened; +#endif typedef std::vector *> ArrayArray; ArrayArray array2(M); size_t K = 0; @@ -30,7 +32,7 @@ TEST_CASE("Test parallel_for_for_prefix_sum", "[parallel_for_for_prefix_sum]") } /* array to test global index */ - std::vector> verify_k(K); + std::vector> verify_k(K); for 
(size_t i = 0; i < K; i++) verify_k[i].store(0); @@ -49,7 +51,11 @@ TEST_CASE("Test parallel_for_for_prefix_sum", "[parallel_for_for_prefix_sum]") }, [](size_t v0, size_t v1) { return v0 + v1; }); /* create properly sized output array */ +#if !defined(TASKING_HPX) flattened.resize(S); +#else + std::vector> flattened(S); +#endif for (auto &a : flattened) a.store(0); diff --git a/tutorials/verify/verify.cpp b/tutorials/verify/verify.cpp index d248b8bf12..c769fdccc3 100644 --- a/tutorials/verify/verify.cpp +++ b/tutorials/verify/verify.cpp @@ -7,6 +7,7 @@ #include "../common/scenegraph/scenegraph.h" #include "../common/scenegraph/geometry_creation.h" #include "../common/math/closest_point.h" +#include "../common/sys/barrier.h" #include "../../common/algorithms/parallel_for.h" #include "../../common/simd/simd.h" #include "../../kernels/common/context.h"