diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt index def6ea41efc2d18d249eda2fadc87a8d36fcf610..0a3ec4b62d36578e993ed8b8ee33b8e7610c0758 100644 --- a/algo/CMakeLists.txt +++ b/algo/CMakeLists.txt @@ -76,6 +76,7 @@ set(SRCS base/MainConfig.cxx base/RecoParams.cxx base/System.cxx + base/compat/Algorithm.cxx base/util/MemoryLogger.cxx base/util/StlUtils.cxx base/util/EnumDict.cxx @@ -207,6 +208,7 @@ target_link_libraries(Algo external::fles_ipc external::fles_monitoring cppzmq + poolstl ) target_compile_definitions(Algo PUBLIC NO_ROOT) xpu_attach(Algo ${DEVICE_SRCS}) diff --git a/algo/base/BuildInfo.h b/algo/base/BuildInfo.h index 679d18742e61610355f5adf319409f484ba31736..27213c3e054746202263672e5a014782d5fb76e8 100644 --- a/algo/base/BuildInfo.h +++ b/algo/base/BuildInfo.h @@ -6,12 +6,20 @@ #include <string> +#define MAKE_GCC_VERSION(major, minor, patch) ((major) *10000 + (minor) *100 + (patch)) +#define GCC_VERSION MAKE_GCC_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) + #if __has_include(<execution>) && !defined(__CLING__) #include <execution> // for feature test macro __cpp_lib_parallel_algorithm #endif #if defined(HAVE_TBB) && defined(__cpp_lib_parallel_algorithm) -#define HAVE_PARALLEL_ALGORITHM +#define HAVE_PARALLEL_STL_LIBTBB +#endif + +// PoolSTL triggers an internal error in GCC 10, so only enable it for GCC 11 and later +#if GCC_VERSION >= MAKE_GCC_VERSION(11, 0, 0) +#define HAVE_PARALLEL_STL_POOLSTL #endif #if __has_include(<omp.h>) @@ -37,8 +45,8 @@ namespace cbm::algo::BuildInfo false; #endif - inline constexpr bool WITH_PARALLEL_ALGORITHM = -#ifdef HAVE_PARALLEL_ALGORITHM + inline constexpr bool WITH_PARALLEL_STL = +#ifdef HAVE_PARALLEL_STL_LIBTBB true; #else false; diff --git a/algo/base/compat/Algorithm.cxx b/algo/base/compat/Algorithm.cxx new file mode 100644 index 0000000000000000000000000000000000000000..c8981744e49f91a900377abc5661c536dbd48506 --- /dev/null +++ b/algo/base/compat/Algorithm.cxx @@ -0,0 +1,12 @@ +/* Copyright (C) 
2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ + +#include "Algorithm.h" + + +task_thread_pool::task_thread_pool& cbm::algo::GetGlobalSTLThreadPool() +{ + static task_thread_pool::task_thread_pool pool; + return pool; +} diff --git a/algo/base/compat/Algorithm.h b/algo/base/compat/Algorithm.h index b6864ab8223f406f2e612ea0a4cb9187ddb18868..7c02acaaf8a2bfcb2a1b93275d4858191e3f59e5 100644 --- a/algo/base/compat/Algorithm.h +++ b/algo/base/compat/Algorithm.h @@ -18,6 +18,8 @@ #include "BuildInfo.h" #include <algorithm> +#include <poolstl/algorithm> +#include <poolstl/execution> -#ifdef HAVE_PARALLEL_ALGORITHM +#ifdef HAVE_PARALLEL_STL_LIBTBB #include <execution> @@ -26,6 +28,14 @@ namespace cbm::algo { + /** + * @brief Get the global thread pool for parallel STL algorithms + * @details Returns a reference to the global thread pool that the poolstl-backed parallel STL wrappers run on. + * The pool is default-constructed on first use and may be resized at startup (cbmreco sets it to the OpenMP + * maximum thread count). Apart from that, only hand it to poolstl execution policies such as poolstl::par.on(). 
+ **/ + task_thread_pool::task_thread_pool& GetGlobalSTLThreadPool(); + /** * @brief Wrapper for std::sort * @@ -38,9 +48,13 @@ namespace cbm::algo // Disable parallel sorting for the moment // The underlying implementation in libTBB has a massive memory leak: // https://community.intel.com/t5/Intel-oneAPI-Threading-Building/std-sort-std-execution-par-unseq-has-a-memory-leak-on-Linux/m-p/1580910 +// +// Update 2024-05-02: Add poolstl as a replacement for libTBB #if 0 -// #ifdef HAVE_PARALLEL_ALGORITHM +// #ifdef HAVE_PARALLEL_STL_LIBTBB std::sort(std::execution::par_unseq, first, last, comp); +#elif defined(HAVE_PARALLEL_STL_POOLSTL) + std::sort(poolstl::par.on(GetGlobalSTLThreadPool()), first, last, comp); #else std::sort(first, last, comp); #endif diff --git a/external/.gitignore b/external/.gitignore index 5b73f614f982eced3fea6b0117cadba9697d70a0..262b14dfca7e1a46c3ac86d75690d631be7059b7 100644 --- a/external/.gitignore +++ b/external/.gitignore @@ -15,3 +15,4 @@ xpu-dev GSL bba Hal/ +poolSTL/ diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 15fee5f0d73dea12223ab6d667ef381b1aecb080..d0843683605e1ffeec46e52a02158a289405d0ab 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -68,6 +68,7 @@ if(DOWNLOAD_EXTERNALS) endif() Include(InstallYamlCpp.cmake) + Include(InstallPoolSTL.cmake) if (NOT CBM_ONLINE_STANDALONE) # Not required for online standalone diff --git a/external/InstallPoolSTL.cmake b/external/InstallPoolSTL.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8fe1225361c9b9d7ad99374bf1bd7416646c8c1e --- /dev/null +++ b/external/InstallPoolSTL.cmake @@ -0,0 +1,21 @@ +set(POOLSTL_VERSION 5cf834a1625b4c0cb29785ec6e55280343e25436) # v0.3.5 - 2024-05-02 +set(POOLSTL_REPO https://github.com/alugowski/poolSTL.git) +set(POOLSTL_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/poolSTL) +set(POOLSTL_INCLUDE_DIRS ${POOLSTL_SRC_DIR}/include) + +download_project_if_needed(PROJECT poolstl + GIT_REPOSITORY ${POOLSTL_REPO} 
+ GIT_TAG ${POOLSTL_VERSION} + SOURCE_DIR ${POOLSTL_SRC_DIR} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" +) + +# Can't use add_subdirectory because older cmake versions don't set SYSTEM property to avoid warnings +# add_subdirectory(${POOLSTL_SRC_DIR} SYSTEM) +add_library(poolstl INTERFACE) +target_include_directories(poolstl SYSTEM INTERFACE + $<BUILD_INTERFACE:${POOLSTL_INCLUDE_DIRS}> + $<INSTALL_INTERFACE:include> +) diff --git a/reco/app/cbmreco/main.cxx b/reco/app/cbmreco/main.cxx index b6c06bbde93a0fba5371e25f1f00c3f4269c211b..cf1f46e520d7b571f11616c5f39d2ce8e03ee0b8 100644 --- a/reco/app/cbmreco/main.cxx +++ b/reco/app/cbmreco/main.cxx @@ -9,6 +9,7 @@ #include "RecoResultsInputArchive.h" #include "RecoResultsOutputArchive.h" #include "System.h" +#include "compat/Algorithm.h" #include "compat/OpenMP.h" #include "gpu/DeviceImage.h" #include "util/MemoryLogger.h" @@ -150,9 +151,10 @@ int main(int argc, char** argv) L_(debug) << *ompThreads << " OpenMP threads requested"; openmp::SetNumThreads(*ompThreads); } + GetGlobalSTLThreadPool().set_num_threads(openmp::GetMaxThreads()); L_(info) << "CBMRECO buildType=" << BuildInfo::BUILD_TYPE << " gpuDebug=" << BuildInfo::GPU_DEBUG - << " parallelSTL=" << BuildInfo::WITH_PARALLEL_ALGORITHM << " OMP=" << BuildInfo::WITH_OMP + << " parallelSTL=" << BuildInfo::WITH_PARALLEL_STL << " OMP=" << BuildInfo::WITH_OMP << " ZSTD=" << BuildInfo::WITH_ZSTD << " commit=" << BuildInfo::GIT_HASH; std::stringstream ss; for (int i = 0; i < argc; i++) {