From aae18a40c2e015a40ab7df5146768bf7c27357b4 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de> Date: Fri, 3 May 2024 09:06:39 +0000 Subject: [PATCH] external: Add poolSTL as alternative for parallel sorting. --- algo/CMakeLists.txt | 2 ++ algo/base/BuildInfo.h | 14 +++++++++++--- algo/base/compat/Algorithm.cxx | 12 ++++++++++++ algo/base/compat/Algorithm.h | 16 +++++++++++++++- external/.gitignore | 1 + external/CMakeLists.txt | 1 + external/InstallPoolSTL.cmake | 21 +++++++++++++++++++++ reco/app/cbmreco/main.cxx | 4 +++- 8 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 algo/base/compat/Algorithm.cxx create mode 100644 external/InstallPoolSTL.cmake diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt index def6ea41ef..0a3ec4b62d 100644 --- a/algo/CMakeLists.txt +++ b/algo/CMakeLists.txt @@ -76,6 +76,7 @@ set(SRCS base/MainConfig.cxx base/RecoParams.cxx base/System.cxx + base/compat/Algorithm.cxx base/util/MemoryLogger.cxx base/util/StlUtils.cxx base/util/EnumDict.cxx @@ -207,6 +208,7 @@ target_link_libraries(Algo external::fles_ipc external::fles_monitoring cppzmq + poolstl ) target_compile_definitions(Algo PUBLIC NO_ROOT) xpu_attach(Algo ${DEVICE_SRCS}) diff --git a/algo/base/BuildInfo.h b/algo/base/BuildInfo.h index 679d18742e..27213c3e05 100644 --- a/algo/base/BuildInfo.h +++ b/algo/base/BuildInfo.h @@ -6,12 +6,20 @@ #include <string> +#define MAKE_GCC_VERSION(major, minor, patch) ((major) *10000 + (minor) *100 + (patch)) +#define GCC_VERSION MAKE_GCC_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) + #if __has_include(<execution>) && !defined(__CLING__) #include <execution> // for feature test macro __cpp_lib_parallel_algorithm #endif #if defined(HAVE_TBB) && defined(__cpp_lib_parallel_algorithm) -#define HAVE_PARALLEL_ALGORITHM +#define HAVE_PARALLEL_STL_LIBTBB +#endif + +// PoolSTL triggers an internal error in GCC 10, so only enable it for GCC 11 and later +#if GCC_VERSION >= MAKE_GCC_VERSION(11, 
0, 0) +#define HAVE_PARALLEL_STL_POOLSTL #endif #if __has_include(<omp.h>) @@ -37,8 +45,8 @@ namespace cbm::algo::BuildInfo false; #endif - inline constexpr bool WITH_PARALLEL_ALGORITHM = -#ifdef HAVE_PARALLEL_ALGORITHM + inline constexpr bool WITH_PARALLEL_STL = +#ifdef HAVE_PARALLEL_STL_LIBTBB true; #else false; diff --git a/algo/base/compat/Algorithm.cxx b/algo/base/compat/Algorithm.cxx new file mode 100644 index 0000000000..c8981744e4 --- /dev/null +++ b/algo/base/compat/Algorithm.cxx @@ -0,0 +1,12 @@ +/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ + +#include "Algorithm.h" + + +task_thread_pool::task_thread_pool& cbm::algo::GetGlobalSTLThreadPool() +{ + static task_thread_pool::task_thread_pool pool; + return pool; +} diff --git a/algo/base/compat/Algorithm.h b/algo/base/compat/Algorithm.h index b6864ab822..7c02acaaf8 100644 --- a/algo/base/compat/Algorithm.h +++ b/algo/base/compat/Algorithm.h @@ -18,6 +18,8 @@ #include "BuildInfo.h" #include <algorithm> +#include <poolstl/algorithm> +#include <poolstl/execution> #ifdef HAVE_PARALLEL_ALGORITHM #include <execution> @@ -26,6 +28,14 @@ namespace cbm::algo { + /** + * @brief Get the global thread pool for parallel STL algorithms + * @details This function returns a reference to the global thread pool used by the parallel STL algorithms. + * The pool is initially created with the number of available threads. Apart from changing that thread count, + * this function should only be used in conjunction with the parallel STL algorithms provided by poolSTL. 
+ **/ + task_thread_pool::task_thread_pool& GetGlobalSTLThreadPool(); + /** * @brief Wrapper for std::sort * @@ -38,9 +48,13 @@ namespace cbm::algo // Disable parallel sorting for the moment // The underlying implementation in libTBB has a massive memory leak: // https://community.intel.com/t5/Intel-oneAPI-Threading-Building/std-sort-std-execution-par-unseq-has-a-memory-leak-on-Linux/m-p/1580910 +// +// Update 2024-05-02: Add poolstl as a replacement for libTBB #if 0 -// #ifdef HAVE_PARALLEL_ALGORITHM +// #ifdef HAVE_PARALLEL_STL_LIBTBB std::sort(std::execution::par_unseq, first, last, comp); +#elif defined(HAVE_PARALLEL_STL_POOLSTL) + std::sort(poolstl::par.on(GetGlobalSTLThreadPool()), first, last, comp); #else std::sort(first, last, comp); #endif diff --git a/external/.gitignore b/external/.gitignore index 5b73f614f9..262b14dfca 100644 --- a/external/.gitignore +++ b/external/.gitignore @@ -15,3 +15,4 @@ xpu-dev GSL bba Hal/ +poolSTL/ diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 15fee5f0d7..d084368360 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -68,6 +68,7 @@ if(DOWNLOAD_EXTERNALS) endif() Include(InstallYamlCpp.cmake) + Include(InstallPoolSTL.cmake) if (NOT CBM_ONLINE_STANDALONE) # Not required for online standalone diff --git a/external/InstallPoolSTL.cmake b/external/InstallPoolSTL.cmake new file mode 100644 index 0000000000..8fe1225361 --- /dev/null +++ b/external/InstallPoolSTL.cmake @@ -0,0 +1,21 @@ +set(POOLSTL_VERSION 5cf834a1625b4c0cb29785ec6e55280343e25436) # v0.3.5 - 2024-05-02 +set(POOLSTL_REPO https://github.com/alugowski/poolSTL.git) +set(POOLSTL_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/poolSTL) +set(POOLSTL_INCLUDE_DIRS ${POOLSTL_SRC_DIR}/include) + +download_project_if_needed(PROJECT poolstl + GIT_REPOSITORY ${POOLSTL_REPO} + GIT_TAG ${POOLSTL_VERSION} + SOURCE_DIR ${POOLSTL_SRC_DIR} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" +) + +# Can't use add_subdirectory because older cmake 
versions don't set SYSTEM property to avoid warnings +# add_subdirectory(${POOLSTL_SRC_DIR} SYSTEM) +add_library(poolstl INTERFACE) +target_include_directories(poolstl SYSTEM INTERFACE + $<BUILD_INTERFACE:${POOLSTL_INCLUDE_DIRS}> + $<INSTALL_INTERFACE:include> +) diff --git a/reco/app/cbmreco/main.cxx b/reco/app/cbmreco/main.cxx index b6c06bbde9..cf1f46e520 100644 --- a/reco/app/cbmreco/main.cxx +++ b/reco/app/cbmreco/main.cxx @@ -9,6 +9,7 @@ #include "RecoResultsInputArchive.h" #include "RecoResultsOutputArchive.h" #include "System.h" +#include "compat/Algorithm.h" #include "compat/OpenMP.h" #include "gpu/DeviceImage.h" #include "util/MemoryLogger.h" @@ -150,9 +151,10 @@ int main(int argc, char** argv) L_(debug) << *ompThreads << " OpenMP threads requested"; openmp::SetNumThreads(*ompThreads); } + GetGlobalSTLThreadPool().set_num_threads(openmp::GetMaxThreads()); L_(info) << "CBMRECO buildType=" << BuildInfo::BUILD_TYPE << " gpuDebug=" << BuildInfo::GPU_DEBUG - << " parallelSTL=" << BuildInfo::WITH_PARALLEL_ALGORITHM << " OMP=" << BuildInfo::WITH_OMP + << " parallelSTL=" << BuildInfo::WITH_PARALLEL_STL << " OMP=" << BuildInfo::WITH_OMP << " ZSTD=" << BuildInfo::WITH_ZSTD << " commit=" << BuildInfo::GIT_HASH; std::stringstream ss; for (int i = 0; i < argc; i++) { -- GitLab