From aae18a40c2e015a40ab7df5146768bf7c27357b4 Mon Sep 17 00:00:00 2001
From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de>
Date: Fri, 3 May 2024 09:06:39 +0000
Subject: [PATCH] external: Add poolSTL as alternative for parallel sorting.

---
 algo/CMakeLists.txt            |  2 ++
 algo/base/BuildInfo.h          | 14 +++++++++++---
 algo/base/compat/Algorithm.cxx | 12 ++++++++++++
 algo/base/compat/Algorithm.h   | 16 +++++++++++++++-
 external/.gitignore            |  1 +
 external/CMakeLists.txt        |  1 +
 external/InstallPoolSTL.cmake  | 21 +++++++++++++++++++++
 reco/app/cbmreco/main.cxx      |  4 +++-
 8 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 algo/base/compat/Algorithm.cxx
 create mode 100644 external/InstallPoolSTL.cmake

diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt
index def6ea41ef..0a3ec4b62d 100644
--- a/algo/CMakeLists.txt
+++ b/algo/CMakeLists.txt
@@ -76,6 +76,7 @@ set(SRCS
   base/MainConfig.cxx
   base/RecoParams.cxx
   base/System.cxx
+  base/compat/Algorithm.cxx
   base/util/MemoryLogger.cxx
   base/util/StlUtils.cxx
   base/util/EnumDict.cxx
@@ -207,6 +208,7 @@ target_link_libraries(Algo
             external::fles_ipc
             external::fles_monitoring
             cppzmq
+            poolstl
 )
 target_compile_definitions(Algo PUBLIC NO_ROOT)
 xpu_attach(Algo ${DEVICE_SRCS})
diff --git a/algo/base/BuildInfo.h b/algo/base/BuildInfo.h
index 679d18742e..27213c3e05 100644
--- a/algo/base/BuildInfo.h
+++ b/algo/base/BuildInfo.h
@@ -6,12 +6,20 @@
 
 #include <string>
 
+#define MAKE_GCC_VERSION(major, minor, patch) ((major) *10000 + (minor) *100 + (patch))
+#define GCC_VERSION MAKE_GCC_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
+
 #if __has_include(<execution>) && !defined(__CLING__)
 #include <execution>  // for feature test macro __cpp_lib_parallel_algorithm
 #endif
 
 #if defined(HAVE_TBB) && defined(__cpp_lib_parallel_algorithm)
-#define HAVE_PARALLEL_ALGORITHM
+#define HAVE_PARALLEL_STL_LIBTBB
+#endif
+
+// PoolSTL triggers an internal error in GCC 10, so only enable it for GCC 11 and later
+#if GCC_VERSION >= MAKE_GCC_VERSION(11, 0, 0)
+#define HAVE_PARALLEL_STL_POOLSTL
 #endif
 
 #if __has_include(<omp.h>)
@@ -37,8 +45,8 @@ namespace cbm::algo::BuildInfo
     false;
 #endif
 
-  inline constexpr bool WITH_PARALLEL_ALGORITHM =
-#ifdef HAVE_PARALLEL_ALGORITHM
+  inline constexpr bool WITH_PARALLEL_STL =
+#ifdef HAVE_PARALLEL_STL_LIBTBB
     true;
 #else
     false;
diff --git a/algo/base/compat/Algorithm.cxx b/algo/base/compat/Algorithm.cxx
new file mode 100644
index 0000000000..c8981744e4
--- /dev/null
+++ b/algo/base/compat/Algorithm.cxx
@@ -0,0 +1,12 @@
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#include "Algorithm.h"
+
+// Accessor for the process-wide thread pool that is handed to poolstl's parallel algorithms (declared in Algorithm.h).
+task_thread_pool::task_thread_pool& cbm::algo::GetGlobalSTLThreadPool()
+{
+  static task_thread_pool::task_thread_pool pool;  // function-local static: default-constructed on the first call
+  return pool;
+}
diff --git a/algo/base/compat/Algorithm.h b/algo/base/compat/Algorithm.h
index b6864ab822..7c02acaaf8 100644
--- a/algo/base/compat/Algorithm.h
+++ b/algo/base/compat/Algorithm.h
@@ -18,6 +18,8 @@
 #include "BuildInfo.h"
 
 #include <algorithm>
+#include <poolstl/algorithm>
+#include <poolstl/execution>
 
 #ifdef HAVE_PARALLEL_ALGORITHM
 #include <execution>
@@ -26,6 +28,14 @@
 namespace cbm::algo
 {
 
+  /**
+   * @brief Get the global thread pool for parallel stl algorithms
+   * @details Returns a reference to the single global thread pool used by the parallel stl algorithms.
+   * The pool is created on first use and initially sized to the number of available threads; its size can be changed
+   * with set_num_threads(). Apart from that, it should only be used together with the parallel stl algorithms of poolstl.
+  **/
+  task_thread_pool::task_thread_pool& GetGlobalSTLThreadPool();
+
   /**
    * @brief Wrapper for std::sort
    *
@@ -38,9 +48,13 @@ namespace cbm::algo
 // Disable parallel sorting for the moment
 // The underlying implementation in libTBB has a massive memory leak:
 // https://community.intel.com/t5/Intel-oneAPI-Threading-Building/std-sort-std-execution-par-unseq-has-a-memory-leak-on-Linux/m-p/1580910
+//
+// Update 2024-05-02: Add poolstl as a replacement for libTBB
 #if 0
-// #ifdef HAVE_PARALLEL_ALGORITHM
+// #ifdef HAVE_PARALLEL_STL_LIBTBB
     std::sort(std::execution::par_unseq, first, last, comp);
+#elif defined(HAVE_PARALLEL_STL_POOLSTL)
+    std::sort(poolstl::par.on(GetGlobalSTLThreadPool()), first, last, comp);
 #else
     std::sort(first, last, comp);
 #endif
diff --git a/external/.gitignore b/external/.gitignore
index 5b73f614f9..262b14dfca 100644
--- a/external/.gitignore
+++ b/external/.gitignore
@@ -15,3 +15,4 @@ xpu-dev
 GSL
 bba
 Hal/
+poolSTL/
diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt
index 15fee5f0d7..d084368360 100644
--- a/external/CMakeLists.txt
+++ b/external/CMakeLists.txt
@@ -68,6 +68,7 @@ if(DOWNLOAD_EXTERNALS)
   endif()
 
   Include(InstallYamlCpp.cmake)
+  Include(InstallPoolSTL.cmake)
 
   if (NOT CBM_ONLINE_STANDALONE) # Not required for online standalone
 
diff --git a/external/InstallPoolSTL.cmake b/external/InstallPoolSTL.cmake
new file mode 100644
index 0000000000..8fe1225361
--- /dev/null
+++ b/external/InstallPoolSTL.cmake
@@ -0,0 +1,21 @@
+set(POOLSTL_VERSION 5cf834a1625b4c0cb29785ec6e55280343e25436) # Pinned commit: v0.3.5 - 2024-05-02
+set(POOLSTL_REPO https://github.com/alugowski/poolSTL.git)
+set(POOLSTL_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/poolSTL)
+set(POOLSTL_INCLUDE_DIRS ${POOLSTL_SRC_DIR}/include)
+
+download_project_if_needed(PROJECT poolstl
+  GIT_REPOSITORY ${POOLSTL_REPO}
+  GIT_TAG ${POOLSTL_VERSION}
+  SOURCE_DIR ${POOLSTL_SRC_DIR}
+  CONFIGURE_COMMAND "" # Empty commands: the sources are only fetched, nothing is configured,
+  BUILD_COMMAND "" # built,
+  INSTALL_COMMAND "" # or installed by this step.
+)
+
+# Can't use add_subdirectory: its SYSTEM option (needed to silence warnings from poolSTL headers) requires CMake >= 3.25
+# add_subdirectory(${POOLSTL_SRC_DIR} SYSTEM)
+add_library(poolstl INTERFACE) # Interface target: only propagates the include path set below
+target_include_directories(poolstl SYSTEM INTERFACE
+  $<BUILD_INTERFACE:${POOLSTL_INCLUDE_DIRS}>
+  $<INSTALL_INTERFACE:include>
+)
diff --git a/reco/app/cbmreco/main.cxx b/reco/app/cbmreco/main.cxx
index b6c06bbde9..cf1f46e520 100644
--- a/reco/app/cbmreco/main.cxx
+++ b/reco/app/cbmreco/main.cxx
@@ -9,6 +9,7 @@
 #include "RecoResultsInputArchive.h"
 #include "RecoResultsOutputArchive.h"
 #include "System.h"
+#include "compat/Algorithm.h"
 #include "compat/OpenMP.h"
 #include "gpu/DeviceImage.h"
 #include "util/MemoryLogger.h"
@@ -150,9 +151,10 @@ int main(int argc, char** argv)
     L_(debug) << *ompThreads << " OpenMP threads requested";
     openmp::SetNumThreads(*ompThreads);
   }
+  GetGlobalSTLThreadPool().set_num_threads(openmp::GetMaxThreads());
 
   L_(info) << "CBMRECO buildType=" << BuildInfo::BUILD_TYPE << " gpuDebug=" << BuildInfo::GPU_DEBUG
-           << " parallelSTL=" << BuildInfo::WITH_PARALLEL_ALGORITHM << " OMP=" << BuildInfo::WITH_OMP
+           << " parallelSTL=" << BuildInfo::WITH_PARALLEL_STL << " OMP=" << BuildInfo::WITH_OMP
            << " ZSTD=" << BuildInfo::WITH_ZSTD << " commit=" << BuildInfo::GIT_HASH;
   std::stringstream ss;
   for (int i = 0; i < argc; i++) {
-- 
GitLab