diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt
index 67f3fae56624c33dce195a12c791c017fc860af9..fa27d8b4c8e6ad3599121e88a77b4c63f5df6ea3 100644
--- a/algo/CMakeLists.txt
+++ b/algo/CMakeLists.txt
@@ -102,6 +102,11 @@ target_link_libraries(Algo
 target_compile_definitions(Algo PUBLIC NO_ROOT)
 xpu_attach(Algo ${DEVICE_SRCS})
 
+# Link against OpenMP if available (OpenMP_CXX_FOUND set); PUBLIC so it propagates to targets linking Algo
+if (OpenMP_CXX_FOUND)
+  target_link_libraries(Algo PUBLIC OpenMP::OpenMP_CXX)
+endif()
+
 # Try to enable parallel execution in c++17 if TBB is available
 if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
   list(APPEND CMAKE_PREFIX_PATH "/opt/intel/oneapi/tbb/latest/")
diff --git a/algo/base/BuildInfo.h b/algo/base/BuildInfo.h
index bdacc7a166b19cf390075c6ec7e28415c32f9fe4..e96a9cb89c3d4ffa69058a13f983fca5c417276d 100644
--- a/algo/base/BuildInfo.h
+++ b/algo/base/BuildInfo.h
@@ -10,6 +10,10 @@
 #define HAVE_PARALLEL_ALGORITHM
 #endif
 
+#if __has_include(<omp.h>)  // HAVE_OMP is defined whenever the compiler can find the <omp.h> header
+#define HAVE_OMP
+#endif  // NOTE(review): header presence only; CMake links OpenMP only if OpenMP_CXX_FOUND - confirm they agree
+
 namespace cbm::algo::BuildInfo
 {
 
@@ -31,6 +35,13 @@ namespace cbm::algo::BuildInfo
     false;
 #endif
 
+  inline constexpr bool WITH_OMP =  // Compile-time flag: true iff HAVE_OMP is defined for this build
+#ifdef HAVE_OMP
+    true;
+#else
+    false;
+#endif
+
 }  // namespace cbm::algo::BuildInfo
 
 #endif  // CBM_ALGO_BUILD_INFO_H
diff --git a/algo/base/Options.cxx b/algo/base/Options.cxx
index cc1be8932090bd96c5f80e2ea1c600d649ff1075..8a30d58307849426b188bf3c8a66b2f417759589 100644
--- a/algo/base/Options.cxx
+++ b/algo/base/Options.cxx
@@ -84,6 +84,8 @@ Options::Options(int argc, char** argv)
       "Stop after <num> timeslices (-1 = all)")
     ("skip-ts", po::value(&fSkipTimeslices)->default_value(0)->value_name("<num>"),
       "Skip first <num> timeslices")
+    ("omp", po::value(&fNumOMPThreads)->default_value(-1)->value_name("<num>"),
+      "Set number of OpenMP threads (-1 = use OMP_NUM_THREADS environment variable)")
     ("times,t", po::bool_switch(&fCollectKernelTimes)->default_value(false),
       "print kernel times")
     ("help,h",
diff --git a/algo/base/Options.h b/algo/base/Options.h
index d3c387e467ec9425831a752b399e5da631dd6cb7..182a76e6d10e050928075f547540254d8ff8e540 100644
--- a/algo/base/Options.h
+++ b/algo/base/Options.h
@@ -35,6 +35,13 @@ namespace cbm::algo
     bool CollectKernelTimes() const { return fCollectKernelTimes; }
     int NumTimeslices() const { return fNumTimeslices; }
     int SkipTimeslices() const { return fSkipTimeslices; }
+
+    std::optional<int> NumOMPThreads() const
+    {
+      // OpenMP cannot be given a non-positive thread count, so any value <= 0 (default: -1) means "not specified":
+      // std::nullopt is returned and the caller leaves OpenMP at OMP_NUM_THREADS or its built-in default.
+      return fNumOMPThreads > 0 ? std::make_optional(fNumOMPThreads) : std::nullopt;
+    }
     const std::string& ChildId() const { return fChildId; }
     const std::vector<Step>& Steps() const { return fRecoSteps; }
 
@@ -64,6 +71,7 @@ namespace cbm::algo
     bool fCollectKernelTimes = false;
     int fNumTimeslices = -1;
     int fSkipTimeslices = 0;
+    int fNumOMPThreads = -1;  // Value of --omp; anything <= 0 is reported as "not specified" by NumOMPThreads()
     std::vector<Step> fRecoSteps;
     std::vector<RecoData> fOutputTypes;
     std::vector<fles::Subsystem> fDetectors;
diff --git a/algo/base/compat/OpenMP.h b/algo/base/compat/OpenMP.h
new file mode 100644
index 0000000000000000000000000000000000000000..9129cdde38b7e09184ad95943abc0ea1570ba305
--- /dev/null
+++ b/algo/base/compat/OpenMP.h
@@ -0,0 +1,28 @@
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_COMPAT_OPENMP_H
+#define CBM_ALGO_BASE_COMPAT_OPENMP_H
+
+#include "BuildInfo.h"
+
+#ifdef HAVE_OMP
+#include <omp.h>
+#endif
+
+namespace cbm::algo::openmp  // Wrappers around the OpenMP runtime so callers need no HAVE_OMP checks of their own
+{
+
+#ifndef HAVE_OMP  // Without OpenMP: serial stand-ins (one thread, thread index 0, thread-count request ignored)
+  inline int GetMaxThreads() { return 1; }
+  inline int GetThreadNum() { return 0; }
+  inline void SetNumThreads(int) {}
+#else  // With OpenMP: forward directly to the corresponding omp_* runtime functions
+  inline int GetMaxThreads() { return omp_get_max_threads(); }
+  inline int GetThreadNum() { return omp_get_thread_num(); }
+  inline void SetNumThreads(int n) { omp_set_num_threads(n); }
+#endif
+
+}  // namespace cbm::algo::openmp
+
+#endif  // CBM_ALGO_BASE_COMPAT_OPENMP_H
diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx
index b1a225fec74ff2d0f2a6ad6c8405dbca8118e48e..f47691b519bd87a2e3282300560e43ffd4659882 100644
--- a/algo/global/Reco.cxx
+++ b/algo/global/Reco.cxx
@@ -10,6 +10,7 @@
 
 #include <xpu/host.h>
 
+#include "compat/OpenMP.h"
 #include "config/Yaml.h"
 #include "evbuild/Config.h"
 #include "log.hpp"
@@ -48,7 +49,8 @@ void Reco::Init(const Options& opts)
   fStsHitFinder.SetContext(&fContext);
 
   xpu::device_prop props {xpu::device::active()};
-  L_(info) << "Running CBM Reco on Device " << props.name();
+  L_(info) << "Running CBM Reco on Device '" << props.name() << "' (Using " << openmp::GetMaxThreads()
+           << " OpenMP threads)";
 
   if (!opts.MonitorUri().empty()) {
     fContext.monitor = std::make_unique<cbm::Monitor>(opts.MonitorUri());
diff --git a/reco/app/cbmreco/main.cxx b/reco/app/cbmreco/main.cxx
index 7edd4a4cb684c8ca67a2be6c9e01db2a4dcfe2d0..18ea5e3d0b2111e86330a32288c91239afc849fa 100644
--- a/reco/app/cbmreco/main.cxx
+++ b/reco/app/cbmreco/main.cxx
@@ -14,6 +14,7 @@
 #include "BuildInfo.h"
 #include "Options.h"
 #include "Reco.h"
+#include "compat/OpenMP.h"
 
 using namespace cbm::algo;
 
@@ -36,8 +37,15 @@ int main(int argc, char** argv)
   xpu::initialize(settings);
   xpu::preload<GPUReco>();
 
+  auto ompThreads = opts.NumOMPThreads();  // Empty unless --omp was given a positive value
+  if (ompThreads) {
+    L_(debug) << *ompThreads << " OpenMP threads requested";
+    openmp::SetNumThreads(*ompThreads);  // Applied before Reco::Init(), which logs the resulting thread count
+  }
+
   L_(info) << "CBMRECO buildType=" << BuildInfo::BUILD_TYPE << " gpuDebug=" << BuildInfo::GPU_DEBUG
-           << " parallelSTL=" << BuildInfo::WITH_PARALLEL_ALGORITHM << " commit=" << BuildInfo::GIT_HASH;
+           << " parallelSTL=" << BuildInfo::WITH_PARALLEL_ALGORITHM << " OMP=" << BuildInfo::WITH_OMP
+           << " commit=" << BuildInfo::GIT_HASH;
   std::stringstream ss;
   for (int i = 0; i < argc; i++) {
     ss << argv[i] << " ";
@@ -47,7 +55,6 @@ int main(int argc, char** argv)
   Reco reco;
   reco.Init(opts);
 
-  Archive archive(ArchiveDescriptor {});  // TODO: use opts.Detector() once detector flag is merged
   fles::TimesliceAutoSource source {opts.InputLocator()};
 
   int tsIdx = 0;