From 7aab8d24d1250c0594bb87efa219a7cff41e86b5 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de> Date: Tue, 11 Jul 2023 12:29:25 +0000 Subject: [PATCH] alg::sts::Hitfinder: Use more efficient parallelization on CPU. --- algo/detectors/sts/StsHitfinder.cxx | 4 ++-- algo/detectors/sts/StsHitfinderChain.cxx | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/algo/detectors/sts/StsHitfinder.cxx b/algo/detectors/sts/StsHitfinder.cxx index 4bed13ff02..1b5933982c 100644 --- a/algo/detectors/sts/StsHitfinder.cxx +++ b/algo/detectors/sts/StsHitfinder.cxx @@ -496,7 +496,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const // On GPU process all front clusters in parallel instead (one thread per cluster) // to fully utilize the GPU. // Currently use option 2 for both as it is faster on CPU as well. -#if XPU_IS_CPU +#if 0 int iModule = ctx.block_idx_x(); #else int iModule = 0; @@ -545,7 +545,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const float maxSigmaBoth = 4.f * xpu::sqrt(maxTerrF * maxTerrF + maxTerrB * maxTerrB); int startB = 0; -#if XPU_IS_CPU +#if 0 for (int iClusterF = ctx.thread_idx_x(); iClusterF < nClustersF; iClusterF += ctx.block_dim_x()) { #else int iClusterF = iThread; diff --git a/algo/detectors/sts/StsHitfinderChain.cxx b/algo/detectors/sts/StsHitfinderChain.cxx index d743d65d81..e70bfe9f1c 100644 --- a/algo/detectors/sts/StsHitfinderChain.cxx +++ b/algo/detectors/sts/StsHitfinderChain.cxx @@ -134,7 +134,12 @@ void sts::HitfinderChain::operator()(gsl::span<const CbmStsDigi> digis) queue.wait(); xpu::h_view nClusters {hfc.nClustersPerModule}; size_t nClustersFront = std::accumulate(nClusters.begin(), nClusters.begin() + nModules, 0); - bool isCpu = xpu::device::active().backend() == xpu::cpu; + + // FindHits supports two modes of parallelization: One thread per cluster or one block per module + // Currently we use the first mode (one thread per cluster) for both CPU and GPU. 
+ // See sts::Hitfinder::FindHits() for details. + // bool isCpu = xpu::device::active().backend() == xpu::cpu; + bool isCpu = false; xpu::grid findHitsG = isCpu ? xpu::n_blocks(nModules) : xpu::n_threads(nClustersFront); queue.launch<FindHits>(findHitsG); -- GitLab