diff --git a/algo/detectors/sts/StsHitfinder.cxx b/algo/detectors/sts/StsHitfinder.cxx index 4bed13ff029103417cbe4b885191fe84f1a39672..1b5933982ced0bb1b9ac023425ed5ffa3fb9f167 100644 --- a/algo/detectors/sts/StsHitfinder.cxx +++ b/algo/detectors/sts/StsHitfinder.cxx @@ -496,7 +496,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const // On GPU process all front clusters in parallel instead (one thread per cluster) // to fully utilize the GPU. // Currently use option 2 for both as it is faster on CPU as well. -#if XPU_IS_CPU +#if 0 int iModule = ctx.block_idx_x(); #else int iModule = 0; @@ -545,7 +545,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const float maxSigmaBoth = 4.f * xpu::sqrt(maxTerrF * maxTerrF + maxTerrB * maxTerrB); int startB = 0; -#if XPU_IS_CPU +#if 0 for (int iClusterF = ctx.thread_idx_x(); iClusterF < nClustersF; iClusterF += ctx.block_dim_x()) { #else int iClusterF = iThread; diff --git a/algo/detectors/sts/StsHitfinderChain.cxx b/algo/detectors/sts/StsHitfinderChain.cxx index d743d65d8168ee662e2ca3153abebc8fc900131e..e70bfe9f1c0c473e21bb306d5ca0d0e8a2c901a8 100644 --- a/algo/detectors/sts/StsHitfinderChain.cxx +++ b/algo/detectors/sts/StsHitfinderChain.cxx @@ -134,7 +134,12 @@ void sts::HitfinderChain::operator()(gsl::span<const CbmStsDigi> digis) queue.wait(); xpu::h_view nClusters {hfc.nClustersPerModule}; size_t nClustersFront = std::accumulate(nClusters.begin(), nClusters.begin() + nModules, 0); - bool isCpu = xpu::device::active().backend() == xpu::cpu; + + // FindHits supports two modes of parallelization: one thread per cluster or one block per module. + // Currently we use the one-thread-per-cluster mode for both CPU and GPU. + // See sts::Hitfinder::FindHits() for details. + // bool isCpu = xpu::device::active().backend() == xpu::cpu; + bool isCpu = false; xpu::grid findHitsG = isCpu ? xpu::n_blocks(nModules) : xpu::n_threads(nClustersFront); queue.launch<FindHits>(findHitsG);