diff --git a/algo/detectors/sts/Hitfinder.cxx b/algo/detectors/sts/Hitfinder.cxx index dd85d750b65eec4df000c02a6d1d37edd95cbf62..bc129e2e645b6459cacfc099e00e6499fde93ad0 100644 --- a/algo/detectors/sts/Hitfinder.cxx +++ b/algo/detectors/sts/Hitfinder.cxx @@ -222,18 +222,45 @@ XPU_D void sts::Hitfinder::FindClustersParallel(FindClusters::context& ctx) cons */ XPU_D void sts::Hitfinder::CalculateClustersParallel(FindClusters::context& ctx) const { - int const iModule = ctx.block_idx_x(); - CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]]; - ; - auto const nDigis = GetNDigis(iModule); + const int nModuleSides = 2 * nModules; - if (nDigis == 0) return; + int iModule = 0; + int iThread = ctx.block_dim_x() * ctx.block_idx_x() + ctx.thread_idx_x(); + + for (; iModule < nModuleSides; iModule++) { + i32 nDigis = GetNDigis(iModule); + if (iThread < nDigis) { + break; + } + iThread -= nDigis; + } + + if (iModule >= nModuleSides) { + return; + } + + const CbmStsDigi* digis = &digisPerModule[digiOffsetPerModule[iModule]]; + auto* digiConnector = &digiConnectorsPerModule[digiOffsetPerModule[iModule]]; - auto* digiConnector = &digiConnectorsPerModule[digiOffsetPerModule[iModule]]; - // auto* channelOffsets = &channelOffsetPerModule[iModule * nChannels]; + // Local index of digi in sensor + const int iDigi = iThread; - // calculateClustersChannelWise(digis, digiConnector, channelOffsets, iModule, threadId, nDigis); - CalculateClustersDigiWise(ctx, digis, digiConnector, nDigis); + if (digiConnector[iDigi].HasPrevious()) { + return; + } + + if (!digiConnector[iDigi].HasNext()) { + // Cluster has 1 element + CreateClusterFromConnectors1(iModule, digis, iDigi); + } + else if (!digiConnector[digiConnector[iDigi].next()].HasNext()) { + // Cluster has 2 elements + CreateClusterFromConnectors2(iModule, digis, digiConnector, iDigi); + } + else { + // Cluster has >2 elements + CreateClusterFromConnectorsN(iModule, digis, digiConnector, iDigi); + } } /** @@ -264,23 +291,10 @@ XPU_D void sts::Hitfinder::CalculateClustersDigiWise(FindClusters::context& ctx, for (unsigned int currIter = ctx.thread_idx_x(); currIter < nDigis; currIter += (unsigned int) ctx.block_dim_x()) { if (digiConnector[currIter].HasPrevious()) continue; - - if (!digiConnector[currIter].HasNext()) { - //if Cluster has 1 element - CreateClusterFromConnectors1(iModule, digis, currIter); - } - else if (!digiConnector[digiConnector[currIter].next()].HasNext()) { - //if Cluster has 2 elements - CreateClusterFromConnectors2(iModule, digis, digiConnector, currIter); - } - else { - //if Cluster has N elements - CreateClusterFromConnectorsN(iModule, digis, digiConnector, currIter); - } } } -XPU_D void sts::Hitfinder::CreateClusterFromConnectors1(int const iModule, CbmStsDigi* digis, int digiIndex) const +XPU_D void sts::Hitfinder::CreateClusterFromConnectors1(int const iModule, const CbmStsDigi* digis, int digiIndex) const { const CbmStsDigi& digi = digis[digiIndex]; @@ -302,7 +316,7 @@ XPU_D void sts::Hitfinder::CreateClusterFromConnectors1(int const iModule, CbmSt AddCluster(iModule, time, cluster); } -XPU_D void sts::Hitfinder::CreateClusterFromConnectors2(int const iModule, CbmStsDigi* digis, +XPU_D void sts::Hitfinder::CreateClusterFromConnectors2(int const iModule, const CbmStsDigi* digis, sts::DigiConnector* digiConnector, int const digiIndex) const { @@ -375,7 +389,7 @@ XPU_D void sts::Hitfinder::CreateClusterFromConnectors2(int const iModule, CbmSt AddCluster(iModule, time, cls); } -XPU_D void sts::Hitfinder::CreateClusterFromConnectorsN(int iModule, CbmStsDigi* digis, +XPU_D void sts::Hitfinder::CreateClusterFromConnectorsN(int iModule, const CbmStsDigi* digis, sts::DigiConnector* digiConnector, int digiIndex) const { ClusterCalculationProperties cProps; @@ -634,7 +648,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const IntersectClusters(iModule, pars, clsIdxF, clsDataF, clsIdxB, clsDataB); } } -// clang-format on + // clang-format on } XPU_D void sts::Hitfinder::IntersectClusters(int iBlock, const HitfinderCache& pars, const ClusterIdx& idxF, diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h index af69fab65a2451fd91ee77191962412a207cbc45..383e48cd226534e9627804f42904bade77d7d823 100644 --- a/algo/detectors/sts/Hitfinder.h +++ b/algo/detectors/sts/Hitfinder.h @@ -322,10 +322,10 @@ namespace cbm::algo::sts XPU_D void CalculateClustersDigiWise(FindClusters::context& ctx, CbmStsDigi* digis, DigiConnector* digiConnector, unsigned int const nDigis) const; - XPU_D void CreateClusterFromConnectors1(int const iModule, CbmStsDigi* digis, int const digiIndex) const; - XPU_D void CreateClusterFromConnectors2(int const iModule, CbmStsDigi* digis, DigiConnector* digiConnector, + XPU_D void CreateClusterFromConnectors1(int const iModule, const CbmStsDigi* digis, int const digiIndex) const; + XPU_D void CreateClusterFromConnectors2(int const iModule, const CbmStsDigi* digis, DigiConnector* digiConnector, int const digiIndex) const; - XPU_D void CreateClusterFromConnectorsN(int const iModule, CbmStsDigi* digis, DigiConnector* digiConnector, + XPU_D void CreateClusterFromConnectorsN(int const iModule, const CbmStsDigi* digis, DigiConnector* digiConnector, int const digiIndex) const; private: @@ -361,7 +361,7 @@ namespace cbm::algo::sts ClusterIdx* tgtIdx = &clusterIdxPerModule[iModule * maxClustersPerModule]; sts::Cluster* tgtData = &clusterDataPerModule[iModule * maxClustersPerModule]; - u32 pos = xpu::atomic_add_block(&nClustersPerModule[iModule], 1); + u32 pos = xpu::atomic_add(&nClustersPerModule[iModule], 1); if (size_t(pos) >= maxClustersPerModule) { xpu::atomic_add(&monitor->fNumClusterBucketOverflow, 1); diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx index 68d253d2592ce0e1e6f6856d3bcb9c004cdba457..280911c5c2125eff15efe0cec2c4da25fb4acff7 100644 --- a/algo/detectors/sts/HitfinderChain.cxx +++ b/algo/detectors/sts/HitfinderChain.cxx @@ -141,7 +141,7 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS xpu::k_add_bytes<ChannelOffsets>(nDigisTotal * sizeof(CbmStsDigi)); queue.launch<CreateDigiConnections>(xpu::n_threads(nDigisTotal)); xpu::k_add_bytes<CreateDigiConnections>(nDigisTotal * sizeof(CbmStsDigi)); - queue.launch<CreateClusters>(xpu::n_blocks(nModuleSides)); + queue.launch<CreateClusters>(xpu::n_threads(nDigisTotal)); xpu::k_add_bytes<CreateClusters>(nDigisTotal * sizeof(CbmStsDigi)); } if (Opts().LogLevel() == trace) { @@ -167,12 +167,6 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS EnsureClustersSane(clusterIdxPerModule, nClustersPerModule); } - // Run cluster finding steps in indivual kernels, useful for debugging / profiling - // xpu::run_kernel<CalculateOffsets>(xpu::grid::n_blocks(hfc.nModules * 2)); - // xpu::run_kernel<FindClustersBasic>(xpu::grid::n_blocks(hfc.nModules * 2)); - // xpu::run_kernel<CalculateClusters>(xpu::grid::n_blocks(hfc.nModules * 2)); - // xpu::run_kernel<FindClustersBasic>(xpu::grid::n_blocks(hfc.nModules * 2)); - // xpu::run_kernel<CalculateClustersBasic>(xpu::grid::n_blocks(hfc.nModules * 2)); L_(debug) << "STS Hitfinder Chain: Sort Clusters..."; queue.launch<SortClusters>(xpu::n_blocks(nModuleSides));