diff --git a/algo/detectors/sts/StsHitfinder.cxx b/algo/detectors/sts/StsHitfinder.cxx
index 0e11237f0d87dfe215db4b9e21c422bf2d621754..4bed13ff029103417cbe4b885191fe84f1a39672 100644
--- a/algo/detectors/sts/StsHitfinder.cxx
+++ b/algo/detectors/sts/StsHitfinder.cxx
@@ -102,8 +102,6 @@ XPU_D void sts::Hitfinder::CalculateChannelOffsets(FindClusters::context& ctx, C
         channelOffsets[i] = pos + 1;
       }
     }
-    XPU_ASSERT(digis[pos].GetChannel()
-               <= digis[pos + 1].GetChannel());  //channel are supposed to be sorted increasingly
   }
 
   for (int c = digis[nDigis - 1].GetChannel() + 1; c < nChannels; c++) {
@@ -438,9 +436,6 @@ XPU_D void sts::Hitfinder::CreateClusterFromConnectorsN(int iModule, CbmStsDigi*
   // Correction for corrupt clusters
   if (x < cProps.chanF || x > cProps.chanL) { x = cProps.xSum / qSum; }
 
-  XPU_ASSERT(x >= cProps.chanF && x <= cProps.chanL);
-  XPU_ASSERT(nDigis > 2);
-
   if (IsBackside(iModule)) { x += nChannels; }
 
   sts::Cluster cls {
@@ -792,8 +787,12 @@ XPU_D void sts::Hitfinder::CreateHit(int iModule, float xLocal, float yLocal, fl
 
   int idx = xpu::atomic_add(&nHitsPerModule[iModule], 1);
 
-  assert(size_t(idx) < maxHitsPerModule);
-  if (size_t(idx) < maxHitsPerModule) { hitsPerModule[iModule * maxHitsPerModule + idx] = hit; }
+  if (size_t(idx) >= maxHitsPerModule) {
+    xpu::atomic_add(&monitor->fNumHitBucketOverflow, 1);
+    return;
+  }
+
+  hitsPerModule[iModule * maxHitsPerModule + idx] = hit;
 }
 
 XPU_D float sts::Hitfinder::LandauWidth(float charge) const
diff --git a/algo/detectors/sts/StsHitfinder.h b/algo/detectors/sts/StsHitfinder.h
index 570bdc6a40bd86bd607b4a1622a4ef0422b6150f..07c0cd2bd67f4f9ee6fd2476e35a691a8219afa4 100644
--- a/algo/detectors/sts/StsHitfinder.h
+++ b/algo/detectors/sts/StsHitfinder.h
@@ -111,6 +111,13 @@ namespace cbm::algo::sts
     XPU_D void operator()(context&);
   };
 
+  struct HitfinderMonitor {
+    u32 fNumClusterBucketOverflow = 0;
+    u32 fNumHitBucketOverflow = 0;
+
+    bool HasErrors() const { return fNumClusterBucketOverflow > 0 || fNumHitBucketOverflow > 0; }
+  };
+
   // Calibration data structures
   struct SensorPar {
     float dY;
@@ -212,6 +219,9 @@ namespace cbm::algo::sts
     // size = nModules
     xpu::buffer<SensorPar> sensorPars;
 
+    // Monitor data
+    xpu::buffer<HitfinderMonitor> monitor;
+
     // input
     // Store all digis in a flat array with a header that contains the offset for every module (front and back)
     xpu::buffer<size_t> digiOffsetPerModule;  // size = 2 * nModules + 1 entries, last entry contains total digi count
@@ -336,13 +346,15 @@ namespace cbm::algo::sts
       sts::Cluster* tgtData = &clusterDataPerModule[iModule * maxClustersPerModule];
 
       int pos = xpu::atomic_add_block(&nClustersPerModule[iModule], 1);
 
-      XPU_ASSERT(size_t(pos) < maxClustersPerModule);
-      if (size_t(pos) < maxClustersPerModule) {
-        GpuClusterIdx idx {time, pos};
-        tgtIdx[idx.fIdx] = idx;
-        tgtData[idx.fIdx] = cls;
+      if (size_t(pos) >= maxClustersPerModule) {
+        xpu::atomic_add(&monitor->fNumClusterBucketOverflow, 1);
+        return;
       }
+
+      GpuClusterIdx idx {time, pos};
+      tgtIdx[idx.fIdx] = idx;
+      tgtData[idx.fIdx] = cls;
     }
 
     XPU_D bool IsBackside(int iModule) const { return iModule >= nModules; }
diff --git a/algo/detectors/sts/StsHitfinderChain.cxx b/algo/detectors/sts/StsHitfinderChain.cxx
index e2899fb2ac1241669764eb1e9d51ef9546919e0c..d743d65d8168ee662e2ca3153abebc8fc900131e 100644
--- a/algo/detectors/sts/StsHitfinderChain.cxx
+++ b/algo/detectors/sts/StsHitfinderChain.cxx
@@ -57,6 +57,7 @@ void sts::HitfinderChain::operator()(gsl::span<const CbmStsDigi> digis)
   // xpu::memset(hfc.digisPerModuleTmp, 0);
   // xpu::memset(hfc.digisSortedPerModule, 0);
   // xpu::memset(hfc.digiOffsetPerModule, 0);
+  queue.memset(hfc.monitor, 0);
   queue.memset(hfc.digiConnectorsPerModule, 0);
   queue.memset(hfc.channelOffsetPerModule, 0);
   queue.memset(hfc.clusterIdxPerModule, 0);
@@ -147,10 +148,44 @@ void sts::HitfinderChain::operator()(gsl::span<const CbmStsDigi> digis)
 
   queue.copy(hfc.hitsPerModule, xpu::d2h);
   queue.copy(hfc.nHitsPerModule, xpu::d2h);
+  queue.copy(hfc.monitor, xpu::d2h);
+
   queue.wait();
 
-  xpu::h_view nHits {hfc.nHitsPerModule};
-  // xpu::h_view nClusters{hfc.nClustersPerModule};
+  xpu::h_view monitor(hfc.monitor);
+  xpu::h_view nHits(hfc.nHitsPerModule);
+
+  // Note: Checking for cluster bucket overflow is probably paranoid
+  // as we allocate enough memory for one cluster per digi.
+  if (monitor[0].fNumClusterBucketOverflow > 0) {
+    L_(error) << "STS Hitfinder Chain: Cluster bucket overflow! " << monitor[0].fNumClusterBucketOverflow
+              << " clusters were discarded!";
+
+    for (size_t m = 0; m < nModules * 2; m++) {
+      L_(info) << nClusters[m] << " clusters in module " << m << " (of " << hfc.maxClustersPerModule << " max)";
+      if (nClusters[m] > hfc.maxClustersPerModule) {
+        L_(error) << "STS Hitfinder Chain: Cluster bucket overflow in module " << m << " with " << nClusters[m]
+                  << " (of " << hfc.maxClustersPerModule << " max)"
+                  << " clusters!";
+        nClusters[m] = hfc.maxClustersPerModule;
+      }
+    }
+  }
+
+  if (monitor[0].fNumHitBucketOverflow > 0) {
+    L_(error) << "STS Hitfinder Chain: Hit bucket overflow! " << monitor[0].fNumHitBucketOverflow
+              << " hits were discarded!";
+
+    for (size_t m = 0; m < nModules; m++) {
+      if (nHits[m] > hfc.maxHitsPerModule) {
+        L_(error) << "STS Hitfinder Chain: Hit bucket overflow in module " << m << " with " << nHits[m] << " (of "
+                  << hfc.maxHitsPerModule << " max)"
+                  << " hits!";
+        nHits[m] = hfc.maxHitsPerModule;
+      }
+    }
+  }
+
   size_t nHitsTotal = std::accumulate(nHits.begin(), nHits.end(), 0);
   size_t nClustersTotal = std::accumulate(nClusters.begin(), nClusters.end(), 0);
 
@@ -223,6 +258,8 @@ void sts::HitfinderChain::AllocateStatic()
 
   // Time errors
   fHitfinder.maxClusterTimeErrorByModuleSide.reset(nModuleSides, xpu::buf_device);
 
+  fHitfinder.monitor.reset(1, xpu::buf_io);
+
   q.wait();
 }
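The guards this patch adds to CreateHit and CreateCluster follow the usual fixed-capacity bucket pattern on GPUs: reserve a slot with an atomic increment, and if the reserved index lands past the end of the bucket, bump an overflow counter in a monitor struct instead of asserting or writing out of bounds. Below is a minimal host-side C++ sketch of the same pattern, with std::atomic standing in for xpu's device atomics; Monitor, Bucket, and Push are illustrative names, not part of the CBM codebase.

```cpp
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <vector>

// Illustrative stand-in for the patch's HitfinderMonitor counters.
struct Monitor {
  std::atomic<unsigned> numHitBucketOverflow{0};
};

struct Bucket {
  std::vector<int> slots;      // fixed-capacity output storage
  std::atomic<int> nItems{0};  // next free slot, reserved atomically
  explicit Bucket(size_t cap) : slots(cap) {}

  // Mirrors CreateHit after the patch: reserve a slot first, then
  // either store the item or count the overflow -- never assert.
  void Push(int item, Monitor& mon)
  {
    int idx = nItems.fetch_add(1);
    if (static_cast<size_t>(idx) >= slots.size()) {
      mon.numHitBucketOverflow.fetch_add(1);
      return;  // item is discarded, but reported on the host later
    }
    slots[idx] = item;
  }
};

int main()
{
  Monitor mon;
  Bucket bucket(4);
  for (int i = 0; i < 6; i++) bucket.Push(i, mon);  // last 2 items overflow

  // Host-side check, analogous to reading back hfc.monitor after the kernels:
  if (mon.numHitBucketOverflow > 0) {
    std::printf("hit bucket overflow! %u items discarded\n", mon.numHitBucketOverflow.load());
  }
}
```

Note that the slot counter keeps growing past capacity even when items are dropped, which is why the chain clamps nHits[m] and nClusters[m] back to their maxima on the host before accumulating the totals.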