From a253e60c7e16734ce662430e235f0fce3c0ff290 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de> Date: Fri, 22 Mar 2024 13:50:46 +0100 Subject: [PATCH] online: Rework STS hitfinder monitoring. --- algo/detectors/sts/Hitfinder.cxx | 2 +- algo/detectors/sts/Hitfinder.h | 18 ++++++++---------- algo/detectors/sts/HitfinderChain.cxx | 20 ++++++++------------ algo/detectors/sts/HitfinderChain.h | 8 +++++++- algo/global/Reco.cxx | 18 +++++++++--------- algo/global/Reco.h | 4 ++-- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/algo/detectors/sts/Hitfinder.cxx b/algo/detectors/sts/Hitfinder.cxx index bc129e2e64..09ebeb6a8d 100644 --- a/algo/detectors/sts/Hitfinder.cxx +++ b/algo/detectors/sts/Hitfinder.cxx @@ -838,7 +838,7 @@ XPU_D void sts::Hitfinder::CreateHit(int iModule, float xLocal, float yLocal, fl int idx = xpu::atomic_add(&nHitsPerModule[iModule], 1); if (size_t(idx) >= maxHitsPerModule) { - xpu::atomic_add(&monitor->fNumHitBucketOverflow, 1); + xpu::atomic_add(&monitor->nHitBucketOverflow, 1); return; } diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h index 383e48cd22..69d8828ec9 100644 --- a/algo/detectors/sts/Hitfinder.h +++ b/algo/detectors/sts/Hitfinder.h @@ -15,7 +15,6 @@ #include "sts/HitfinderPars.h" #include <xpu/device.h> -#include <xpu/host.h> // Required for xpu::timings on monitoring struct. Move to separate header? namespace cbm::algo { @@ -112,13 +111,12 @@ namespace cbm::algo::sts XPU_D void operator()(context&); }; - struct HitfinderMonitor { - i32 fNumClusterBucketOverflow = 0; - i32 fNumHitBucketOverflow = 0; - u64 fNumClusterTotal = 0; - u64 fNumHitsTotal = 0; - xpu::timings fTime; - bool HasErrors() const { return fNumClusterBucketOverflow > 0 || fNumHitBucketOverflow > 0; } + struct HitfinderMonDevice { + i32 nClusterBucketOverflow; + i32 nHitBucketOverflow; + u64 nClusterTotal; + u64 nHitsTotal; + bool HasErrors() const { return nClusterBucketOverflow > 0 || nHitBucketOverflow > 0; } }; // Calibration data structures @@ -223,7 +221,7 @@ namespace cbm::algo::sts xpu::buffer<SensorPar> sensorPars; // Monitor data - xpu::buffer<HitfinderMonitor> monitor; + xpu::buffer<HitfinderMonDevice> monitor; // input // Store all digis in a flat array with a header that contains the offset for every module (front and back) @@ -364,7 +362,7 @@ namespace cbm::algo::sts u32 pos = xpu::atomic_add(&nClustersPerModule[iModule], 1); if (size_t(pos) >= maxClustersPerModule) { - xpu::atomic_add(&monitor->fNumClusterBucketOverflow, 1); + xpu::atomic_add(&monitor->nClusterBucketOverflow, 1); return; } diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx index 280911c5c2..41aa192dde 100644 --- a/algo/detectors/sts/HitfinderChain.cxx +++ b/algo/detectors/sts/HitfinderChain.cxx @@ -50,7 +50,9 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS { EnsureParameters(); - xpu::push_timer("STS Hitfinder"); + Result result; + + xpu::scoped_timer t_("STS Hitfinder", &result.monitor.time); xpu::t_add_bytes(digis.size_bytes()); size_t nModules = fPars->setup.modules.size(); @@ -213,8 +215,8 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS // Note: Checking for cluster bucket overflow is probably paranoid // as we allocate enough memory for one cluster per digi. - if (monitor[0].fNumClusterBucketOverflow > 0) { - L_(error) << "STS Hitfinder Chain: Cluster bucket overflow! " << monitor[0].fNumClusterBucketOverflow + if (monitor[0].nClusterBucketOverflow > 0) { + L_(error) << "STS Hitfinder Chain: Cluster bucket overflow! " << monitor[0].nClusterBucketOverflow << " clusters were discarded!"; for (size_t m = 0; m < nModules * 2; m++) { @@ -228,8 +230,8 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS } xpu::h_view nHits(hfc.nHitsPerModule); - if (monitor[0].fNumHitBucketOverflow > 0) { - L_(error) << "STS Hitfinder Chain: Hit bucket overflow! " << monitor[0].fNumHitBucketOverflow + if (monitor[0].nHitBucketOverflow > 0) { + L_(error) << "STS Hitfinder Chain: Hit bucket overflow! " << monitor[0].nHitBucketOverflow << " hits were discarded!"; for (size_t m = 0; m < nModules; m++) { @@ -248,15 +250,9 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS size_t nHitsTotal = std::accumulate(nHits.begin(), nHits.end(), 0); L_(info) << "Timeslice contains " << nHitsTotal << " STS hits and " << nClustersTotal << " STS clusters"; - new (&monitor[0]) sts::HitfinderMonitor{}; - monitor[0].fTime = xpu::pop_timer(); // TODO use xpu::scoped_timer instead - monitor[0].fNumClusterTotal = nClustersTotal; - monitor[0].fNumHitsTotal = nHitsTotal; - - Result result; result.hits = std::move(hits); result.clusters = std::move(clusters); - result.monitor = monitor[0]; + result.monitor.SetDeviceMon(monitor[0]); return result; } diff --git a/algo/detectors/sts/HitfinderChain.h b/algo/detectors/sts/HitfinderChain.h index fa3e695c35..d63a8ca571 100644 --- a/algo/detectors/sts/HitfinderChain.h +++ b/algo/detectors/sts/HitfinderChain.h @@ -25,6 +25,12 @@ namespace cbm::algo::sts { + struct HitfinderMon : HitfinderMonDevice { + xpu::timings time; + + void SetDeviceMon(const HitfinderMonDevice& devMon) { HitfinderMonDevice::operator=(devMon); } + }; + struct HitfinderChainPars { HitfinderPars setup; // TODO: HitfinderPars should renamed to HitfinderSetup RecoParams::STS::Memory memory; @@ -42,7 +48,7 @@ namespace cbm::algo::sts struct Result { PartitionedSpan<sts::Hit> hits; PartitionedVector<sts::Cluster> clusters; - sts::HitfinderMonitor monitor; + HitfinderMon monitor; }; void SetParameters(const HitfinderChainPars& parameters); diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx index b57f01ee97..9626a1f25a 100644 --- a/algo/global/Reco.cxx +++ b/algo/global/Reco.cxx @@ -243,7 +243,7 @@ RecoResults Reco::Run(const fles::Timeslice& ts) fStsDigiQa->Exec(); } - sts::HitfinderMonitor stsHitfinderMonitor; + sts::HitfinderMon stsHitfinderMonitor; PartitionedSpan<sts::Hit> stsHits; PartitionedVector<sts::Cluster> stsClusters; if (fStsHitFinder) { @@ -251,7 +251,7 @@ RecoResults Reco::Run(const fles::Timeslice& ts) auto stsResults = (*fStsHitFinder)(digis.fSts, storeClusters); stsHits = stsResults.hits; stsClusters = std::move(stsResults.clusters); - stsHitfinderMonitor = stsResults.monitor; + stsHitfinderMonitor = std::move(stsResults.monitor); QueueStsRecoMetrics(stsHitfinderMonitor); } @@ -373,18 +373,18 @@ void Reco::QueueUnpackerMetricsDet(const UnpackMonitor<MSMonitor>& monitor) }); } -void Reco::QueueStsRecoMetrics(const sts::HitfinderMonitor& monitor) +void Reco::QueueStsRecoMetrics(const sts::HitfinderMon& monitor) { if (!HasMonitor()) return; GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}}, { - {"stsRecoTimeTotal", monitor.fTime.wall()}, - {"stsRecoThroughput", monitor.fTime.throughput()}, - {"stsRecoNumClusters", (unsigned long) monitor.fNumClusterTotal}, - {"stsRecoNumHits", (unsigned long) monitor.fNumHitsTotal}, - {"stsRecoNumClusterBucketOverflow", monitor.fNumClusterBucketOverflow}, - {"stsRecoNumHitBucketOverflow", monitor.fNumHitBucketOverflow}, + {"stsRecoTimeTotal", monitor.time.wall()}, + {"stsRecoThroughput", monitor.time.throughput()}, + {"stsRecoNumClusters", (unsigned long) monitor.nClusterTotal}, + {"stsRecoNumHits", (unsigned long) monitor.nHitsTotal}, + {"stsRecoNumClusterBucketOverflow", monitor.nClusterBucketOverflow}, + {"stsRecoNumHitBucketOverflow", monitor.nHitBucketOverflow}, }); } diff --git a/algo/global/Reco.h b/algo/global/Reco.h index 7db340ca93..8da3eeefc5 100644 --- a/algo/global/Reco.h +++ b/algo/global/Reco.h @@ -44,7 +44,7 @@ namespace cbm::algo class Unpack; class DigiQa; class HitfinderChain; - class HitfinderMonitor; + class HitfinderMon; } namespace tof @@ -152,7 +152,7 @@ namespace cbm::algo template<class MSMonitor> void QueueUnpackerMetricsDet(const UnpackMonitor<MSMonitor>&); - void QueueStsRecoMetrics(const sts::HitfinderMonitor&); + void QueueStsRecoMetrics(const sts::HitfinderMon&); void QueueTofRecoMetrics(const tof::HitfindMonitorData&); void QueueTofCalibMetrics(const tof::CalibrateMonitorData&); void QueueEvbuildMetrics(const evbuild::EventbuildChainMonitorData&); -- GitLab