From a253e60c7e16734ce662430e235f0fce3c0ff290 Mon Sep 17 00:00:00 2001
From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de>
Date: Fri, 22 Mar 2024 13:50:46 +0100
Subject: [PATCH] online: Rework STS hitfinder monitoring.

---
 algo/detectors/sts/Hitfinder.cxx      |  2 +-
 algo/detectors/sts/Hitfinder.h        | 18 ++++++++----------
 algo/detectors/sts/HitfinderChain.cxx | 20 ++++++++------------
 algo/detectors/sts/HitfinderChain.h   |  8 +++++++-
 algo/global/Reco.cxx                  | 18 +++++++++---------
 algo/global/Reco.h                    |  4 ++--
 6 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/algo/detectors/sts/Hitfinder.cxx b/algo/detectors/sts/Hitfinder.cxx
index bc129e2e64..09ebeb6a8d 100644
--- a/algo/detectors/sts/Hitfinder.cxx
+++ b/algo/detectors/sts/Hitfinder.cxx
@@ -838,7 +838,7 @@ XPU_D void sts::Hitfinder::CreateHit(int iModule, float xLocal, float yLocal, fl
   int idx = xpu::atomic_add(&nHitsPerModule[iModule], 1);
 
   if (size_t(idx) >= maxHitsPerModule) {
-    xpu::atomic_add(&monitor->fNumHitBucketOverflow, 1);
+    xpu::atomic_add(&monitor->nHitBucketOverflow, 1);
     return;
   }
 
diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h
index 383e48cd22..69d8828ec9 100644
--- a/algo/detectors/sts/Hitfinder.h
+++ b/algo/detectors/sts/Hitfinder.h
@@ -15,7 +15,6 @@
 #include "sts/HitfinderPars.h"
 
 #include <xpu/device.h>
-#include <xpu/host.h>  // Required for xpu::timings on monitoring struct. Move to separate header?
 
 namespace cbm::algo
 {
@@ -112,13 +111,12 @@ namespace cbm::algo::sts
     XPU_D void operator()(context&);
   };
 
-  struct HitfinderMonitor {
-    i32 fNumClusterBucketOverflow = 0;
-    i32 fNumHitBucketOverflow     = 0;
-    u64 fNumClusterTotal          = 0;
-    u64 fNumHitsTotal             = 0;
-    xpu::timings fTime;
-    bool HasErrors() const { return fNumClusterBucketOverflow > 0 || fNumHitBucketOverflow > 0; }
+  struct HitfinderMonDevice {
+    i32 nClusterBucketOverflow;
+    i32 nHitBucketOverflow;
+    u64 nClusterTotal;
+    u64 nHitsTotal;
+    bool HasErrors() const { return nClusterBucketOverflow > 0 || nHitBucketOverflow > 0; }
   };
 
   // Calibration data structures
@@ -223,7 +221,7 @@ namespace cbm::algo::sts
     xpu::buffer<SensorPar> sensorPars;
 
     // Monitor data
-    xpu::buffer<HitfinderMonitor> monitor;
+    xpu::buffer<HitfinderMonDevice> monitor;
 
     // input
     // Store all digis in a flat array with a header that contains the offset for every module (front and back)
@@ -364,7 +362,7 @@ namespace cbm::algo::sts
       u32 pos = xpu::atomic_add(&nClustersPerModule[iModule], 1);
 
       if (size_t(pos) >= maxClustersPerModule) {
-        xpu::atomic_add(&monitor->fNumClusterBucketOverflow, 1);
+        xpu::atomic_add(&monitor->nClusterBucketOverflow, 1);
         return;
       }
 
diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx
index 280911c5c2..41aa192dde 100644
--- a/algo/detectors/sts/HitfinderChain.cxx
+++ b/algo/detectors/sts/HitfinderChain.cxx
@@ -50,7 +50,9 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
 {
   EnsureParameters();
 
-  xpu::push_timer("STS Hitfinder");
+  Result result;
+
+  xpu::scoped_timer t_("STS Hitfinder", &result.monitor.time);
   xpu::t_add_bytes(digis.size_bytes());
 
   size_t nModules     = fPars->setup.modules.size();
@@ -213,8 +215,8 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
 
   // Note: Checking for cluster bucket overflow is probably paranoid
   // as we allocate enough memory for one cluster per digi.
-  if (monitor[0].fNumClusterBucketOverflow > 0) {
-    L_(error) << "STS Hitfinder Chain: Cluster bucket overflow! " << monitor[0].fNumClusterBucketOverflow
+  if (monitor[0].nClusterBucketOverflow > 0) {
+    L_(error) << "STS Hitfinder Chain: Cluster bucket overflow! " << monitor[0].nClusterBucketOverflow
               << " clusters were discarded!";
 
     for (size_t m = 0; m < nModules * 2; m++) {
@@ -228,8 +230,8 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   }
 
   xpu::h_view nHits(hfc.nHitsPerModule);
-  if (monitor[0].fNumHitBucketOverflow > 0) {
-    L_(error) << "STS Hitfinder Chain: Hit bucket overflow! " << monitor[0].fNumHitBucketOverflow
+  if (monitor[0].nHitBucketOverflow > 0) {
+    L_(error) << "STS Hitfinder Chain: Hit bucket overflow! " << monitor[0].nHitBucketOverflow
               << " hits were discarded!";
 
     for (size_t m = 0; m < nModules; m++) {
@@ -248,15 +250,9 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   size_t nHitsTotal = std::accumulate(nHits.begin(), nHits.end(), 0);
   L_(info) << "Timeslice contains " << nHitsTotal << " STS hits and " << nClustersTotal << " STS clusters";
 
-  new (&monitor[0]) sts::HitfinderMonitor{};
-  monitor[0].fTime            = xpu::pop_timer();  // TODO use xpu::scoped_timer instead
-  monitor[0].fNumClusterTotal = nClustersTotal;
-  monitor[0].fNumHitsTotal    = nHitsTotal;
-
-  Result result;
   result.hits     = std::move(hits);
   result.clusters = std::move(clusters);
-  result.monitor  = monitor[0];
+  result.monitor.SetDeviceMon(monitor[0]);
   return result;
 }
 
diff --git a/algo/detectors/sts/HitfinderChain.h b/algo/detectors/sts/HitfinderChain.h
index fa3e695c35..d63a8ca571 100644
--- a/algo/detectors/sts/HitfinderChain.h
+++ b/algo/detectors/sts/HitfinderChain.h
@@ -25,6 +25,12 @@
 namespace cbm::algo::sts
 {
 
+  struct HitfinderMon : HitfinderMonDevice {
+    xpu::timings time;
+
+    void SetDeviceMon(const HitfinderMonDevice& devMon) { HitfinderMonDevice::operator=(devMon); }
+  };
+
   struct HitfinderChainPars {
     HitfinderPars setup;  // TODO: HitfinderPars should renamed to HitfinderSetup
     RecoParams::STS::Memory memory;
@@ -42,7 +48,7 @@ namespace cbm::algo::sts
     struct Result {
       PartitionedSpan<sts::Hit> hits;
       PartitionedVector<sts::Cluster> clusters;
-      sts::HitfinderMonitor monitor;
+      HitfinderMon monitor;
     };
 
     void SetParameters(const HitfinderChainPars& parameters);
diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx
index b57f01ee97..9626a1f25a 100644
--- a/algo/global/Reco.cxx
+++ b/algo/global/Reco.cxx
@@ -243,7 +243,7 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
       fStsDigiQa->Exec();
     }
 
-    sts::HitfinderMonitor stsHitfinderMonitor;
+    sts::HitfinderMon stsHitfinderMonitor;
     PartitionedSpan<sts::Hit> stsHits;
     PartitionedVector<sts::Cluster> stsClusters;
     if (fStsHitFinder) {
@@ -251,7 +251,7 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
       auto stsResults     = (*fStsHitFinder)(digis.fSts, storeClusters);
       stsHits             = stsResults.hits;
       stsClusters         = std::move(stsResults.clusters);
-      stsHitfinderMonitor = stsResults.monitor;
+      stsHitfinderMonitor = std::move(stsResults.monitor);
       QueueStsRecoMetrics(stsHitfinderMonitor);
     }
 
@@ -373,18 +373,18 @@ void Reco::QueueUnpackerMetricsDet(const UnpackMonitor<MSMonitor>& monitor)
                            });
 }
 
-void Reco::QueueStsRecoMetrics(const sts::HitfinderMonitor& monitor)
+void Reco::QueueStsRecoMetrics(const sts::HitfinderMon& monitor)
 {
   if (!HasMonitor()) return;
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
-                             {"stsRecoTimeTotal", monitor.fTime.wall()},
-                             {"stsRecoThroughput", monitor.fTime.throughput()},
-                             {"stsRecoNumClusters", (unsigned long) monitor.fNumClusterTotal},
-                             {"stsRecoNumHits", (unsigned long) monitor.fNumHitsTotal},
-                             {"stsRecoNumClusterBucketOverflow", monitor.fNumClusterBucketOverflow},
-                             {"stsRecoNumHitBucketOverflow", monitor.fNumHitBucketOverflow},
+                             {"stsRecoTimeTotal", monitor.time.wall()},
+                             {"stsRecoThroughput", monitor.time.throughput()},
+                             {"stsRecoNumClusters", (unsigned long) monitor.nClusterTotal},
+                             {"stsRecoNumHits", (unsigned long) monitor.nHitsTotal},
+                             {"stsRecoNumClusterBucketOverflow", monitor.nClusterBucketOverflow},
+                             {"stsRecoNumHitBucketOverflow", monitor.nHitBucketOverflow},
                            });
 }
 
diff --git a/algo/global/Reco.h b/algo/global/Reco.h
index 7db340ca93..8da3eeefc5 100644
--- a/algo/global/Reco.h
+++ b/algo/global/Reco.h
@@ -44,7 +44,7 @@ namespace cbm::algo
     class Unpack;
     class DigiQa;
     class HitfinderChain;
-    class HitfinderMonitor;
+    class HitfinderMon;
   }
 
   namespace tof
@@ -152,7 +152,7 @@ namespace cbm::algo
 
     template<class MSMonitor>
     void QueueUnpackerMetricsDet(const UnpackMonitor<MSMonitor>&);
-    void QueueStsRecoMetrics(const sts::HitfinderMonitor&);
+    void QueueStsRecoMetrics(const sts::HitfinderMon&);
     void QueueTofRecoMetrics(const tof::HitfindMonitorData&);
     void QueueTofCalibMetrics(const tof::CalibrateMonitorData&);
     void QueueEvbuildMetrics(const evbuild::EventbuildChainMonitorData&);
-- 
GitLab