From 4d090efdc539890b6a7de29e2e98dbd5a707d829 Mon Sep 17 00:00:00 2001
From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de>
Date: Tue, 24 Oct 2023 18:48:55 +0000
Subject: [PATCH] algo: Monitor throughput.

---
 algo/base/DigiData.cxx                | 13 ++++++++++
 algo/base/DigiData.h                  | 10 ++++++++
 algo/ca/TrackingChain.cxx             |  9 ++++++-
 algo/detectors/sts/Hitfinder.h        |  4 +--
 algo/detectors/sts/HitfinderChain.cxx | 19 ++++++---------
 algo/detectors/tof/Hitfind.cxx        | 30 +++++++++++------------
 algo/detectors/tof/Hitfind.h          | 10 +++++---
 algo/evbuild/EventBuilder.cxx         |  4 +--
 algo/evbuild/EventBuilder.h           |  4 ++-
 algo/evbuild/EventbuildChain.cxx      |  4 +--
 algo/evbuild/EventbuildChain.h        |  1 -
 algo/global/Reco.cxx                  | 35 +++++++++++++++++++++------
 algo/global/Reco.h                    |  5 ++++
 algo/trigger/TimeClusterTrigger.cxx   |  4 +--
 algo/trigger/TimeClusterTrigger.h     | 10 +++++---
 algo/unpack/Unpack.cxx                |  6 ++---
 algo/unpack/Unpack.h                  |  4 ++-
 17 files changed, 113 insertions(+), 59 deletions(-)

diff --git a/algo/base/DigiData.cxx b/algo/base/DigiData.cxx
index a76d0d0726..8997428654 100644
--- a/algo/base/DigiData.cxx
+++ b/algo/base/DigiData.cxx
@@ -40,6 +40,19 @@ size_t DigiData::Size(ECbmModuleId system) const
   }
 }
 
+size_t DigiData::TotalSize() const  // Number of digis summed over all subsystem containers
+{
+  const size_t nPartial = fSts.size() + fMuch.size() + fTof.size() + fBmon.size() + fTrd.size() + fTrd2d.size();
+  return nPartial + fRich.size() + fPsd.size() + fFsd.size();
+}
+
+size_t DigiData::TotalSizeBytes() const
+{
+  auto nBytes = [](const auto& v) { return v.size() * sizeof(v[0]); };  // sizeof operand is never evaluated
+  return nBytes(fSts) + nBytes(fMuch) + nBytes(fTof) + nBytes(fBmon) + nBytes(fTrd) + nBytes(fTrd2d)
+         + nBytes(fRich) + nBytes(fPsd) + nBytes(fFsd);
+}
+
 CbmDigiData DigiData::ToStorable() const
 {
   return CbmDigiData {
diff --git a/algo/base/DigiData.h b/algo/base/DigiData.h
index b7aa9cde79..25030d490c 100644
--- a/algo/base/DigiData.h
+++ b/algo/base/DigiData.h
@@ -54,6 +54,16 @@ namespace cbm::algo
      */
     size_t Size(ECbmModuleId system) const;
 
+    /**
+     * @brief Get the total number of digis, summed over the digi containers of all subsystems.
+     */
+    size_t TotalSize() const;
+
+    /**
+     * @brief Get the total size in bytes of all digis (per subsystem: number of digis times sizeof the digi type).
+     */
+    size_t TotalSizeBytes() const;
+
     /**
      * @brief Convert to CbmDigiData for file storage
      *
diff --git a/algo/ca/TrackingChain.cxx b/algo/ca/TrackingChain.cxx
index b288a10e30..a6a6ff34b1 100644
--- a/algo/ca/TrackingChain.cxx
+++ b/algo/ca/TrackingChain.cxx
@@ -11,7 +11,7 @@
 
 #include "tof/Config.h"
 
-#include <boost/filesystem.hpp>
+#include <xpu/host.h>
 
 #include "CaConstants.h"
 #include "CaHit.h"
@@ -53,6 +53,7 @@ void TrackingChain::Init()
 //
 TrackingChain::Return_t TrackingChain::Run(Input_t recoResults)
 {
+  xpu::scoped_timer t_("CA");  // TODO: pass timings to monitoring for throughput?
   // ----- Init input data ---------------------------------------------------------------------------------------------
   this->PrepareInput(recoResults);
 
@@ -93,7 +94,11 @@ void TrackingChain::PrepareInput(Input_t recoResults)
 template<EDetectorID DetID>
 void TrackingChain::ReadHits(PartitionedSpan<const ca::HitTypes_t::at<DetID>> hits)
 {
+  using Hit_t          = ca::HitTypes_t::at<DetID>;
   constexpr bool IsSTS = (DetID == EDetectorID::Sts);
+  constexpr bool IsTOF = (DetID == EDetectorID::Tof);
+
+  xpu::t_add_bytes(hits.NElements() * sizeof(Hit_t));  // Assumes call from Run, for existence of timer!
 
   ca::HitKeyIndex_t firstHitKey = fNofHitKeys;
   int64_t dataStreamDet         = static_cast<int64_t>(DetID) << 60;  // detector part of the data stream
@@ -105,6 +110,8 @@ void TrackingChain::ReadHits(PartitionedSpan<const ca::HitTypes_t::at<DetID>> hi
     // FIXME: This definition of the station index works only for STS, and there is no any guaranty, that it will
     //        work for other mCBM setups.
     if constexpr (IsSTS) { iStLocal = (extHitAddress >> 4) & 0xF; }
+    if constexpr (IsTOF) { iStLocal = tof::Config::GetTofTrackingStation(extHitAddress); }
+
     int iStActive  = (iStLocal != -1) ? fCaFramework.GetParameters().GetStationIndexActive(iStLocal, DetID) : -1;
     size_t iOffset = hits.Offsets()[iPartition];
     if (iStActive < 0) { continue; }
diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h
index f4f0f79c96..d7760f3296 100644
--- a/algo/detectors/sts/Hitfinder.h
+++ b/algo/detectors/sts/Hitfinder.h
@@ -8,6 +8,7 @@
 #include "CbmStsDigi.h"
 
 #include <xpu/device.h>
+#include <xpu/host.h>  // Required for xpu::timings on monitoring struct. Move to separate header?
 
 #include "Definitions.h"
 #include "gpu/DeviceImage.h"
@@ -116,8 +117,7 @@ namespace cbm::algo::sts
     i32 fNumHitBucketOverflow     = 0;
     u64 fNumClusterTotal          = 0;
     u64 fNumHitsTotal             = 0;
-    f64 fTimeTotal                = 0.;
-
+    xpu::timings fTime;
     bool HasErrors() const { return fNumClusterBucketOverflow > 0 || fNumHitBucketOverflow > 0; }
   };
 
diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx
index 6bdda8b11e..239f342220 100644
--- a/algo/detectors/sts/HitfinderChain.cxx
+++ b/algo/detectors/sts/HitfinderChain.cxx
@@ -49,11 +49,11 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   EnsureParameters();
 
   xpu::push_timer("STS Hitfinder");
+  xpu::t_add_bytes(digis.size_bytes());
 
   size_t nModules     = fPars->setup.modules.size();
   size_t nModuleSides = nModules * 2;
   size_t nDigisTotal = digis.size();
-  xpu::t_add_bytes(nDigisTotal * sizeof(CbmStsDigi));
 
   // Getting the digis on the GPU requires 3 steps
   // 1. Sort digis into buckets by module
@@ -63,14 +63,12 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   if (fPars->memory.IsDynamic()) AllocateDynamic(digiMap.maxNDigisPerModule, nDigisTotal);
   else {
     if (digiMap.maxNDigisPerModule > fPars->memory.maxNDigisPerModule) {
-      throw std::runtime_error(
-        fmt::format("STS Hitfinder Chain: Too many digis per module for static allocation: {} > {}",
-                    digiMap.maxNDigisPerModule, fPars->memory.maxNDigisPerModule));
+      throw ProcessingError("STS Hitfinder Chain: Too many digis per module for static allocation: {} > {}",
+                            digiMap.maxNDigisPerModule, fPars->memory.maxNDigisPerModule);
     }
     if (nDigisTotal > fPars->memory.maxNDigisPerTS) {
-      throw std::runtime_error(
-        fmt::format("STS Hitfinder Chain: Too many digis per timeslice for static allocation: {} > {}", nDigisTotal,
-                    fPars->memory.maxNDigisPerTS));
+      throw ProcessingError("STS Hitfinder Chain: Too many digis per timeslice for static allocation: {} > {}",
+                            nDigisTotal, fPars->memory.maxNDigisPerTS);
     }
   }
   // 3. Copy digis into flat array with offsets per module
@@ -99,7 +97,7 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   queue.memset(hfc.monitor, 0);
   queue.memset(hfc.digiConnectorsPerModule, 0);
   queue.memset(hfc.channelOffsetPerModule, 0);
-  queue.memset(hfc.clusterIdxPerModule, 0);
+  // queue.memset(hfc.clusterIdxPerModule, 0);
   // xpu::memset(hfc.clusterIdxPerModuleTmp, 0);
   // xpu::memset(hfc.clusterIdxSortedPerModule, 0);
   // xpu::memset(hfc.clusterDataPerModule, 0);
@@ -236,9 +234,8 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   size_t nHitsTotal = std::accumulate(nHits.begin(), nHits.end(), 0);
   L_(info) << "Timeslice contains " << nHitsTotal << " STS hits and " << nClustersTotal << " STS clusters";
 
-  auto timings = xpu::pop_timer();
-
-  monitor[0].fTimeTotal       = timings.wall();
+  new (&monitor[0]) sts::HitfinderMonitor {};
+  monitor[0].fTime            = xpu::pop_timer();  // TODO use xpu::scoped_timer instead
   monitor[0].fNumClusterTotal = nClustersTotal;
   monitor[0].fNumHitsTotal    = nHitsTotal;
 
diff --git a/algo/detectors/tof/Hitfind.cxx b/algo/detectors/tof/Hitfind.cxx
index e1c1379171..4937cf80c6 100644
--- a/algo/detectors/tof/Hitfind.cxx
+++ b/algo/detectors/tof/Hitfind.cxx
@@ -6,8 +6,6 @@
 
 #include <chrono>
 
-#include <xpu/host.h>
-
 #include "log.hpp"
 
 using namespace std;
@@ -19,11 +17,12 @@ namespace cbm::algo::tof
   Hitfind::resultType Hitfind::operator()(gsl::span<CbmTofDigi> digiIn)
   {
     xpu::push_timer("TofHitfind");
+    xpu::t_add_bytes(digiIn.size_bytes());
 
     // --- Output data
-    resultType result                  = {};
-    auto& clusterTs                    = result.first;
-    auto& monitor                      = result.second;
+    resultType result = {};
+    auto& clusterTs   = result.first;
+    auto& monitor     = result.second;
 
     // Do calibration globally (optional, should not be used together with RPC-wise calibration)
     CalibRawDigis(digiIn, monitor);
@@ -80,10 +79,9 @@ namespace cbm::algo::tof
     }
 
     // Monitoring
-    xpu::timings timings = xpu::pop_timer();
-    monitor.fTimeHitfind = timings.wall();
-    monitor.fNumDigis    = digiIn.size();
-    monitor.fNumHits     = clustersFlat.size();
+    monitor.fTime     = xpu::pop_timer();
+    monitor.fNumDigis = digiIn.size();
+    monitor.fNumHits  = clustersFlat.size();
 
     // Create ouput vector
     clusterTs = PartitionedVector(std::move(clustersFlat), rpcSizes, rpcAddresses);
@@ -169,13 +167,13 @@ namespace cbm::algo::tof
 
     for (size_t iDigi = 0; iDigi < digiVec.size(); iDigi++) {
 
-      CbmTofDigi pDigi              = digiVec[iDigi];
-      const double SmType           = pDigi.GetType();
-      const double Sm               = pDigi.GetSm();
-      const double Rpc              = pDigi.GetRpc();
-      const double Chan             = pDigi.GetChannel();
-      const double Side             = pDigi.GetSide();
-      const int NbRpc               = fNbRpc[SmType];
+      CbmTofDigi pDigi    = digiVec[iDigi];
+      const double SmType = pDigi.GetType();
+      const double Sm     = pDigi.GetSm();
+      const double Rpc    = pDigi.GetRpc();
+      const double Chan   = pDigi.GetChannel();
+      const double Side   = pDigi.GetSide();
+      const int NbRpc     = fNbRpc[SmType];
 
       auto& rpcs = fTofConfig.rpcs;
       if (SmType >= rpcs.size() || Sm * NbRpc + Rpc >= rpcs.at(SmType).size()) {
diff --git a/algo/detectors/tof/Hitfind.h b/algo/detectors/tof/Hitfind.h
index 897ce36b8d..a8d80d8df4 100644
--- a/algo/detectors/tof/Hitfind.h
+++ b/algo/detectors/tof/Hitfind.h
@@ -15,6 +15,8 @@
 #include <sstream>
 #include <vector>
 
+#include <xpu/host.h>
+
 #include "PartitionedVector.h"
 
 namespace cbm::algo::tof
@@ -27,15 +29,15 @@ namespace cbm::algo::tof
    **/
   struct HitfindMonitorData {
     //std::vector<tof::ClusterizerMonitorData> fMonitor;   //Per RPC monitoring data, to be implemented
-    double fTimeHitfind = 0;
-    size_t fNumDigis    = 0;
-    size_t fNumHits     = 0;
+    xpu::timings fTime;
+    size_t fNumDigis            = 0;
+    size_t fNumHits             = 0;
     size_t fDigiCalibUnknownRPC = 0;
 
     std::string print() const
     {
       std::stringstream ss;
-      ss << "Hitfind stats: num digis " << fNumDigis << ", time " << fTimeHitfind << ", num hits " << fNumHits
+      ss << "Hitfind stats: num digis " << fNumDigis << ", time " << fTime.wall() << ", num hits " << fNumHits
          << std::endl;
       return ss.str();
     }
diff --git a/algo/evbuild/EventBuilder.cxx b/algo/evbuild/EventBuilder.cxx
index 4416fd877d..4ab28d55dc 100644
--- a/algo/evbuild/EventBuilder.cxx
+++ b/algo/evbuild/EventBuilder.cxx
@@ -20,6 +20,7 @@ namespace cbm::algo::evbuild
                                                     std::optional<DigiEventSelector> selector) const
   {
     xpu::push_timer("EventBuilder");
+    xpu::t_add_bytes(ts.TotalSizeBytes());
 
     // --- Output data
     resultType result = {};
@@ -48,8 +49,7 @@ namespace cbm::algo::evbuild
     monitor.bmon.nDigis += ts.fBmon.size();
     monitor.nEvents += result.first.size();
 
-    auto timings   = xpu::pop_timer();
-    monitor.timeMs = timings.wall();
+    monitor.time = xpu::pop_timer();
 
     return result;
   }
diff --git a/algo/evbuild/EventBuilder.h b/algo/evbuild/EventBuilder.h
index 89a2ccaeac..7a345acf90 100644
--- a/algo/evbuild/EventBuilder.h
+++ b/algo/evbuild/EventBuilder.h
@@ -14,6 +14,8 @@
 #include <string>
 #include <vector>
 
+#include <xpu/host.h>
+
 #include "DigiData.h"
 #include "DigiEventSelector.h"
 #include "EventBuilderConfig.h"
@@ -48,7 +50,7 @@ namespace cbm::algo::evbuild
     EventBuilderDetectorMonitorData psd;    ///< Monitoring data for PSD
     EventBuilderDetectorMonitorData fsd;    ///< Monitoring data for FSD
     size_t nEvents = 0;                     ///< Number of events found in TS
-    double timeMs  = 0.;                    ///< Time for event building in ms
+    xpu::timings time;                      ///< Time for event building
   };
 
 
diff --git a/algo/evbuild/EventbuildChain.cxx b/algo/evbuild/EventbuildChain.cxx
index 6d2af6325a..aa930b3311 100644
--- a/algo/evbuild/EventbuildChain.cxx
+++ b/algo/evbuild/EventbuildChain.cxx
@@ -58,9 +58,7 @@ EventbuildChain::EventbuildChain(const Config& config, std::shared_ptr<Histogram
 // ----------------------------------------------------------------------------
 
 // -----   Destructor   ------------------------------------------------------
-EventbuildChain::~EventbuildChain()
-{
-}
+EventbuildChain::~EventbuildChain() = default;
 // ----------------------------------------------------------------------------
 
 // -----   Run event building on a timeslice   --------------------------------
diff --git a/algo/evbuild/EventbuildChain.h b/algo/evbuild/EventbuildChain.h
index 6b46763831..e70ae37f36 100644
--- a/algo/evbuild/EventbuildChain.h
+++ b/algo/evbuild/EventbuildChain.h
@@ -73,7 +73,6 @@ namespace cbm::algo::evbuild
      ** @return Vector of digi times for the specified system
      **/
     std::vector<double> GetDigiTimes(const DigiData& timeslice, ECbmModuleId system);
-
   };
 
 }  // namespace cbm::algo::evbuild
diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx
index 54f169c521..1500cfb35c 100644
--- a/algo/global/Reco.cxx
+++ b/algo/global/Reco.cxx
@@ -103,14 +103,13 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
 {
   if (!fInitialized) throw std::runtime_error("Chain not initialized");
 
-  for (uint64_t comp = 0; comp < ts.num_components(); comp++) {
-    xpu::t_add_bytes(ts.size_component(comp));
-  }
+  xpu::t_add_bytes(ts_utils::SizeBytes(ts));
 
   RecoResults results;
   xpu::timings tsTimes;
   {
     xpu::scoped_timer t_(fmt::format("TS {}", ts.index()), &tsTimes);
+    xpu::t_add_bytes(ts_utils::SizeBytes(ts));
 
     L_(info) << ">>> Processing TS " << ts.index();
     xpu::set<cbm::algo::Params>(Params());
@@ -175,6 +174,10 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
   }
   PrintTimings(tsTimes);
 
+  ProcessingMonitor processingMonitor;
+  processingMonitor.fTime = tsTimes;
+  QueueProcessingMetrics(processingMonitor);
+
   return results;
 }
 
@@ -232,7 +235,8 @@ void Reco::QueueUnpackerMetrics(const fles::Timeslice& ts, const UnpackMonitorDa
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
                              {"tsIdDelta", tsDelta},
-                             {"unpackTimeTotal", monitor.fTimeUnpack},
+                             {"unpackTimeTotal", monitor.fTime.wall()},
+                             {"unpackThroughput", monitor.fTime.throughput()},
                              {"unpackBytesInSts", monitor.fNumBytesInSts},
                              {"unpackBytesInMuch", monitor.fNumBytesInMuch},
                              {"unpackBytesInTof", monitor.fNumBytesInTof},
@@ -267,7 +271,8 @@ void Reco::QueueStsRecoMetrics(const sts::HitfinderMonitor& monitor)
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
-                             {"stsRecoTimeTotal", monitor.fTimeTotal},
+                             {"stsRecoTimeTotal", monitor.fTime.wall()},
+                             {"stsRecoThroughput", monitor.fTime.throughput()},
                              {"stsRecoNumClusters", (unsigned long) monitor.fNumClusterTotal},
                              {"stsRecoNumHits", (unsigned long) monitor.fNumHitsTotal},
                              {"stsRecoNumClusterBucketOverflow", monitor.fNumClusterBucketOverflow},
@@ -281,7 +286,8 @@ void Reco::QueueTofRecoMetrics(const tof::HitfindMonitorData& mon)
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
-                             {"tofRecoTimeTotal", mon.fTimeHitfind},
+                             {"tofRecoTimeTotal", mon.fTime.wall()},
+                             {"tofRecoThroughput", mon.fTime.throughput()},
                              {"tofRecoNumDigisIn", mon.fNumDigis},
                              {"tofRecoNumHits", mon.fNumHits},
                              {"tofRecoUnknownRPC", mon.fDigiCalibUnknownRPC},
@@ -321,8 +327,10 @@ void Reco::QueueEvbuildMetrics(const evbuild::EventbuildChainMonitorData& mon)
 
   double totalSelectionRatio = nDigisTotal > 0 ? double(nDigisInEventsTotal) / nDigisTotal : 0;
   GetMonitor().QueueMetric("cbmreco", tags,
-                           {{"digiTriggerTimeTotal", mon.trigger.timeMs},
-                            {"eventbuildTimeTotal", mon.evbuild.timeMs},
+                           {{"digiTriggerTimeTotal", mon.trigger.time.wall()},
+                            {"digiTriggerThroughput", mon.trigger.time.throughput()},
+                            {"eventbuildTimeTotal", mon.evbuild.time.wall()},
+                            {"eventbuildThroughput", mon.evbuild.time.throughput()},
                             {"numTrigger", mon.trigger.nTriggers},
                             {"numEvents", mon.evbuild.nEvents},
                             {"totalEvSelectionRatio", totalSelectionRatio}});
@@ -341,3 +349,14 @@ void Reco::QueueTrackingMetrics(const ca::TrackingMonitorData& monitor)
                             {"caNofRecoHitsUsed", monitor.GetCounterValue(ca::ECounter::RecoHitUsed)},
                             {"caNofWindowa", monitor.GetCounterValue(ca::ECounter::SubTS)}});
 }
+
+// Queue wall time and throughput of the total processing time; does nothing when no monitor is available.
+void Reco::QueueProcessingMetrics(const ProcessingMonitor& mon)
+{
+  if (HasMonitor()) {
+    const xpu::timings& total = mon.fTime;
+    GetMonitor().QueueMetric(
+      "cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
+      {{"processingTimeTotal", total.wall()}, {"processingThroughput", total.throughput()}});
+  }
+}
diff --git a/algo/global/Reco.h b/algo/global/Reco.h
index 3fe138a22b..da5aa97748 100644
--- a/algo/global/Reco.h
+++ b/algo/global/Reco.h
@@ -25,6 +25,10 @@ namespace cbm::algo
 {
   class Options;
 
+  struct ProcessingMonitor {
+    xpu::timings fTime;  ///< total processing time
+  };
+
   class Reco : SubChain {
   public:
     Reco();
@@ -68,6 +72,7 @@ namespace cbm::algo
     void QueueTofRecoMetrics(const tof::HitfindMonitorData&);
     void QueueEvbuildMetrics(const evbuild::EventbuildChainMonitorData&);
     void QueueTrackingMetrics(const ca::TrackingMonitorData&);
+    void QueueProcessingMetrics(const ProcessingMonitor&);
   };
 }  // namespace cbm::algo
 
diff --git a/algo/trigger/TimeClusterTrigger.cxx b/algo/trigger/TimeClusterTrigger.cxx
index 504ee44d71..2eaf4f0ead 100644
--- a/algo/trigger/TimeClusterTrigger.cxx
+++ b/algo/trigger/TimeClusterTrigger.cxx
@@ -25,6 +25,7 @@ namespace cbm::algo::evbuild
     if (!std::is_sorted(dataVec.begin(), dataVec.end())) throw std::runtime_error("TimeClusterTrigger: unsorted input");
 
     xpu::push_timer("TimeClusterTrigger");
+    xpu::t_add_bytes(dataVec.size() * sizeof(double));
 
     // --- Parameters
     double winSize     = fConfig.Window();
@@ -67,8 +68,7 @@ namespace cbm::algo::evbuild
     monitor.num += dataVec.size();
     monitor.nTriggers = triggerVec.size();
 
-    auto timings   = xpu::pop_timer();
-    monitor.timeMs = timings.wall();
+    monitor.time = xpu::pop_timer();
 
     return result;
   }
diff --git a/algo/trigger/TimeClusterTrigger.h b/algo/trigger/TimeClusterTrigger.h
index 814efe4a12..eb4f72b055 100644
--- a/algo/trigger/TimeClusterTrigger.h
+++ b/algo/trigger/TimeClusterTrigger.h
@@ -9,6 +9,8 @@
 #include <cstdint>
 #include <vector>
 
+#include <xpu/host.h>
+
 #include "DigiTriggerConfig.h"
 
 namespace cbm::algo::evbuild
@@ -20,10 +22,10 @@ namespace cbm::algo::evbuild
    ** @brief Monitoring data for time cluster trigger algorithm
    **/
   struct TimeClusterTriggerMonitorData {
-    size_t numInTrigger = 0;   ///< Time stamps used for trigger building
-    size_t num          = 0;   ///< Total time stamps in input
-    size_t nTriggers    = 0;   ///< Number of triggers
-    double timeMs       = 0.;  ///< Time for trigger building in ms
+    size_t numInTrigger = 0;  ///< Time stamps used for trigger building
+    size_t num          = 0;  ///< Total time stamps in input
+    size_t nTriggers    = 0;  ///< Number of triggers
+    xpu::timings time;        ///< Timings of trigger building (replaces timeMs; wall() presumably in ms, confirm)
   };
 
   /** @class TimeClusterTrigger
diff --git a/algo/unpack/Unpack.cxx b/algo/unpack/Unpack.cxx
index c46cb91408..668de3d834 100644
--- a/algo/unpack/Unpack.cxx
+++ b/algo/unpack/Unpack.cxx
@@ -12,6 +12,7 @@
 #include "compat/Algorithm.h"
 #include "compat/OpenMP.h"
 #include "log.hpp"
+#include "util/TsUtils.h"
 
 using namespace std;
 using fles::Subsystem;
@@ -22,6 +23,7 @@ namespace cbm::algo
   Unpack::resultType Unpack::operator()(const fles::Timeslice* timeslice)
   {
     xpu::push_timer("Unpack");
+    xpu::t_add_bytes(ts_utils::SizeBytes(*timeslice));
 
     // --- Output data
     resultType result          = {};
@@ -121,9 +123,7 @@ namespace cbm::algo
     DoSort(digiTs.fRich);
     xpu::pop_timer();
 
-    xpu::timings timings = xpu::pop_timer();
-
-    monitor.fTimeUnpack = timings.wall();
+    monitor.fTime = xpu::pop_timer();
 
     return result;
   }
diff --git a/algo/unpack/Unpack.h b/algo/unpack/Unpack.h
index 4ad0379c77..f8ab66438d 100644
--- a/algo/unpack/Unpack.h
+++ b/algo/unpack/Unpack.h
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <vector>
 
+#include <xpu/host.h>
+
 #include "DigiData.h"
 #include "PODVector.h"
 #include "bmon/ReadoutConfig.h"
@@ -46,6 +48,7 @@ namespace cbm::algo
     std::vector<trd::UnpackMonitorData> fTrd;      ///< Monitoring data for TRD
     std::vector<trd2d::UnpackMonitorData> fTrd2d;  ///< Monitoring data for TRD2D
     std::vector<rich::UnpackMonitorData> fRich;    ///< Monitoring data for RICH
+    xpu::timings fTime;
     size_t fNumMs       = 0;
     size_t fNumBytes    = 0;
     size_t fNumBytesInSts       = 0;
@@ -59,7 +62,6 @@ namespace cbm::algo
     size_t fNumCompUsed = 0;
     size_t fNumErrInvalidEqId   = 0;
     size_t fNumErrInvalidSysVer = 0;
-    double fTimeUnpack          = 0;
     std::string print() const
     {
       std::stringstream ss;
-- 
GitLab